1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** |
30 | * \file |
31 | * \brief Noodle literal matcher: build code. |
32 | */ |
33 | |
34 | #include "noodle_build.h" |
35 | |
36 | #include "hwlm_literal.h" |
37 | #include "noodle_internal.h" |
38 | #include "util/bitutils.h" |
39 | #include "util/compare.h" |
40 | #include "util/verify_types.h" |
41 | #include "ue2common.h" |
42 | |
43 | #include <cstring> // for memcpy |
44 | #include <vector> |
45 | |
46 | using std::vector; |
47 | |
48 | namespace ue2 { |
49 | |
50 | static |
51 | u64a make_u64a_mask(const vector<u8> &v) { |
52 | assert(v.size() <= sizeof(u64a)); |
53 | if (v.size() > sizeof(u64a)) { |
54 | throw std::exception(); |
55 | } |
56 | |
57 | u64a mask = 0; |
58 | size_t len = v.size(); |
59 | unsigned char *m = (unsigned char *)&mask; |
60 | DEBUG_PRINTF("making mask len %zu\n" , len); |
61 | memcpy(m, &v[0], len); |
62 | return mask; |
63 | } |
64 | |
65 | static |
66 | size_t findNoodFragOffset(const hwlmLiteral &lit) { |
67 | const auto &s = lit.s; |
68 | const size_t len = lit.s.length(); |
69 | |
70 | size_t offset = 0; |
71 | for (size_t i = 0; i + 1 < len; i++) { |
72 | int diff = 0; |
73 | const char c = s[i]; |
74 | const char d = s[i + 1]; |
75 | if (lit.nocase && ourisalpha(c)) { |
76 | diff = (mytoupper(c) != mytoupper(d)); |
77 | } else { |
78 | diff = (c != d); |
79 | } |
80 | offset = i; |
81 | if (diff) { |
82 | break; |
83 | } |
84 | } |
85 | return offset; |
86 | } |
87 | |
88 | bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) { |
89 | const auto &s = lit.s; |
90 | |
91 | size_t mask_len = std::max(s.length(), lit.msk.size()); |
92 | DEBUG_PRINTF("mask is %zu bytes\n" , lit.msk.size()); |
93 | assert(mask_len <= 8); |
94 | assert(lit.msk.size() == lit.cmp.size()); |
95 | |
96 | vector<u8> n_msk(mask_len); |
97 | vector<u8> n_cmp(mask_len); |
98 | |
99 | for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len; |
100 | i++, j++) { |
101 | DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n" , i, lit.msk[j], i, lit.cmp[j]); |
102 | n_msk[i] = lit.msk[j]; |
103 | n_cmp[i] = lit.cmp[j]; |
104 | } |
105 | |
106 | size_t s_off = mask_len - s.length(); |
107 | for (unsigned i = s_off; i < mask_len; i++) { |
108 | u8 c = s[i - s_off]; |
109 | u8 si_msk = lit.nocase && ourisalpha(c) ? (u8)CASE_CLEAR : (u8)0xff; |
110 | n_msk[i] |= si_msk; |
111 | n_cmp[i] |= c & si_msk; |
112 | assert((n_cmp[i] & si_msk) == c); |
113 | DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n" , i, n_msk[i], i, n_cmp[i], |
114 | ourisprint(c) ? (char)c : '.'); |
115 | } |
116 | |
117 | auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable)); |
118 | assert(n); |
119 | DEBUG_PRINTF("size of nood %zu\n" , sizeof(noodTable)); |
120 | |
121 | size_t key_offset = findNoodFragOffset(lit); |
122 | |
123 | n->id = lit.id; |
124 | n->single = s.length() == 1 ? 1 : 0; |
125 | n->key_offset = verify_u8(s.length() - key_offset); |
126 | n->nocase = lit.nocase ? 1 : 0; |
127 | n->key0 = s[key_offset]; |
128 | if (n->single) { |
129 | n->key1 = 0; |
130 | } else { |
131 | n->key1 = s[key_offset + 1]; |
132 | } |
133 | n->msk = make_u64a_mask(n_msk); |
134 | n->cmp = make_u64a_mask(n_cmp); |
135 | n->msk_len = mask_len; |
136 | |
137 | return n; |
138 | } |
139 | |
140 | size_t noodSize(const noodTable *) { |
141 | return sizeof(noodTable); |
142 | } |
143 | |
144 | } // namespace ue2 |
145 | |
146 | #ifdef DUMP_SUPPORT |
147 | #include <cctype> |
148 | |
149 | namespace ue2 { |
150 | |
151 | void noodPrintStats(const noodTable *n, FILE *f) { |
152 | fprintf(f, "Noodle table\n" ); |
153 | fprintf(f, "Key Offset: %u\n" , n->key_offset); |
154 | fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n" , |
155 | n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len), |
156 | n->msk_len); |
157 | fprintf(f, "String: " ); |
158 | for (u32 i = 0; i < n->msk_len; i++) { |
159 | const u8 *m = (const u8 *)&n->cmp; |
160 | if (isgraph(m[i]) && m[i] != '\\') { |
161 | fprintf(f, "%c" , m[i]); |
162 | } else { |
163 | fprintf(f, "\\x%02hhx" , m[i]); |
164 | } |
165 | } |
166 | fprintf(f, "\n" ); |
167 | } |
168 | |
169 | } // namespace ue2 |
170 | |
171 | #endif |
172 | |