1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Shufti acceleration: compile code. |
31 | */ |
32 | #include "shufticompile.h" |
33 | #include "ue2common.h" |
34 | #include "util/charreach.h" |
35 | #include "util/container.h" |
36 | #include "util/flat_containers.h" |
37 | |
38 | #include <array> |
39 | #include <cassert> |
40 | #include <cstring> |
41 | #include <map> |
42 | |
43 | using namespace std; |
44 | |
45 | namespace ue2 { |
46 | |
47 | /** \brief Single-byte variant. |
48 | * |
49 | * Returns -1 if unable to construct masks, otherwise returns number of bits |
50 | * used in the mask. |
51 | * |
52 | * Note: always able to construct masks for 8 or fewer characters. |
53 | */ |
54 | int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { |
55 | /* Things could be packed much more optimally, but this should be able to |
56 | * handle any set of characters entirely in the lower half. */ |
57 | |
58 | assert(c.count() < 256); |
59 | assert(!c.none()); |
60 | |
61 | map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */ |
62 | /* group matching characters by high nibble */ |
63 | for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) { |
64 | u8 it_hi = i >> 4; |
65 | u8 it_lo = i & 0xf; |
66 | by_hi[it_hi].set(it_lo); |
67 | } |
68 | |
69 | map<CharReach, CharReach> by_lo_set; |
70 | /* group all hi nibbles with a common set of lo nibbles together */ |
71 | for (map<u8, CharReach>::const_iterator it = by_hi.begin(); |
72 | it != by_hi.end(); ++it) { |
73 | by_lo_set[it->second].set(it->first); |
74 | } |
75 | |
76 | if (by_lo_set.size() > 8) { |
77 | /* too many char classes on the dance floor */ |
78 | assert(c.size() > 8); |
79 | return -1; |
80 | } |
81 | |
82 | u8 bit_index = 0; |
83 | array<u8, 16> lo_a; lo_a.fill(0); |
84 | array<u8, 16> hi_a; hi_a.fill(0); |
85 | for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin(); |
86 | it != by_lo_set.end(); ++it) { |
87 | const CharReach &lo_nibbles = it->first; |
88 | const CharReach &hi_nibbles = it->second; |
89 | |
90 | /* set bits in low mask */ |
91 | for (size_t j = lo_nibbles.find_first(); j != CharReach::npos; |
92 | j = lo_nibbles.find_next(j)) { |
93 | lo_a[j] |= (1 << bit_index); |
94 | } |
95 | |
96 | /* set bits in high mask */ |
97 | for (size_t j = hi_nibbles.find_first(); j != CharReach::npos; |
98 | j = hi_nibbles.find_next(j)) { |
99 | hi_a[j] |= (1 << bit_index); |
100 | } |
101 | |
102 | bit_index++; |
103 | } |
104 | |
105 | memcpy(lo, lo_a.data(), sizeof(m128)); |
106 | memcpy(hi, hi_a.data(), sizeof(m128)); |
107 | |
108 | return bit_index; |
109 | } |
110 | |
111 | static |
112 | array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) { |
113 | a[0] |= b[0]; |
114 | a[1] |= b[1]; |
115 | a[2] |= b[2]; |
116 | a[3] |= b[3]; |
117 | |
118 | return a; |
119 | } |
120 | |
121 | |
122 | #define MAX_BUCKETS 8 |
123 | static |
124 | void set_buckets_from_mask(u16 nibble_mask, u32 bucket, |
125 | array<u8, 16> &byte_mask) { |
126 | assert(bucket < MAX_BUCKETS); |
127 | |
128 | u32 mask = nibble_mask; |
129 | while (mask) { |
130 | u32 n = findAndClearLSB_32(&mask); |
131 | byte_mask[n] &= ~(1 << bucket); |
132 | } |
133 | } |
134 | |
135 | bool shuftiBuildDoubleMasks(const CharReach &onechar, |
136 | const flat_set<pair<u8, u8>> &twochar, |
137 | u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) { |
138 | DEBUG_PRINTF("unibytes %zu dibytes %zu\n" , onechar.size(), |
139 | twochar.size()); |
140 | array<u8, 16> lo1_a; |
141 | array<u8, 16> lo2_a; |
142 | array<u8, 16> hi1_a; |
143 | array<u8, 16> hi2_a; |
144 | |
145 | lo1_a.fill(0xff); |
146 | lo2_a.fill(0xff); |
147 | hi1_a.fill(0xff); |
148 | hi2_a.fill(0xff); |
149 | |
150 | // two-byte literals |
151 | vector<array<u16, 4>> nibble_masks; |
152 | for (const auto &p : twochar) { |
153 | DEBUG_PRINTF("%02hhx %02hhx\n" , p.first, p.second); |
154 | u16 a_lo = 1U << (p.first & 0xf); |
155 | u16 a_hi = 1U << (p.first >> 4); |
156 | u16 b_lo = 1U << (p.second & 0xf); |
157 | u16 b_hi = 1U << (p.second >> 4); |
158 | nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}}); |
159 | } |
160 | |
161 | // one-byte literals (second byte is a wildcard) |
162 | for (size_t it = onechar.find_first(); it != CharReach::npos; |
163 | it = onechar.find_next(it)) { |
164 | DEBUG_PRINTF("%02hhx\n" , (u8)it); |
165 | u16 a_lo = 1U << (it & 0xf); |
166 | u16 a_hi = 1U << (it >> 4); |
167 | u16 wildcard = 0xffff; |
168 | nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}}); |
169 | } |
170 | |
171 | // try to merge strings into shared buckets |
172 | for (u32 i = 0; i < 4; i++) { |
173 | map<array<u16, 4>, array<u16, 4>> new_masks; |
174 | for (const auto &a : nibble_masks) { |
175 | auto key = a; |
176 | key[i] = 0; |
177 | if (!contains(new_masks, key)) { |
178 | new_masks[key] = a; |
179 | } else { |
180 | new_masks[key] = or_array(new_masks[key], a); |
181 | } |
182 | } |
183 | nibble_masks.clear(); |
184 | for (const auto &e : new_masks) { |
185 | nibble_masks.push_back(e.second); |
186 | } |
187 | } |
188 | |
189 | if (nibble_masks.size() > MAX_BUCKETS) { |
190 | DEBUG_PRINTF("too many buckets needed (%zu)\n" , nibble_masks.size()); |
191 | return false; |
192 | } |
193 | |
194 | u32 i = 0; |
195 | for (const auto &a : nibble_masks) { |
196 | set_buckets_from_mask(a[0], i, lo1_a); |
197 | set_buckets_from_mask(a[1], i, hi1_a); |
198 | set_buckets_from_mask(a[2], i, lo2_a); |
199 | set_buckets_from_mask(a[3], i, hi2_a); |
200 | i++; |
201 | } |
202 | |
203 | memcpy(lo1, lo1_a.data(), sizeof(m128)); |
204 | memcpy(lo2, lo2_a.data(), sizeof(m128)); |
205 | memcpy(hi1, hi1_a.data(), sizeof(m128)); |
206 | memcpy(hi2, hi2_a.data(), sizeof(m128)); |
207 | |
208 | return true; |
209 | } |
210 | |
211 | #ifdef DUMP_SUPPORT |
212 | |
213 | CharReach shufti2cr(const u8 *lo, const u8 *hi) { |
214 | CharReach cr; |
215 | for (u32 i = 0; i < 256; i++) { |
216 | if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) { |
217 | cr.set(i); |
218 | } |
219 | } |
220 | return cr; |
221 | } |
222 | |
223 | #endif // DUMP_SUPPORT |
224 | |
225 | } // namespace ue2 |
226 | |