1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Shufti acceleration: compile code.
31 */
32#include "shufticompile.h"
33#include "ue2common.h"
34#include "util/charreach.h"
35#include "util/container.h"
36#include "util/flat_containers.h"
37
38#include <array>
39#include <cassert>
40#include <cstring>
41#include <map>
42
43using namespace std;
44
45namespace ue2 {
46
47/** \brief Single-byte variant.
48 *
49 * Returns -1 if unable to construct masks, otherwise returns number of bits
50 * used in the mask.
51 *
52 * Note: always able to construct masks for 8 or fewer characters.
53 */
54int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) {
55 /* Things could be packed much more optimally, but this should be able to
56 * handle any set of characters entirely in the lower half. */
57
58 assert(c.count() < 256);
59 assert(!c.none());
60
61 map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */
62 /* group matching characters by high nibble */
63 for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) {
64 u8 it_hi = i >> 4;
65 u8 it_lo = i & 0xf;
66 by_hi[it_hi].set(it_lo);
67 }
68
69 map<CharReach, CharReach> by_lo_set;
70 /* group all hi nibbles with a common set of lo nibbles together */
71 for (map<u8, CharReach>::const_iterator it = by_hi.begin();
72 it != by_hi.end(); ++it) {
73 by_lo_set[it->second].set(it->first);
74 }
75
76 if (by_lo_set.size() > 8) {
77 /* too many char classes on the dance floor */
78 assert(c.size() > 8);
79 return -1;
80 }
81
82 u8 bit_index = 0;
83 array<u8, 16> lo_a; lo_a.fill(0);
84 array<u8, 16> hi_a; hi_a.fill(0);
85 for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin();
86 it != by_lo_set.end(); ++it) {
87 const CharReach &lo_nibbles = it->first;
88 const CharReach &hi_nibbles = it->second;
89
90 /* set bits in low mask */
91 for (size_t j = lo_nibbles.find_first(); j != CharReach::npos;
92 j = lo_nibbles.find_next(j)) {
93 lo_a[j] |= (1 << bit_index);
94 }
95
96 /* set bits in high mask */
97 for (size_t j = hi_nibbles.find_first(); j != CharReach::npos;
98 j = hi_nibbles.find_next(j)) {
99 hi_a[j] |= (1 << bit_index);
100 }
101
102 bit_index++;
103 }
104
105 memcpy(lo, lo_a.data(), sizeof(m128));
106 memcpy(hi, hi_a.data(), sizeof(m128));
107
108 return bit_index;
109}
110
111static
112array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) {
113 a[0] |= b[0];
114 a[1] |= b[1];
115 a[2] |= b[2];
116 a[3] |= b[3];
117
118 return a;
119}
120
121
122#define MAX_BUCKETS 8
123static
124void set_buckets_from_mask(u16 nibble_mask, u32 bucket,
125 array<u8, 16> &byte_mask) {
126 assert(bucket < MAX_BUCKETS);
127
128 u32 mask = nibble_mask;
129 while (mask) {
130 u32 n = findAndClearLSB_32(&mask);
131 byte_mask[n] &= ~(1 << bucket);
132 }
133}
134
135bool shuftiBuildDoubleMasks(const CharReach &onechar,
136 const flat_set<pair<u8, u8>> &twochar,
137 u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) {
138 DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(),
139 twochar.size());
140 array<u8, 16> lo1_a;
141 array<u8, 16> lo2_a;
142 array<u8, 16> hi1_a;
143 array<u8, 16> hi2_a;
144
145 lo1_a.fill(0xff);
146 lo2_a.fill(0xff);
147 hi1_a.fill(0xff);
148 hi2_a.fill(0xff);
149
150 // two-byte literals
151 vector<array<u16, 4>> nibble_masks;
152 for (const auto &p : twochar) {
153 DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second);
154 u16 a_lo = 1U << (p.first & 0xf);
155 u16 a_hi = 1U << (p.first >> 4);
156 u16 b_lo = 1U << (p.second & 0xf);
157 u16 b_hi = 1U << (p.second >> 4);
158 nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}});
159 }
160
161 // one-byte literals (second byte is a wildcard)
162 for (size_t it = onechar.find_first(); it != CharReach::npos;
163 it = onechar.find_next(it)) {
164 DEBUG_PRINTF("%02hhx\n", (u8)it);
165 u16 a_lo = 1U << (it & 0xf);
166 u16 a_hi = 1U << (it >> 4);
167 u16 wildcard = 0xffff;
168 nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}});
169 }
170
171 // try to merge strings into shared buckets
172 for (u32 i = 0; i < 4; i++) {
173 map<array<u16, 4>, array<u16, 4>> new_masks;
174 for (const auto &a : nibble_masks) {
175 auto key = a;
176 key[i] = 0;
177 if (!contains(new_masks, key)) {
178 new_masks[key] = a;
179 } else {
180 new_masks[key] = or_array(new_masks[key], a);
181 }
182 }
183 nibble_masks.clear();
184 for (const auto &e : new_masks) {
185 nibble_masks.push_back(e.second);
186 }
187 }
188
189 if (nibble_masks.size() > MAX_BUCKETS) {
190 DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size());
191 return false;
192 }
193
194 u32 i = 0;
195 for (const auto &a : nibble_masks) {
196 set_buckets_from_mask(a[0], i, lo1_a);
197 set_buckets_from_mask(a[1], i, hi1_a);
198 set_buckets_from_mask(a[2], i, lo2_a);
199 set_buckets_from_mask(a[3], i, hi2_a);
200 i++;
201 }
202
203 memcpy(lo1, lo1_a.data(), sizeof(m128));
204 memcpy(lo2, lo2_a.data(), sizeof(m128));
205 memcpy(hi1, hi1_a.data(), sizeof(m128));
206 memcpy(hi2, hi2_a.data(), sizeof(m128));
207
208 return true;
209}
210
211#ifdef DUMP_SUPPORT
212
213CharReach shufti2cr(const u8 *lo, const u8 *hi) {
214 CharReach cr;
215 for (u32 i = 0; i < 256; i++) {
216 if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) {
217 cr.set(i);
218 }
219 }
220 return cr;
221}
222
223#endif // DUMP_SUPPORT
224
225} // namespace ue2
226