1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Hamster Wheel Literal Matcher: build code.
31 */
32
33#include "hwlm_build.h"
34
35#include "grey.h"
36#include "hwlm.h"
37#include "hwlm_internal.h"
38#include "hwlm_literal.h"
39#include "noodle_engine.h"
40#include "noodle_build.h"
41#include "scratch.h"
42#include "ue2common.h"
43#include "fdr/fdr_compile.h"
44#include "fdr/fdr_compile_internal.h"
45#include "fdr/fdr_engine_description.h"
46#include "fdr/teddy_engine_description.h"
47#include "util/compile_context.h"
48#include "util/compile_error.h"
49#include "util/make_unique.h"
50#include "util/ue2string.h"
51
52#include <cassert>
53#include <cstring>
54#include <vector>
55
56using namespace std;
57
58namespace ue2 {
59
60HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in)
61 : engType(engType_in), lits(move(lits_in)) {}
62
63HWLMProto::HWLMProto(u8 engType_in,
64 unique_ptr<FDREngineDescription> eng_in,
65 vector<hwlmLiteral> lits_in,
66 map<u32, vector<u32>> bucketToLits_in,
67 bool make_small_in)
68 : engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)),
69 bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
70
71HWLMProto::HWLMProto(u8 engType_in,
72 unique_ptr<TeddyEngineDescription> eng_in,
73 vector<hwlmLiteral> lits_in,
74 map<u32, vector<u32>> bucketToLits_in,
75 bool make_small_in)
76 : engType(engType_in), teddyEng(move(eng_in)),
77 lits(move(lits_in)),
78 bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
79
80HWLMProto::~HWLMProto() {}
81
82static
83void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
84#ifdef DEBUG
85 DEBUG_PRINTF("building lit table for:\n");
86 for (const auto &lit : lits) {
87 printf("\t%u:%016llx %s%s\n", lit.id, lit.groups,
88 escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : "");
89 }
90#endif
91}
92
93#ifndef NDEBUG
94// Called by an assertion.
95static
96bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
97 for (const auto &lit : lits) {
98 if (!lit.groups) {
99 return false;
100 }
101 }
102 return true;
103}
104#endif
105
106static
107bool isNoodleable(const vector<hwlmLiteral> &lits,
108 const CompileContext &cc) {
109 if (!cc.grey.allowNoodle) {
110 return false;
111 }
112
113 if (lits.size() != 1) {
114 DEBUG_PRINTF("too many literals for noodle\n");
115 return false;
116 }
117
118 return true;
119}
120
121bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
122 UNUSED hwlm_group_t expected_groups) {
123 size_t engSize = 0;
124 shared_ptr<void> eng;
125
126 const auto &lits = proto.lits;
127 DEBUG_PRINTF("building table with %zu strings\n", lits.size());
128
129 if (proto.engType == HWLM_ENGINE_NOOD) {
130 DEBUG_PRINTF("build noodle table\n");
131 const hwlmLiteral &lit = lits.front();
132 auto noodle = noodBuildTable(lit);
133 if (noodle) {
134 engSize = noodle.size();
135 }
136 eng = move(noodle);
137 } else {
138 DEBUG_PRINTF("building a new deal\n");
139 auto fdr = fdrBuildTable(proto, cc.grey);
140 if (fdr) {
141 engSize = fdr.size();
142 }
143 eng = move(fdr);
144 }
145
146 if (!eng) {
147 return nullptr;
148 }
149
150 assert(engSize);
151 if (engSize > cc.grey.limitLiteralMatcherSize) {
152 throw ResourceLimitError();
153 }
154
155 const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
156 auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
157
158 h->type = proto.engType;
159 memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
160
161 return h;
162}
163
164unique_ptr<HWLMProto>
165hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small,
166 const CompileContext &cc) {
167 assert(!lits.empty());
168 dumpLits(lits);
169
170 // Check that we haven't exceeded the maximum number of literals.
171 if (lits.size() > cc.grey.limitLiteralCount) {
172 throw ResourceLimitError();
173 }
174
175 // Safety and resource limit checks.
176 u64a total_chars = 0;
177 for (const auto &lit : lits) {
178 assert(!lit.s.empty());
179
180 if (lit.s.length() > cc.grey.limitLiteralLength) {
181 throw ResourceLimitError();
182 }
183 total_chars += lit.s.length();
184 if (total_chars > cc.grey.limitLiteralMatcherChars) {
185 throw ResourceLimitError();
186 }
187
188 // We do not allow the all-ones ID, as we reserve that for internal use
189 // within literal matchers.
190 if (lit.id == 0xffffffffu) {
191 assert(!"reserved id 0xffffffff used");
192 throw CompileError("Internal error.");
193 }
194 }
195
196 unique_ptr<HWLMProto> proto;
197
198 DEBUG_PRINTF("building table with %zu strings\n", lits.size());
199
200 assert(everyoneHasGroups(lits));
201
202 if (isNoodleable(lits, cc)) {
203 DEBUG_PRINTF("build noodle table\n");
204 proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits);
205 } else {
206 DEBUG_PRINTF("building a new deal\n");
207 proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small,
208 cc.target_info, cc.grey);
209 if (!proto) {
210 return nullptr;
211 }
212 }
213
214 return proto;
215}
216
217size_t hwlmSize(const HWLM *h) {
218 size_t engSize = 0;
219
220 switch (h->type) {
221 case HWLM_ENGINE_NOOD:
222 engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
223 break;
224 case HWLM_ENGINE_FDR:
225 engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
226 break;
227 }
228
229 if (!engSize) {
230 return 0;
231 }
232
233 return engSize + ROUNDUP_CL(sizeof(*h));
234}
235
236size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
237 const size_t NO_LIMIT = ~(size_t)0;
238
239 // NOTE: this function contains a number of magic numbers which are
240 // conservative estimates of flood-proneness based on internal details of
241 // the various literal engines that fall under the HWLM aegis. If you
242 // change those engines, you might need to change this function too.
243
244 DEBUG_PRINTF("%zu literals\n", numLiterals);
245
246 if (cc.grey.allowNoodle && numLiterals <= 1) {
247 DEBUG_PRINTF("noodle\n");
248 return NO_LIMIT;
249 }
250
251 if (cc.grey.fdrAllowTeddy) {
252 if (numLiterals <= 48) {
253 DEBUG_PRINTF("teddy\n");
254 return 3;
255 }
256 if (cc.target_info.has_avx2() && numLiterals <= 96) {
257 DEBUG_PRINTF("avx2 teddy\n");
258 return 3;
259 }
260 }
261
262 // TODO: we had thought we could push this value up to 9, but it seems that
263 // hurts performance on floods in some FDR models. Super-conservative for
264 // now.
265 DEBUG_PRINTF("fdr\n");
266 return 3;
267}
268
269} // namespace ue2
270