1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * \file
31 * \brief Large Bounded Repeat (LBR) engine build code.
32 */
33
34#include "ng_lbr.h"
35
36#include "grey.h"
37#include "ng_holder.h"
38#include "ng_repeat.h"
39#include "ng_reports.h"
40#include "nfa/castlecompile.h"
41#include "nfa/lbr_internal.h"
42#include "nfa/nfa_internal.h"
43#include "nfa/repeatcompile.h"
44#include "nfa/shufticompile.h"
45#include "nfa/trufflecompile.h"
46#include "util/alloc.h"
47#include "util/bitutils.h" // for lg2
48#include "util/compile_context.h"
49#include "util/container.h"
50#include "util/depth.h"
51#include "util/dump_charclass.h"
52#include "util/report_manager.h"
53#include "util/verify_types.h"
54
55using namespace std;
56
57namespace ue2 {
58
59static
60u32 depth_to_u32(const depth &d) {
61 assert(d.is_reachable());
62 if (d.is_infinite()) {
63 return REPEAT_INF;
64 }
65
66 u32 d_val = d;
67 assert(d_val < REPEAT_INF);
68 return d_val;
69}
70
71template<class LbrStruct> static
72u64a* getTable(NFA *nfa) {
73 char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
74 sizeof(RepeatInfo);
75 ptr = ROUNDUP_PTR(ptr, alignof(u64a));
76 return (u64a *)ptr;
77}
78
79template <class LbrStruct> static
80void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
81 const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
82 assert(nfa);
83
84 RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
85
86 DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
87 repeatTypeName(rtype), repeatMin.str().c_str(),
88 repeatMax.str().c_str());
89
90 // Fill the lbr_common structure first. Note that the RepeatInfo structure
91 // directly follows the LbrStruct.
92 const u32 info_offset = sizeof(LbrStruct);
93 c->repeatInfoOffset = info_offset;
94 c->report = report;
95
96 RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
97 info->type = verify_u8(rtype);
98 info->repeatMin = depth_to_u32(repeatMin);
99 info->repeatMax = depth_to_u32(repeatMax);
100 info->stateSize = rsi.stateSize;
101 info->packedCtrlSize = rsi.packedCtrlSize;
102 info->horizon = rsi.horizon;
103 info->minPeriod = minPeriod;
104 copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
105 info->patchCount = rsi.patchCount;
106 info->patchSize = rsi.patchSize;
107 info->encodingSize = rsi.encodingSize;
108 info->patchesOffset = rsi.patchesOffset;
109
110 // Fill the NFA structure.
111 nfa->nPositions = repeatMin;
112 nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
113 nfa->scratchStateSize = (u32)sizeof(lbr_state);
114 nfa->minWidth = verify_u32(repeatMin);
115 nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
116
117 // Fill the lbr table for sparse lbr model.
118 if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
119 u64a *table = getTable<LbrStruct>(nfa);
120 // Adjust table length according to the optimal patch length.
121 size_t len = nfa->length;
122 assert((u32)repeatMax >= rsi.patchSize);
123 len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
124 nfa->length = verify_u32(len);
125 info->length = verify_u32(sizeof(RepeatInfo)
126 + sizeof(u64a) * (rsi.patchSize + 1));
127 copy_bytes(table, rsi.table);
128 }
129}
130
131template <class LbrStruct> static
132bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype,
133 const depth &repeatMax) {
134 size_t tableLen = 0;
135 if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
136 tableLen = sizeof(u64a) * (repeatMax + 1);
137 }
138 size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
139 tableLen + sizeof(u64a);
140 auto nfa = make_zeroed_bytecode_ptr<NFA>(len);
141 nfa->type = verify_u8(nfa_type);
142 nfa->length = verify_u32(len);
143 return nfa;
144}
145
146static
147bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
148 const depth &repeatMax, u32 minPeriod,
149 bool is_reset, ReportID report) {
150 if (!cr.all()) {
151 return nullptr;
152 }
153
154 enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
155 is_reset);
156 auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax);
157 struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
158
159 fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
160 minPeriod, rtype);
161
162 DEBUG_PRINTF("built dot lbr\n");
163 return nfa;
164}
165
166static
167bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin,
168 const depth &repeatMax, u32 minPeriod,
169 bool is_reset, ReportID report) {
170 const CharReach escapes(~cr);
171
172 if (escapes.count() != 1) {
173 return nullptr;
174 }
175
176 enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
177 is_reset);
178 auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax);
179 struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
180 lv->c = escapes.find_first();
181
182 fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
183 minPeriod, rtype);
184
185 DEBUG_PRINTF("built verm lbr\n");
186 return nfa;
187}
188
189static
190bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
191 const depth &repeatMax, u32 minPeriod,
192 bool is_reset, ReportID report) {
193 const CharReach escapes(cr);
194
195 if (escapes.count() != 1) {
196 return nullptr;
197 }
198
199 enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
200 is_reset);
201 auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax);
202 struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
203 lv->c = escapes.find_first();
204
205 fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
206 minPeriod, rtype);
207
208 DEBUG_PRINTF("built negated verm lbr\n");
209 return nfa;
210}
211
212static
213bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
214 const depth &repeatMax, u32 minPeriod,
215 bool is_reset, ReportID report) {
216 enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
217 is_reset);
218 auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax);
219 struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
220
221 fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
222 minPeriod, rtype);
223
224 if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
225 return nullptr;
226 }
227
228 DEBUG_PRINTF("built shuf lbr\n");
229 return nfa;
230}
231
232static
233bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin,
234 const depth &repeatMax, u32 minPeriod,
235 bool is_reset, ReportID report) {
236 enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
237 is_reset);
238 auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax);
239 struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
240
241 fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
242 minPeriod, rtype);
243
244 truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2);
245
246 DEBUG_PRINTF("built truffle lbr\n");
247 return nfa;
248}
249
250static
251bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin,
252 const depth &repeatMax, u32 minPeriod,
253 bool is_reset, ReportID report) {
254 DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
255 repeatMin.str().c_str(), repeatMax.str().c_str(),
256 describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
257 report);
258 assert(repeatMin <= repeatMax);
259 assert(repeatMax.is_reachable());
260
261 auto nfa =
262 buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
263
264 if (!nfa) {
265 nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
266 report);
267 }
268 if (!nfa) {
269 nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
270 report);
271 }
272 if (!nfa) {
273 nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
274 report);
275 }
276 if (!nfa) {
277 nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
278 report);
279 }
280
281 if (!nfa) {
282 assert(0);
283 return nullptr;
284 }
285
286 return nfa;
287}
288
289bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
290 const vector<vector<CharReach>> &triggers,
291 const CompileContext &cc,
292 const ReportManager &rm) {
293 if (!cc.grey.allowLbr) {
294 return nullptr;
295 }
296
297 if (proto.repeats.size() != 1) {
298 return nullptr;
299 }
300
301 const PureRepeat &repeat = proto.repeats.begin()->second;
302 assert(!repeat.reach.none());
303
304 if (repeat.reports.size() != 1) {
305 DEBUG_PRINTF("too many reports\n");
306 return nullptr;
307 }
308
309 bool is_reset;
310 u32 min_period = minPeriod(triggers, repeat.reach, &is_reset);
311
312 if (depth(min_period) > repeat.bounds.max) {
313 DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
314 is_reset = true;
315 }
316
317 ReportID report = *repeat.reports.begin();
318 if (has_managed_reports(proto.kind)) {
319 report = rm.getProgramOffset(report);
320 }
321
322 DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
323 return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max,
324 min_period, is_reset, report);
325}
326
327/** \brief Construct an LBR engine from the given graph \p g. */
328bytecode_ptr<NFA> constructLBR(const NGHolder &g,
329 const vector<vector<CharReach>> &triggers,
330 const CompileContext &cc,
331 const ReportManager &rm) {
332 if (!cc.grey.allowLbr) {
333 return nullptr;
334 }
335
336 PureRepeat repeat;
337 if (!isPureRepeat(g, repeat)) {
338 return nullptr;
339 }
340 if (repeat.reports.size() != 1) {
341 DEBUG_PRINTF("too many reports\n");
342 return nullptr;
343 }
344
345 CastleProto proto(g.kind, repeat);
346 return constructLBR(proto, triggers, cc, rm);
347}
348
349} // namespace ue2
350