1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "mpvcompile.h"
30
31#include "mpv_internal.h"
32#include "nfa_api_queue.h"
33#include "nfa_internal.h"
34#include "shufticompile.h"
35#include "trufflecompile.h"
36#include "util/alloc.h"
37#include "util/multibit_build.h"
38#include "util/order_check.h"
39#include "util/report_manager.h"
40#include "util/verify_types.h"
41
42#include <algorithm>
43#include <iterator>
44#include <map>
45
46#include <boost/range/adaptor/map.hpp>
47
48using namespace std;
49using boost::adaptors::map_values;
50using boost::adaptors::map_keys;
51
52namespace ue2 {
53
54namespace {
55struct pcomp {
56 bool operator()(const raw_puff &a, const raw_puff &b) const {
57 return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) <
58 tie(b.repeats, b.unbounded, b.simple_exhaust, b.report);
59 }
60};
61
62struct ClusterKey {
63 explicit ClusterKey(const raw_puff &src)
64 : trigger_event(MQE_INVALID), reach(src.reach),
65 auto_restart(src.auto_restart) {}
66 ClusterKey(u32 event, const raw_puff &src)
67 : trigger_event(event), reach(src.reach),
68 auto_restart(src.auto_restart) {}
69
70 u32 trigger_event;
71 CharReach reach;
72 bool auto_restart;
73
74 bool operator<(const ClusterKey &b) const {
75 const ClusterKey &a = *this;
76 ORDER_CHECK(trigger_event); /* want triggered puffs first */
77 ORDER_CHECK(auto_restart);
78 ORDER_CHECK(reach);
79 return false;
80 }
81};
82
83} // namespace
84
85static
86void writePuffette(mpv_puffette *out, const raw_puff &rp,
87 const ReportManager &rm) {
88 DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded,
89 rp.report, out);
90 out->repeats = rp.repeats;
91 out->unbounded = rp.unbounded;
92 out->simple_exhaust = rp.simple_exhaust;
93 out->report = rm.getProgramOffset(rp.report);
94}
95
96static
97void writeSentinel(mpv_puffette *out) {
98 DEBUG_PRINTF("outputting sentinel to %p\n", out);
99 memset(out, 0, sizeof(*out));
100 out->report = INVALID_REPORT;
101}
102
103static
104void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) {
105 for (const auto &puff : puffs) {
106 if (puff.unbounded) { /* mpv can never die */
107 out->dead_point = MPV_DEAD_VALUE;
108 return;
109 }
110 }
111
112 out->dead_point = puffs.back().repeats + 1;
113}
114
115static
116size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw,
117 const vector<mpv_counter_info> &counters) {
118 size_t len = sizeof(NFA) + sizeof(mpv);
119
120 len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each
121 distinct reach */
122
123 len += sizeof(mpv_counter_info) * counters.size();
124
125 len += sizeof(mpv_puffette); /* initial sent */
126
127 for (const vector<raw_puff> &puffs : raw | map_values) {
128 len += sizeof(mpv_puffette) * puffs.size();
129 len += sizeof(mpv_puffette); /* terminal sent */
130 }
131
132 return len;
133}
134
135static
136void populateClusters(const vector<raw_puff> &puffs_in,
137 const vector<raw_puff> &triggered_puffs,
138 map<ClusterKey, vector<raw_puff>> *raw) {
139 map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw;
140
141 u32 e = MQE_TOP_FIRST;
142 for (const auto &puff : triggered_puffs) {
143 puff_clusters[ClusterKey(e, puff)].push_back(puff);
144 e++;
145 }
146
147 for (const auto &puff : puffs_in) {
148 puff_clusters[ClusterKey(puff)].push_back(puff);
149 }
150
151
152 for (vector<raw_puff> &puffs : puff_clusters | map_values) {
153 sort(puffs.begin(), puffs.end(), pcomp());
154 }
155}
156
157static
158void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
159 const ReportManager &rm, u32 counter_offset, mpv *m,
160 mpv_kilopuff *kp, mpv_puffette **pa) {
161 const CharReach &reach = it->first.reach;
162 const vector<raw_puff> &puffs = it->second;
163
164 kp->auto_restart = it->first.auto_restart;
165
166 if (reach.all()) {
167 kp->type = MPV_DOT;
168 } else if (reach.count() == 255) {
169 kp->type = MPV_VERM;
170 size_t unset = (~reach).find_first();
171 assert(unset != CharReach::npos);
172 kp->u.verm.c = (char)unset;
173 } else if (reach.count() == 1) {
174 kp->type = MPV_NVERM;
175 size_t set = reach.find_first();
176 assert(set != CharReach::npos);
177 kp->u.verm.c = (char)set;
178 } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo,
179 (u8 *)&kp->u.shuf.mask_hi) != -1) {
180 kp->type = MPV_SHUFTI;
181 } else {
182 kp->type = MPV_TRUFFLE;
183 truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1,
184 (u8 *)&kp->u.truffle.mask2);
185 }
186
187 kp->count = verify_u32(puffs.size());
188 kp->counter_offset = counter_offset;
189
190 /* start of real puffette array */
191 kp->puffette_offset = verify_u32((char *)*pa - (char *)m);
192 for (size_t i = 0; i < puffs.size(); i++) {
193 assert(!it->first.auto_restart || puffs[i].unbounded);
194 writePuffette(*pa + i, puffs[i], rm);
195 }
196
197 *pa += puffs.size();
198 writeSentinel(*pa);
199 ++*pa;
200
201 writeDeadPoint(kp, puffs);
202}
203
204static
205void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter,
206 u32 streamStateSize, u32 scratchStateSize) {
207 assert(nfa);
208
209 nfa->length = len;
210 nfa->nPositions = max_counter - 1;
211 nfa->type = MPV_NFA;
212 nfa->streamStateSize = streamStateSize;
213 assert(16 >= sizeof(mpv_decomp_kilo));
214 nfa->scratchStateSize = scratchStateSize;
215 nfa->minWidth = min_width;
216}
217
218static
219void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
220 map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite,
221 u64a *max_counter_out, u32 *counter_size) {
222 u32 max_counter = 0; /* max counter that we may need to know about is one
223 more than largest repeat */
224 for (; kp_it != kp_ite; ++kp_it) {
225 max_counter = MAX(max_counter, kp_it->second.back().repeats + 1);
226 }
227
228 if (max_counter < (1U << 8)) {
229 *counter_size = 1;
230 } else if (max_counter < (1U << 16)) {
231 *counter_size = 2;
232 } else if (max_counter < (1U << 24)) {
233 *counter_size = 3;
234 } else {
235 *counter_size = 4;
236 }
237
238 *max_counter_out = max_counter;
239}
240
241static
242void fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset,
243 u32 *curr_comp_offset,
244 const map<ClusterKey, vector<raw_puff>> &kilopuffs,
245 map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
246 map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) {
247
248 out->kilo_begin = distance(kilopuffs.begin(), kp_it);
249 out->kilo_end = distance(kilopuffs.begin(), kp_ite);
250 findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size);
251 out->counter_offset = *curr_decomp_offset;
252 *curr_decomp_offset += sizeof(u64a);
253 *curr_comp_offset += out->counter_size;
254}
255
256static
257void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
258 u32 *curr_comp_offset,
259 const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
260 /* first the triggered puffs */
261 map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
262 while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
263 assert(!it->first.auto_restart);
264 assert(it->first.trigger_event
265 == MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
266
267 out->push_back(mpv_counter_info());
268 map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
269 ++it;
270 fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
271 kilopuffs, it_o, it);
272 }
273
274 /* we may have 2 sets of non triggered puffs:
275 * 1) always started with no auto_restart
276 * 2) always started with auto_restart
277 */
278 map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
279 while (it != kilopuffs.end() && !it->first.auto_restart) {
280 assert(it->first.trigger_event == MQE_INVALID);
281
282 ++it;
283 }
284 if (it != trig_ite) {
285 out->push_back(mpv_counter_info());
286 fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
287 kilopuffs, kilopuffs.begin(), it);
288 }
289 while (it != kilopuffs.end() && it->first.auto_restart) {
290 assert(it->first.trigger_event == MQE_INVALID);
291
292 out->push_back(mpv_counter_info());
293 map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
294 ++it;
295 fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
296 kilopuffs, it_o, it);
297 }
298}
299
300static
301const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
302 u32 i) {
303 for (const auto &counter : counters) {
304 if (i >= counter.kilo_begin && i < counter.kilo_end) {
305 return counter;
306 }
307 }
308 assert(0);
309 return counters.front();
310}
311
312bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
313 const vector<raw_puff> &triggered_puffs,
314 const ReportManager &rm) {
315 assert(!puffs_in.empty() || !triggered_puffs.empty());
316 u32 puffette_count = puffs_in.size() + triggered_puffs.size();
317
318 map<ClusterKey, vector<raw_puff>> puff_clusters;
319 populateClusters(puffs_in, triggered_puffs, &puff_clusters);
320
321 u32 curr_comp_offset = 0;
322
323 u32 curr_decomp_offset = sizeof(mpv_decomp_state);
324 curr_decomp_offset += 16 * puff_clusters.size();
325
326 vector<mpv_counter_info> counters;
327 fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset,
328 puff_clusters);
329
330 u32 pq_offset = curr_decomp_offset;
331 curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size();
332
333 u32 rl_offset = curr_decomp_offset;
334 curr_decomp_offset += sizeof(ReportID) * puffette_count;
335
336 u32 reporter_offset = curr_decomp_offset;
337 curr_decomp_offset += mmbit_size(puff_clusters.size());
338
339 u32 active_offset = curr_comp_offset;
340 curr_comp_offset += mmbit_size(puff_clusters.size());
341
342 u32 len = calcSize(puff_clusters, counters);
343
344 DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
345
346 auto nfa = make_zeroed_bytecode_ptr<NFA>(len);
347
348 mpv_puffette *pa_base = (mpv_puffette *)
349 ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
350 + sizeof(mpv_kilopuff) * puff_clusters.size()
351 + sizeof(mpv_counter_info) * counters.size());
352 mpv_puffette *pa = pa_base;
353
354 writeSentinel(pa);
355
356 ++pa; /* skip init sentinel */
357
358 u32 min_repeat = ~0U;
359 u32 max_counter = 0; /* max counter that we may need to know about is one
360 more than largest repeat */
361 for (const vector<raw_puff> &puffs : puff_clusters | map_values) {
362 max_counter = max(max_counter, puffs.back().repeats + 1);
363 min_repeat = min(min_repeat, puffs.front().repeats);
364 }
365
366 mpv *m = (mpv *)getMutableImplNfa(nfa.get());
367 m->kilo_count = verify_u32(puff_clusters.size());
368 m->counter_count = verify_u32(counters.size());
369 m->puffette_count = puffette_count;
370 m->pq_offset = pq_offset;
371 m->reporter_offset = reporter_offset;
372 m->report_list_offset = rl_offset;
373 m->active_offset = active_offset;
374 m->top_kilo_begin = verify_u32(triggered_puffs.size());
375 m->top_kilo_end = verify_u32(puff_clusters.size());
376
377 mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1);
378 mpv_kilopuff *kp = kp_begin;
379 for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) {
380 writeKiloPuff(it, rm,
381 findCounter(counters, kp - kp_begin).counter_offset, m,
382 kp, &pa);
383 ++kp;
384 }
385 assert((char *)pa == (char *)nfa.get() + len);
386
387 mpv_counter_info *out_ci = (mpv_counter_info *)kp;
388 for (const auto &counter : counters) {
389 *out_ci = counter;
390 ++out_ci;
391 }
392 assert((char *)out_ci == (char *)pa_base);
393
394 writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset,
395 curr_decomp_offset);
396
397 return nfa;
398}
399
400} // namespace ue2
401