tamaramacompile.cpp source code [ClickHouse/contrib/hyperscan/src/nfa/tamaramacompile.cpp]

1	/*
2	* Copyright (c) 2016-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/**
30	* \file
31	* \brief Tamarama: container engine for exclusive engines, compiler code.
32	*/
33
34	#include "config.h"
35
36	#include "tamaramacompile.h"
37
38	#include "tamarama_internal.h"
39	#include "nfa_internal.h"
40	#include "nfa_api_queue.h"
41	#include "repeatcompile.h"
42	#include "util/container.h"
43	#include "util/verify_types.h"
44
45	using namespace std;
46
47	namespace ue2 {
48
49	static
50	void remapTops(const TamaInfo &tamaInfo,
51	vector<u32> &top_base,
52	map<pair<const NFA *, u32>, u32> &out_top_remap) {
53	u32 i = `0`;
54	u32 cur = `0`;
55	for (const auto &sub : tamaInfo.subengines) {
56	u32 base = cur;
57	top_base.push_back(base + MQE_TOP_FIRST);
58	DEBUG_PRINTF("subengine:%u\n", i);
59	for (const auto &t : tamaInfo.tops [i++]) {
60	cur = base + t;
61	DEBUG_PRINTF("top remapping %u:%u\n", t ,cur);
62	out_top_remap.emplace(make_pair(sub, t), cur++);
63	}
64	}
65	}
66
67	/**
68	* update stream state and scratch state sizes and copy in
69	* subengines in Tamarama.
70	*/
71	static
72	void copyInSubnfas(const char *base_offset, NFA &nfa,
73	const TamaInfo &tamaInfo, u32 *offsets,
74	char sub_nfa_offset, const* u32 activeIdxSize) {
75	u32 maxStreamStateSize = `0`;
76	u32 maxScratchStateSize = `0`;
77	sub_nfa_offset = ROUNDUP_PTR(sub_nfa_offset, `64`);
78	bool infinite_max_width = false;
79	for (auto &sub : tamaInfo.subengines) {
80	u32 streamStateSize = verify_u32(sub->streamStateSize);
81	u32 scratchStateSize = verify_u32(sub->scratchStateSize);
82	maxStreamStateSize = max(maxStreamStateSize, streamStateSize);
83	maxScratchStateSize = max(maxScratchStateSize, scratchStateSize);
84	sub->queueIndex = nfa.queueIndex;
85
86	memcpy(sub_nfa_offset, sub, sub->length);
87	*offsets = verify_u32(sub_nfa_offset - base_offset);
88	DEBUG_PRINTF("type:%u offsets:%u\n", sub->type, *offsets);
89	++offsets;
90	sub_nfa_offset += ROUNDUP_CL(sub->length);
91
92	// update nfa properties
93	nfa.flags \|= sub->flags;
94	if (!sub->maxWidth) {
95	infinite_max_width = true;
96	} else if (!infinite_max_width) {
97	nfa.maxWidth = max(nfa.maxWidth, sub->maxWidth);
98	}
99	}
100
101	if (infinite_max_width) {
102	nfa.maxWidth = `0`;
103	}
104	nfa.maxBiAnchoredWidth = `0`;
105	nfa.streamStateSize = activeIdxSize + maxStreamStateSize;
106	nfa.scratchStateSize = maxScratchStateSize;
107	}
108
109	/**
110	* Take in a collection of exclusive sub engines and produces a tamarama, also
111	* returns via out_top_remap, a mapping indicating how tops in the subengines in
112	* relate to the tamarama's tops.
113	*/
114	bytecode_ptr<NFA>
115	buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
116	map<pair<const NFA *, u32>, u32> &out_top_remap) {
117	vector<u32> top_base;
118	remapTops(tamaInfo, top_base, out_top_remap);
119
120	size_t subSize = tamaInfo.subengines.size();
121	DEBUG_PRINTF("subSize:%zu\n", subSize);
122	size_t total_size =
123	sizeof(NFA) + // initial NFA structure
124	sizeof(Tamarama) + // Tamarama structure
125	sizeof(u32) * subSize + // base top event value for subengines,
126	// used for top remapping at runtime
127	sizeof(u32) * subSize + `64`; // offsets to subengines in bytecode and
128	// padding for subengines
129
130	for (const auto &sub : tamaInfo.subengines) {
131	total_size += ROUNDUP_CL(sub->length);
132	}
133
134	// use subSize as a sentinel value for no active subengines,
135	// so add one to subSize here
136	u32 activeIdxSize = calcPackedBytes(subSize + `1`);
137	auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
138	nfa ->type = verify_u8(TAMARAMA_NFA);
139	nfa ->length = verify_u32(total_size);
140	nfa ->queueIndex = queue;
141
142	char ptr = (char* )nfa.get() + sizeof*(NFA);
143	char *base_offset = ptr;
144	Tamarama t = (Tamarama )ptr;
145	t->numSubEngines = verify_u32(subSize);
146	t->activeIdxSize = verify_u8(activeIdxSize);
147
148	ptr += sizeof(Tamarama);
149	copy_bytes(ptr, top_base);
150	ptr += byte_length(top_base);
151
152	u32 offsets = (u32 )ptr;
153	char sub_nfa_offset = ptr + sizeof(u32) subSize;
154	copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset,
155	activeIdxSize);
156	assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size);
157	return nfa;
158	}
159
160	set<ReportID> all_reports(const TamaProto &proto) {
161	return proto.reports;
162	}
163
164	void TamaInfo::add(NFA sub, const* set<u32> &top) {
165	assert(subengines.size() < max_occupancy);
166	subengines.push_back(sub);
167	tops.push_back(top);
168	}
169
170	void TamaProto::add(const NFA n, const* u32 id, const u32 top,
171	const map<pair<const NFA *, u32>, u32> &out_top_remap) {
172	top_remap.emplace(make_pair(id, top), out_top_remap.at(make_pair(n, top)));
173	}
174
175	} // namespace ue2
176
177

Browse the source code of ClickHouse/contrib/hyperscan/src/nfa/tamaramacompile.cpp