/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Functions for allocating and manipulating scratch space.
 */

#include <stdlib.h>
#include <string.h>

#include "allocator.h"
#include "hs_internal.h"
#include "hs_runtime.h"
#include "scratch.h"
#include "state.h"
#include "ue2common.h"
#include "database.h"
#include "nfa/nfa_api_queue.h"
#include "rose/rose_internal.h"
#include "util/fatbit.h"
/**
 * Determine the space required for a correctly aligned array of fatbit
 * structures, laid out as:
 *
 * - an array of num_entries pointers, each to a fatbit.
 * - an array of fatbit structures, each of size fatbit_len.
 *
 * fatbit_len should have been determined at compile time, via the
 * fatbit_size() call.
 */
static
size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) {
    size_t len = 0;

    // Array of pointers to each fatbit entry.
    len += sizeof(struct fatbit *) * num_entries;

    // Fatbit entries themselves.
    len = ROUNDUP_N(len, alignof(struct fatbit));
    len += (size_t)fatbit_len * num_entries;

    return ROUNDUP_N(len, 8); // Round up for potential padding.
}
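
/*
 * Worked sizing sketch (illustrative only; the concrete values below are
 * assumptions, not taken from a real database): on a 64-bit target with
 * num_entries = 4 and fatbit_len = 32, the pointer array takes 4 * 8 = 32
 * bytes. Assuming alignof(struct fatbit) divides 32, no padding is inserted
 * before the 4 * 32 = 128 bytes of fatbit storage, so the function returns
 * ROUNDUP_N(32 + 128, 8) == 160. alloc_scratch() below carves each such
 * region into the pointer array followed by the fatbit entries themselves.
 */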

/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
 * scratch region from a prototype structure. */
static
hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
    u32 queueCount = proto->queueCount;
    u32 activeQueueArraySize = proto->activeQueueArraySize;
    u32 deduperCount = proto->deduper.dkey_count;
    u32 deduperLogSize = proto->deduper.log_size;
    u32 bStateSize = proto->bStateSize;
    u32 tStateSize = proto->tStateSize;
    u32 fullStateSize = proto->fullStateSize;
    u32 anchored_literal_region_len = proto->anchored_literal_region_len;
    u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size;

    u32 som_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_now_size = proto->som_fatbit_size;
    u32 som_attempted_size = proto->som_fatbit_size;

    struct hs_scratch *s;
    struct hs_scratch *s_tmp;
    size_t queue_size = queueCount * sizeof(struct mq);
    size_t qmpq_size = queueCount * sizeof(struct queue_match);

    assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum));

    size_t anchored_literal_region_size = fatbit_array_size(
        anchored_literal_region_len, proto->anchored_literal_fatbit_size);
    size_t delay_region_size =
        fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size);

    // the size is all the allocated stuff, not including the struct itself
    size_t size = queue_size + 63
                  + bStateSize + tStateSize
                  + fullStateSize + 63 /* cacheline padding */
                  + proto->handledKeyFatbitSize /* handled roles */
                  + activeQueueArraySize /* active queue array */
                  + 2 * deduperLogSize /* need odd and even logs */
                  + 2 * deduperLogSize /* ditto som logs */
                  + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
                  + anchored_literal_region_size + qmpq_size
                  + delay_region_size
                  + som_store_size
                  + som_now_size
                  + som_attempted_size
                  + som_attempted_store_size + 15;

    /* the struct plus the allocated stuff plus padding for cacheline
     * alignment */
    const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
    s_tmp = hs_scratch_alloc(alloc_size);
    hs_error_t err = hs_check_alloc(s_tmp);
    if (err != HS_SUCCESS) {
        hs_scratch_free(s_tmp);
        *scratch = NULL;
        return err;
    }

    memset(s_tmp, 0, alloc_size);
    s = ROUNDUP_PTR(s_tmp, 64);
    DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n", alloc_size,
                 s_tmp, s);
    DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
    *s = *proto;

    s->magic = SCRATCH_MAGIC;
    s->in_use = 0;
    s->scratchSize = alloc_size;
    s->scratch_alloc = (char *)s_tmp;
    s->fdr_conf = NULL;
    s->pure = 0;

    // each of these is at an offset from the previous
    char *current = (char *)s + sizeof(*s);

    // align current so that the following arrays are naturally aligned: this
    // is accounted for in the padding allocated
    current = ROUNDUP_PTR(current, 8);

    s->queues = (struct mq *)current;
    current += queue_size;

    assert(ISALIGNED_N(current, 8));
    s->som_store = (u64a *)current;
    current += som_store_size;

    s->som_attempted_store = (u64a *)current;
    current += som_attempted_store_size;

    current = ROUNDUP_PTR(current, alignof(struct fatbit *));
    s->delay_slots = (struct fatbit **)current;
    current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT;
    current = ROUNDUP_PTR(current, alignof(struct fatbit));
    for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
        s->delay_slots[i] = (struct fatbit *)current;
        assert(ISALIGNED(s->delay_slots[i]));
        current += proto->delay_fatbit_size;
    }

    current = ROUNDUP_PTR(current, alignof(struct fatbit *));
    s->al_log = (struct fatbit **)current;
    current += sizeof(struct fatbit *) * anchored_literal_region_len;
    current = ROUNDUP_PTR(current, alignof(struct fatbit));
    for (u32 i = 0; i < anchored_literal_region_len; i++) {
        s->al_log[i] = (struct fatbit *)current;
        assert(ISALIGNED(s->al_log[i]));
        current += anchored_literal_fatbit_size;
    }

    current = ROUNDUP_PTR(current, 8);
    s->catchup_pq.qm = (struct queue_match *)current;
    current += qmpq_size;

    s->bstate = (char *)current;
    s->bStateSize = bStateSize;
    current += bStateSize;

    s->tstate = (char *)current;
    s->tStateSize = tStateSize;
    current += tStateSize;

    current = ROUNDUP_PTR(current, 64);

    assert(ISALIGNED_N(current, 8));
    s->deduper.som_start_log[0] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    s->deduper.som_start_log[1] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    assert(ISALIGNED_N(current, 8));
    s->aqa = (struct fatbit *)current;
    current += activeQueueArraySize;

    s->handled_roles = (struct fatbit *)current;
    current += proto->handledKeyFatbitSize;

    s->deduper.log[0] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.log[1] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.som_log[0] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.som_log[1] = (struct fatbit *)current;
    current += deduperLogSize;

    s->som_set_now = (struct fatbit *)current;
    current += som_now_size;

    s->som_attempted_set = (struct fatbit *)current;
    current += som_attempted_size;

    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->fullState = (char *)current;
    s->fullStateSize = fullStateSize;
    current += fullStateSize;

    *scratch = s;

    // Don't get too big for your boots
    assert((size_t)(current - (char *)s) <= alloc_size);

    // Init q->scratch ptr for every queue.
    for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
        qi->scratch = s;
    }

    return HS_SUCCESS;
}

HS_PUBLIC_API
hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
                                     hs_scratch_t **scratch) {
    if (!db || !scratch) {
        return HS_INVALID;
    }

    /* We need to do some real sanity checks on the database as some users mmap
     * in old deserialised databases, so this is the first real opportunity we
     * have to make sure it is sane.
     */
    hs_error_t rv = dbIsValid(db);
    if (rv != HS_SUCCESS) {
        return rv;
    }

    /* We can also sanity-check the scratch parameter: if it points to an
     * existing scratch area, that scratch should have valid magic bits. */
    if (*scratch != NULL) {
        /* has to be aligned before we can do anything with it */
        if (!ISALIGNED_CL(*scratch)) {
            return HS_INVALID;
        }
        if ((*scratch)->magic != SCRATCH_MAGIC) {
            return HS_INVALID;
        }
        if (markScratchInUse(*scratch)) {
            return HS_SCRATCH_IN_USE;
        }
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    int resize = 0;

    hs_scratch_t *proto;
    hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);
    hs_error_t proto_ret = hs_check_alloc(proto_tmp);
    if (proto_ret != HS_SUCCESS) {
        hs_scratch_free(proto_tmp);
        hs_scratch_free(*scratch);
        *scratch = NULL;
        return proto_ret;
    }

    proto = ROUNDUP_PTR(proto_tmp, 64);

    if (*scratch) {
        *proto = **scratch;
    } else {
        memset(proto, 0, sizeof(*proto));
        resize = 1;
    }
    proto->scratch_alloc = (char *)proto_tmp;

    if (rose->anchoredDistance > proto->anchored_literal_region_len) {
        resize = 1;
        proto->anchored_literal_region_len = rose->anchoredDistance;
    }

    if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) {
        resize = 1;
        proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size;
    }

    if (rose->delay_fatbit_size > proto->delay_fatbit_size) {
        resize = 1;
        proto->delay_fatbit_size = rose->delay_fatbit_size;
    }

    if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) {
        resize = 1;
        proto->handledKeyFatbitSize = rose->handledKeyFatbitSize;
    }

    if (rose->tStateSize > proto->tStateSize) {
        resize = 1;
        proto->tStateSize = rose->tStateSize;
    }

    u32 som_store_count = rose->somLocationCount;
    if (som_store_count > proto->som_store_count) {
        resize = 1;
        proto->som_store_count = som_store_count;
    }

    if (rose->somLocationFatbitSize > proto->som_fatbit_size) {
        resize = 1;
        proto->som_fatbit_size = rose->somLocationFatbitSize;
    }

    u32 queueCount = rose->queueCount;
    if (queueCount > proto->queueCount) {
        resize = 1;
        proto->queueCount = queueCount;
    }

    if (rose->activeQueueArraySize > proto->activeQueueArraySize) {
        resize = 1;
        proto->activeQueueArraySize = rose->activeQueueArraySize;
    }

    u32 bStateSize = 0;
    if (rose->mode == HS_MODE_BLOCK) {
        bStateSize = rose->stateOffsets.end;
    } else if (rose->mode == HS_MODE_VECTORED) {
        /* vectored databases require a full stream state (inc header) */
        bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
    }

    if (bStateSize > proto->bStateSize) {
        resize = 1;
        proto->bStateSize = bStateSize;
    }

    u32 fullStateSize = rose->scratchStateSize;
    if (fullStateSize > proto->fullStateSize) {
        resize = 1;
        proto->fullStateSize = fullStateSize;
    }

    if (rose->dkeyCount > proto->deduper.dkey_count) {
        resize = 1;
        proto->deduper.dkey_count = rose->dkeyCount;
        proto->deduper.log_size = rose->dkeyLogSize;
    }

    if (resize) {
        if (*scratch) {
            hs_scratch_free((*scratch)->scratch_alloc);
        }

        hs_error_t alloc_ret = alloc_scratch(proto, scratch);
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
        if (alloc_ret != HS_SUCCESS) {
            *scratch = NULL;
            return alloc_ret;
        }
    } else {
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
        unmarkScratchInUse(*scratch);
    }

    assert(!(*scratch)->in_use);
    return HS_SUCCESS;
}
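
/*
 * Illustrative caller-side sketch (not part of this file's build; error
 * handling is abbreviated and the pattern string is a made-up example):
 *
 *     hs_database_t *db = NULL;
 *     hs_compile_error_t *compile_err = NULL;
 *     if (hs_compile("foo.*bar", HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &db,
 *                    &compile_err) != HS_SUCCESS) {
 *         hs_free_compile_error(compile_err);
 *         return;
 *     }
 *
 *     hs_scratch_t *scratch = NULL; // must be NULL or an existing scratch
 *     if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
 *         hs_free_database(db);
 *         return;
 *     }
 *     // scratch is now large enough for db; passing the same pointer in
 *     // again for another database grows it in place if required.
 */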

HS_PUBLIC_API
hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
                                     hs_scratch_t **dest) {
    if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) {
        return HS_INVALID;
    }

    *dest = NULL;
    hs_error_t ret = alloc_scratch(src, dest);
    if (ret != HS_SUCCESS) {
        *dest = NULL;
        return ret;
    }

    assert(!(*dest)->in_use);
    return HS_SUCCESS;
}
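
/*
 * Illustrative sketch: a scratch region cannot be shared by concurrent scans,
 * so a common pattern is to clone one per worker thread from a single
 * prototype (`prototype` here stands for a hypothetical, already-allocated
 * scratch):
 *
 *     hs_scratch_t *clone = NULL;
 *     if (hs_clone_scratch(prototype, &clone) != HS_SUCCESS) {
 *         // handle allocation failure
 *     }
 *     // hand `clone` to the worker; release it later with hs_free_scratch().
 */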

HS_PUBLIC_API
hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch) {
    if (scratch) {
        /* has to be aligned before we can do anything with it */
        if (!ISALIGNED_CL(scratch)) {
            return HS_INVALID;
        }
        if (scratch->magic != SCRATCH_MAGIC) {
            return HS_INVALID;
        }
        if (markScratchInUse(scratch)) {
            return HS_SCRATCH_IN_USE;
        }

        scratch->magic = 0;
        assert(scratch->scratch_alloc);
        DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
                     scratch->scratch_alloc);
        hs_scratch_free(scratch->scratch_alloc);
    }

    return HS_SUCCESS;
}

HS_PUBLIC_API
hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, size_t *size) {
    if (!size || !scratch || !ISALIGNED_CL(scratch) ||
        scratch->magic != SCRATCH_MAGIC) {
        return HS_INVALID;
    }

    *size = scratch->scratchSize;

    return HS_SUCCESS;
}
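
/*
 * Illustrative sketch: hs_scratch_size() reports the full allocation size of
 * a scratch region, e.g. for memory accounting (variable names are made up
 * for this example):
 *
 *     size_t scratch_bytes = 0;
 *     if (hs_scratch_size(scratch, &scratch_bytes) == HS_SUCCESS) {
 *         printf("scratch uses %zu bytes\n", scratch_bytes); // needs <stdio.h>
 *     }
 */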