1 | /* |
2 | * Copyright (c) 2015-2019, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Functions for allocating and manipulating scratch space. |
31 | */ |
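
/*
 * Typical lifecycle of the API implemented here, shown as an illustrative
 * sketch only (db, data, len, on_match and ctx are placeholder names, not
 * defined in this file): scratch is allocated once per database, reused for
 * every scan, and cloned once per additional thread scanning concurrently.
 *
 *     hs_scratch_t *scratch = NULL;
 *     if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
 *         // handle the allocation failure
 *     }
 *     hs_scan(db, data, len, 0, scratch, on_match, ctx);
 *     hs_free_scratch(scratch);
 */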
32 | |
33 | #include <stdlib.h> |
34 | #include <string.h> |
35 | |
36 | #include "allocator.h" |
37 | #include "hs_internal.h" |
38 | #include "hs_runtime.h" |
39 | #include "scratch.h" |
40 | #include "state.h" |
41 | #include "ue2common.h" |
42 | #include "database.h" |
43 | #include "nfa/nfa_api_queue.h" |
44 | #include "rose/rose_internal.h" |
45 | #include "util/fatbit.h" |
46 | |
47 | /** |
 * Determine the space required for a correctly aligned array of fatbit
 * structures, laid out as:
50 | * |
51 | * - an array of num_entries pointers, each to a fatbit. |
52 | * - an array of fatbit structures, each of size fatbit_len. |
53 | * |
54 | * fatbit_len should have been determined at compile time, via the |
55 | * fatbit_size() call. |
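 *
 * For example (illustrative figures only): with num_entries == 4 and
 * fatbit_len == 64, the region holds four fatbit pointers, padding up to
 * alignof(struct fatbit), then 4 * 64 bytes of fatbit storage.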
56 | */ |
57 | static |
58 | size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) { |
59 | size_t len = 0; |
60 | |
61 | // Array of pointers to each fatbit entry. |
62 | len += sizeof(struct fatbit *) * num_entries; |
63 | |
64 | // Fatbit entries themselves. |
65 | len = ROUNDUP_N(len, alignof(struct fatbit)); |
66 | len += (size_t)fatbit_len * num_entries; |
67 | |
68 | return ROUNDUP_N(len, 8); // Round up for potential padding. |
69 | } |
70 | |
71 | /** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete |
72 | * scratch region from a prototype structure. */ |
73 | static |
74 | hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { |
75 | u32 queueCount = proto->queueCount; |
76 | u32 activeQueueArraySize = proto->activeQueueArraySize; |
77 | u32 deduperCount = proto->deduper.dkey_count; |
78 | u32 deduperLogSize = proto->deduper.log_size; |
79 | u32 bStateSize = proto->bStateSize; |
80 | u32 tStateSize = proto->tStateSize; |
81 | u32 fullStateSize = proto->fullStateSize; |
82 | u32 anchored_literal_region_len = proto->anchored_literal_region_len; |
83 | u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size; |
84 | |
85 | u32 som_store_size = proto->som_store_count * sizeof(u64a); |
86 | u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a); |
87 | u32 som_now_size = proto->som_fatbit_size; |
88 | u32 som_attempted_size = proto->som_fatbit_size; |
89 | |
90 | struct hs_scratch *s; |
91 | struct hs_scratch *s_tmp; |
92 | size_t queue_size = queueCount * sizeof(struct mq); |
93 | size_t qmpq_size = queueCount * sizeof(struct queue_match); |
94 | |
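    /* the number of anchored literal log rows must fit within the bit width
     * of al_log_sum */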
95 | assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum)); |
96 | |
97 | size_t anchored_literal_region_size = fatbit_array_size( |
98 | anchored_literal_region_len, proto->anchored_literal_fatbit_size); |
99 | size_t delay_region_size = |
100 | fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size); |
101 | |
102 | // the size is all the allocated stuff, not including the struct itself |
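    // (the extra 63- and 15-byte terms are alignment slack, presumably to
    // cover the cacheline and smaller round-ups performed during layout below)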
103 | size_t size = queue_size + 63 |
104 | + bStateSize + tStateSize |
105 | + fullStateSize + 63 /* cacheline padding */ |
106 | + proto->handledKeyFatbitSize /* handled roles */ |
107 | + activeQueueArraySize /* active queue array */ |
108 | + 2 * deduperLogSize /* need odd and even logs */ |
109 | + 2 * deduperLogSize /* ditto som logs */ |
110 | + 2 * sizeof(u64a) * deduperCount /* start offsets for som */ |
111 | + anchored_literal_region_size + qmpq_size |
112 | + delay_region_size |
113 | + som_store_size |
114 | + som_now_size |
115 | + som_attempted_size |
116 | + som_attempted_store_size + 15; |
117 | |
118 | /* the struct plus the allocated stuff plus padding for cacheline |
119 | * alignment */ |
120 | const size_t alloc_size = sizeof(struct hs_scratch) + size + 256; |
121 | s_tmp = hs_scratch_alloc(alloc_size); |
122 | hs_error_t err = hs_check_alloc(s_tmp); |
123 | if (err != HS_SUCCESS) { |
124 | hs_scratch_free(s_tmp); |
125 | *scratch = NULL; |
126 | return err; |
127 | } |
128 | |
129 | memset(s_tmp, 0, alloc_size); |
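    // Realign the struct itself to a cacheline boundary; the slack for this
    // is included in alloc_size above.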
130 | s = ROUNDUP_PTR(s_tmp, 64); |
    DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n",
                 alloc_size, s_tmp, s);
    DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
133 | *s = *proto; |
134 | |
135 | s->magic = SCRATCH_MAGIC; |
136 | s->in_use = 0; |
137 | s->scratchSize = alloc_size; |
138 | s->scratch_alloc = (char *)s_tmp; |
139 | s->fdr_conf = NULL; |
140 | s->pure = 0; |
141 | |
142 | // each of these is at an offset from the previous |
143 | char *current = (char *)s + sizeof(*s); |
144 | |
145 | // align current so that the following arrays are naturally aligned: this |
146 | // is accounted for in the padding allocated |
147 | current = ROUNDUP_PTR(current, 8); |
148 | |
149 | s->queues = (struct mq *)current; |
150 | current += queue_size; |
151 | |
152 | assert(ISALIGNED_N(current, 8)); |
153 | s->som_store = (u64a *)current; |
154 | current += som_store_size; |
155 | |
156 | s->som_attempted_store = (u64a *)current; |
157 | current += som_attempted_store_size; |
158 | |
159 | current = ROUNDUP_PTR(current, alignof(struct fatbit *)); |
160 | s->delay_slots = (struct fatbit **)current; |
161 | current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT; |
162 | current = ROUNDUP_PTR(current, alignof(struct fatbit)); |
163 | for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) { |
164 | s->delay_slots[i] = (struct fatbit *)current; |
165 | assert(ISALIGNED(s->delay_slots[i])); |
166 | current += proto->delay_fatbit_size; |
167 | } |
168 | |
169 | current = ROUNDUP_PTR(current, alignof(struct fatbit *)); |
170 | s->al_log = (struct fatbit **)current; |
171 | current += sizeof(struct fatbit *) * anchored_literal_region_len; |
172 | current = ROUNDUP_PTR(current, alignof(struct fatbit)); |
173 | for (u32 i = 0; i < anchored_literal_region_len; i++) { |
174 | s->al_log[i] = (struct fatbit *)current; |
175 | assert(ISALIGNED(s->al_log[i])); |
176 | current += anchored_literal_fatbit_size; |
177 | } |
178 | |
179 | current = ROUNDUP_PTR(current, 8); |
180 | s->catchup_pq.qm = (struct queue_match *)current; |
181 | current += qmpq_size; |
182 | |
183 | s->bstate = (char *)current; |
184 | s->bStateSize = bStateSize; |
185 | current += bStateSize; |
186 | |
187 | s->tstate = (char *)current; |
188 | s->tStateSize = tStateSize; |
189 | current += tStateSize; |
190 | |
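    // Align to a cacheline boundary before laying out the deduper SOM start
    // logs.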
191 | current = ROUNDUP_PTR(current, 64); |
192 | |
193 | assert(ISALIGNED_N(current, 8)); |
194 | s->deduper.som_start_log[0] = (u64a *)current; |
195 | current += sizeof(u64a) * deduperCount; |
196 | |
197 | s->deduper.som_start_log[1] = (u64a *)current; |
198 | current += sizeof(u64a) * deduperCount; |
199 | |
200 | assert(ISALIGNED_N(current, 8)); |
201 | s->aqa = (struct fatbit *)current; |
202 | current += activeQueueArraySize; |
203 | |
204 | s->handled_roles = (struct fatbit *)current; |
205 | current += proto->handledKeyFatbitSize; |
206 | |
207 | s->deduper.log[0] = (struct fatbit *)current; |
208 | current += deduperLogSize; |
209 | |
210 | s->deduper.log[1] = (struct fatbit *)current; |
211 | current += deduperLogSize; |
212 | |
213 | s->deduper.som_log[0] = (struct fatbit *)current; |
214 | current += deduperLogSize; |
215 | |
216 | s->deduper.som_log[1] = (struct fatbit *)current; |
217 | current += deduperLogSize; |
218 | |
219 | s->som_set_now = (struct fatbit *)current; |
220 | current += som_now_size; |
221 | |
222 | s->som_attempted_set = (struct fatbit *)current; |
223 | current += som_attempted_size; |
224 | |
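    // fullState must start on a cacheline boundary (asserted below).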
225 | current = ROUNDUP_PTR(current, 64); |
226 | assert(ISALIGNED_CL(current)); |
227 | s->fullState = (char *)current; |
228 | s->fullStateSize = fullStateSize; |
229 | current += fullStateSize; |
230 | |
231 | *scratch = s; |
232 | |
233 | // Don't get too big for your boots |
234 | assert((size_t)(current - (char *)s) <= alloc_size); |
235 | |
236 | // Init q->scratch ptr for every queue. |
237 | for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) { |
238 | qi->scratch = s; |
239 | } |
240 | |
241 | return HS_SUCCESS; |
242 | } |
243 | |
244 | HS_PUBLIC_API |
245 | hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, |
246 | hs_scratch_t **scratch) { |
247 | if (!db || !scratch) { |
248 | return HS_INVALID; |
249 | } |
250 | |
251 | /* We need to do some real sanity checks on the database as some users mmap |
252 | * in old deserialised databases, so this is the first real opportunity we |
253 | * have to make sure it is sane. |
254 | */ |
255 | hs_error_t rv = dbIsValid(db); |
256 | if (rv != HS_SUCCESS) { |
257 | return rv; |
258 | } |
259 | |
260 | /* We can also sanity-check the scratch parameter: if it points to an |
261 | * existing scratch area, that scratch should have valid magic bits. */ |
262 | if (*scratch != NULL) { |
263 | /* has to be aligned before we can do anything with it */ |
264 | if (!ISALIGNED_CL(*scratch)) { |
265 | return HS_INVALID; |
266 | } |
267 | if ((*scratch)->magic != SCRATCH_MAGIC) { |
268 | return HS_INVALID; |
269 | } |
270 | if (markScratchInUse(*scratch)) { |
271 | return HS_SCRATCH_IN_USE; |
272 | } |
273 | } |
274 | |
275 | const struct RoseEngine *rose = hs_get_bytecode(db); |
276 | int resize = 0; |
277 | |
278 | hs_scratch_t *proto; |
279 | hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256); |
280 | hs_error_t proto_ret = hs_check_alloc(proto_tmp); |
281 | if (proto_ret != HS_SUCCESS) { |
282 | hs_scratch_free(proto_tmp); |
283 | hs_scratch_free(*scratch); |
284 | *scratch = NULL; |
285 | return proto_ret; |
286 | } |
287 | |
288 | proto = ROUNDUP_PTR(proto_tmp, 64); |
289 | |
290 | if (*scratch) { |
291 | *proto = **scratch; |
292 | } else { |
293 | memset(proto, 0, sizeof(*proto)); |
294 | resize = 1; |
295 | } |
296 | proto->scratch_alloc = (char *)proto_tmp; |
297 | |
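    /* Grow-only sizing: each requirement below is raised to this database's
     * value where it exceeds what the prototype already records, so a single
     * scratch region can be grown to service multiple databases. Any increase
     * forces a reallocation of the scratch below. */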
298 | if (rose->anchoredDistance > proto->anchored_literal_region_len) { |
299 | resize = 1; |
300 | proto->anchored_literal_region_len = rose->anchoredDistance; |
301 | } |
302 | |
303 | if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) { |
304 | resize = 1; |
305 | proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size; |
306 | } |
307 | |
308 | if (rose->delay_fatbit_size > proto->delay_fatbit_size) { |
309 | resize = 1; |
310 | proto->delay_fatbit_size = rose->delay_fatbit_size; |
311 | } |
312 | |
313 | if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) { |
314 | resize = 1; |
315 | proto->handledKeyFatbitSize = rose->handledKeyFatbitSize; |
316 | } |
317 | |
318 | if (rose->tStateSize > proto->tStateSize) { |
319 | resize = 1; |
320 | proto->tStateSize = rose->tStateSize; |
321 | } |
322 | |
323 | u32 som_store_count = rose->somLocationCount; |
324 | if (som_store_count > proto->som_store_count) { |
325 | resize = 1; |
326 | proto->som_store_count = som_store_count; |
327 | } |
328 | |
329 | if (rose->somLocationFatbitSize > proto->som_fatbit_size) { |
330 | resize = 1; |
331 | proto->som_fatbit_size = rose->somLocationFatbitSize; |
332 | } |
333 | |
334 | u32 queueCount = rose->queueCount; |
335 | if (queueCount > proto->queueCount) { |
336 | resize = 1; |
337 | proto->queueCount = queueCount; |
338 | } |
339 | |
340 | if (rose->activeQueueArraySize > proto->activeQueueArraySize) { |
341 | resize = 1; |
342 | proto->activeQueueArraySize = rose->activeQueueArraySize; |
343 | } |
344 | |
345 | u32 bStateSize = 0; |
346 | if (rose->mode == HS_MODE_BLOCK) { |
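        /* block mode keeps all of its state in scratch; no stream header is
         * needed */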
347 | bStateSize = rose->stateOffsets.end; |
348 | } else if (rose->mode == HS_MODE_VECTORED) { |
        /* vectored databases require a full stream state (inc. header) */
350 | bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end; |
351 | } |
352 | |
353 | if (bStateSize > proto->bStateSize) { |
354 | resize = 1; |
355 | proto->bStateSize = bStateSize; |
356 | } |
357 | |
358 | u32 fullStateSize = rose->scratchStateSize; |
359 | if (fullStateSize > proto->fullStateSize) { |
360 | resize = 1; |
361 | proto->fullStateSize = fullStateSize; |
362 | } |
363 | |
364 | if (rose->dkeyCount > proto->deduper.dkey_count) { |
365 | resize = 1; |
366 | proto->deduper.dkey_count = rose->dkeyCount; |
367 | proto->deduper.log_size = rose->dkeyLogSize; |
368 | } |
369 | |
370 | if (resize) { |
371 | if (*scratch) { |
372 | hs_scratch_free((*scratch)->scratch_alloc); |
373 | } |
374 | |
375 | hs_error_t alloc_ret = alloc_scratch(proto, scratch); |
376 | hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ |
377 | if (alloc_ret != HS_SUCCESS) { |
378 | *scratch = NULL; |
379 | return alloc_ret; |
380 | } |
381 | } else { |
382 | hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ |
383 | unmarkScratchInUse(*scratch); |
384 | } |
385 | |
386 | assert(!(*scratch)->in_use); |
387 | return HS_SUCCESS; |
388 | } |
389 | |
390 | HS_PUBLIC_API |
391 | hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src, |
392 | hs_scratch_t **dest) { |
393 | if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) { |
394 | return HS_INVALID; |
395 | } |
396 | |
397 | *dest = NULL; |
398 | hs_error_t ret = alloc_scratch(src, dest); |
399 | if (ret != HS_SUCCESS) { |
400 | *dest = NULL; |
401 | return ret; |
402 | } |
403 | |
404 | assert(!(*dest)->in_use); |
405 | return HS_SUCCESS; |
406 | } |
407 | |
408 | HS_PUBLIC_API |
409 | hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch) { |
410 | if (scratch) { |
411 | /* has to be aligned before we can do anything with it */ |
412 | if (!ISALIGNED_CL(scratch)) { |
413 | return HS_INVALID; |
414 | } |
415 | if (scratch->magic != SCRATCH_MAGIC) { |
416 | return HS_INVALID; |
417 | } |
418 | if (markScratchInUse(scratch)) { |
419 | return HS_SCRATCH_IN_USE; |
420 | } |
421 | |
422 | scratch->magic = 0; |
423 | assert(scratch->scratch_alloc); |
        DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
                     scratch->scratch_alloc);
426 | hs_scratch_free(scratch->scratch_alloc); |
427 | } |
428 | |
429 | return HS_SUCCESS; |
430 | } |
431 | |
432 | HS_PUBLIC_API |
hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch,
                                    size_t *size) {
434 | if (!size || !scratch || !ISALIGNED_CL(scratch) || |
435 | scratch->magic != SCRATCH_MAGIC) { |
436 | return HS_INVALID; |
437 | } |
438 | |
439 | *size = scratch->scratchSize; |
440 | |
441 | return HS_SUCCESS; |
442 | } |
443 | |