/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Functions for allocating and manipulating scratch space.
 */

#include <stdlib.h>
#include <string.h>

#include "allocator.h"
#include "hs_internal.h"
#include "hs_runtime.h"
#include "scratch.h"
#include "state.h"
#include "ue2common.h"
#include "database.h"
#include "nfa/nfa_api_queue.h"
#include "rose/rose_internal.h"
#include "util/fatbit.h"
/**
 * Determine the space required for a correctly aligned array of fatbit
 * structures, laid out as:
 *
 * - an array of num_entries pointers, each to a fatbit.
 * - an array of fatbit structures, each of size fatbit_len.
 *
 * fatbit_len should have been determined at compile time, via the
 * fatbit_size() call.
 */
static
size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) {
    size_t len = 0;

    // Array of pointers to each fatbit entry.
    len += sizeof(struct fatbit *) * num_entries;

    // Fatbit entries themselves.
    len = ROUNDUP_N(len, alignof(struct fatbit));
    len += (size_t)fatbit_len * num_entries;

    return ROUNDUP_N(len, 8); // Round up for potential padding.
}
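
/*
 * Worked sizing sketch (illustrative only; the concrete values below are
 * assumptions, not taken from a real database): on a 64-bit target with
 * num_entries = 4 and fatbit_len = 32, the pointer array takes 4 * 8 = 32
 * bytes. Assuming alignof(struct fatbit) divides 32, no padding is inserted
 * before the 4 * 32 = 128 bytes of fatbit storage, so the function returns
 * ROUNDUP_N(32 + 128, 8) == 160. alloc_scratch() below carves each such
 * region into the pointer array followed by the fatbit entries themselves.
 */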

/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
 * scratch region from a prototype structure. */
static
hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
    u32 queueCount = proto->queueCount;
    u32 activeQueueArraySize = proto->activeQueueArraySize;
    u32 deduperCount = proto->deduper.dkey_count;
    u32 deduperLogSize = proto->deduper.log_size;
    u32 bStateSize = proto->bStateSize;
    u32 tStateSize = proto->tStateSize;
    u32 fullStateSize = proto->fullStateSize;
    u32 anchored_literal_region_len = proto->anchored_literal_region_len;
    u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size;

    u32 som_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_now_size = proto->som_fatbit_size;
    u32 som_attempted_size = proto->som_fatbit_size;

    struct hs_scratch *s;
    struct hs_scratch *s_tmp;
    size_t queue_size = queueCount * sizeof(struct mq);
    size_t qmpq_size = queueCount * sizeof(struct queue_match);

    assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum));

    size_t anchored_literal_region_size = fatbit_array_size(
        anchored_literal_region_len, proto->anchored_literal_fatbit_size);
    size_t delay_region_size =
        fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size);

    // the size is all the allocated stuff, not including the struct itself
    size_t size = queue_size + 63
                  + bStateSize + tStateSize
                  + fullStateSize + 63 /* cacheline padding */
                  + proto->handledKeyFatbitSize /* handled roles */
                  + activeQueueArraySize /* active queue array */
                  + 2 * deduperLogSize /* need odd and even logs */
                  + 2 * deduperLogSize /* ditto som logs */
                  + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
                  + anchored_literal_region_size + qmpq_size
                  + delay_region_size
                  + som_store_size
                  + som_now_size
                  + som_attempted_size
                  + som_attempted_store_size + 15;

    /* the struct plus the allocated stuff plus padding for cacheline
     * alignment */
    const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
    s_tmp = hs_scratch_alloc(alloc_size);
    hs_error_t err = hs_check_alloc(s_tmp);
    if (err != HS_SUCCESS) {
        hs_scratch_free(s_tmp);
        *scratch = NULL;
        return err;
    }

    memset(s_tmp, 0, alloc_size);
    s = ROUNDUP_PTR(s_tmp, 64);
    DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n", alloc_size,
                 s_tmp, s);
    DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
    *s = *proto;

    s->magic = SCRATCH_MAGIC;
    s->in_use = 0;
    s->scratchSize = alloc_size;
    s->scratch_alloc = (char *)s_tmp;
    s->fdr_conf = NULL;
    s->pure = 0;

    // each of these is at an offset from the previous
    char *current = (char *)s + sizeof(*s);

    // align current so that the following arrays are naturally aligned: this
    // is accounted for in the padding allocated
    current = ROUNDUP_PTR(current, 8);

    s->queues = (struct mq *)current;
    current += queue_size;

    assert(ISALIGNED_N(current, 8));
    s->som_store = (u64a *)current;
    current += som_store_size;

    s->som_attempted_store = (u64a *)current;
    current += som_attempted_store_size;

    current = ROUNDUP_PTR(current, alignof(struct fatbit *));
    s->delay_slots = (struct fatbit **)current;
    current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT;
    current = ROUNDUP_PTR(current, alignof(struct fatbit));
    for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
        s->delay_slots[i] = (struct fatbit *)current;
        assert(ISALIGNED(s->delay_slots[i]));
        current += proto->delay_fatbit_size;
    }

    current = ROUNDUP_PTR(current, alignof(struct fatbit *));
    s->al_log = (struct fatbit **)current;
    current += sizeof(struct fatbit *) * anchored_literal_region_len;
    current = ROUNDUP_PTR(current, alignof(struct fatbit));
    for (u32 i = 0; i < anchored_literal_region_len; i++) {
        s->al_log[i] = (struct fatbit *)current;
        assert(ISALIGNED(s->al_log[i]));
        current += anchored_literal_fatbit_size;
    }

    current = ROUNDUP_PTR(current, 8);
    s->catchup_pq.qm = (struct queue_match *)current;
    current += qmpq_size;

    s->bstate = (char *)current;
    s->bStateSize = bStateSize;
    current += bStateSize;

    s->tstate = (char *)current;
    s->tStateSize = tStateSize;
    current += tStateSize;

    current = ROUNDUP_PTR(current, 64);

    assert(ISALIGNED_N(current, 8));
    s->deduper.som_start_log[0] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    s->deduper.som_start_log[1] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    assert(ISALIGNED_N(current, 8));
    s->aqa = (struct fatbit *)current;
    current += activeQueueArraySize;

    s->handled_roles = (struct fatbit *)current;
    current += proto->handledKeyFatbitSize;

    s->deduper.log[0] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.log[1] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.som_log[0] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.som_log[1] = (struct fatbit *)current;
    current += deduperLogSize;

    s->som_set_now = (struct fatbit *)current;
    current += som_now_size;

    s->som_attempted_set = (struct fatbit *)current;
    current += som_attempted_size;

    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->fullState = (char *)current;
    s->fullStateSize = fullStateSize;
    current += fullStateSize;

    *scratch = s;

    // Don't get too big for your boots
    assert((size_t)(current - (char *)s) <= alloc_size);

    // Init q->scratch ptr for every queue.
    for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
        qi->scratch = s;
    }

    return HS_SUCCESS;
}

HS_PUBLIC_API
hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
                                     hs_scratch_t **scratch) {
    if (!db || !scratch) {
        return HS_INVALID;
    }

    /* We need to do some real sanity checks on the database as some users mmap
     * in old deserialised databases, so this is the first real opportunity we
     * have to make sure it is sane.
     */
    hs_error_t rv = dbIsValid(db);
    if (rv != HS_SUCCESS) {
        return rv;
    }

    /* We can also sanity-check the scratch parameter: if it points to an
     * existing scratch area, that scratch should have valid magic bits. */
    if (*scratch != NULL) {
        /* has to be aligned before we can do anything with it */
        if (!ISALIGNED_CL(*scratch)) {
            return HS_INVALID;
        }
        if ((*scratch)->magic != SCRATCH_MAGIC) {
            return HS_INVALID;
        }
        if (markScratchInUse(*scratch)) {
            return HS_SCRATCH_IN_USE;
        }
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    int resize = 0;

    hs_scratch_t *proto;
    hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);
    hs_error_t proto_ret = hs_check_alloc(proto_tmp);
    if (proto_ret != HS_SUCCESS) {
        hs_scratch_free(proto_tmp);
        hs_scratch_free(*scratch);
        *scratch = NULL;
        return proto_ret;
    }

    proto = ROUNDUP_PTR(proto_tmp, 64);

    if (*scratch) {
        *proto = **scratch;
    } else {
        memset(proto, 0, sizeof(*proto));
        resize = 1;
    }
    proto->scratch_alloc = (char *)proto_tmp;

    if (rose->anchoredDistance > proto->anchored_literal_region_len) {
        resize = 1;
        proto->anchored_literal_region_len = rose->anchoredDistance;
    }

    if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) {
        resize = 1;
        proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size;
    }

    if (rose->delay_fatbit_size > proto->delay_fatbit_size) {
        resize = 1;
        proto->delay_fatbit_size = rose->delay_fatbit_size;
    }

    if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) {
        resize = 1;
        proto->handledKeyFatbitSize = rose->handledKeyFatbitSize;
    }

    if (rose->tStateSize > proto->tStateSize) {
        resize = 1;
        proto->tStateSize = rose->tStateSize;
    }

    u32 som_store_count = rose->somLocationCount;
    if (som_store_count > proto->som_store_count) {
        resize = 1;
        proto->som_store_count = som_store_count;
    }

    if (rose->somLocationFatbitSize > proto->som_fatbit_size) {
        resize = 1;
        proto->som_fatbit_size = rose->somLocationFatbitSize;
    }

    u32 queueCount = rose->queueCount;
    if (queueCount > proto->queueCount) {
        resize = 1;
        proto->queueCount = queueCount;
    }

    if (rose->activeQueueArraySize > proto->activeQueueArraySize) {
        resize = 1;
        proto->activeQueueArraySize = rose->activeQueueArraySize;
    }

    u32 bStateSize = 0;
    if (rose->mode == HS_MODE_BLOCK) {
        bStateSize = rose->stateOffsets.end;
    } else if (rose->mode == HS_MODE_VECTORED) {
        /* vectored databases require a full stream state (inc header) */
        bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
    }

    if (bStateSize > proto->bStateSize) {
        resize = 1;
        proto->bStateSize = bStateSize;
    }

    u32 fullStateSize = rose->scratchStateSize;
    if (fullStateSize > proto->fullStateSize) {
        resize = 1;
        proto->fullStateSize = fullStateSize;
    }

    if (rose->dkeyCount > proto->deduper.dkey_count) {
        resize = 1;
        proto->deduper.dkey_count = rose->dkeyCount;
        proto->deduper.log_size = rose->dkeyLogSize;
    }

    if (resize) {
        if (*scratch) {
            hs_scratch_free((*scratch)->scratch_alloc);
        }

        hs_error_t alloc_ret = alloc_scratch(proto, scratch);
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
        if (alloc_ret != HS_SUCCESS) {
            *scratch = NULL;
            return alloc_ret;
        }
    } else {
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
        unmarkScratchInUse(*scratch);
    }

    assert(!(*scratch)->in_use);
    return HS_SUCCESS;
}
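
/*
 * Illustrative caller-side sketch (not part of this file's build; error
 * handling is abbreviated and the pattern string is a made-up example):
 *
 *     hs_database_t *db = NULL;
 *     hs_compile_error_t *compile_err = NULL;
 *     if (hs_compile("foo.*bar", HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &db,
 *                    &compile_err) != HS_SUCCESS) {
 *         hs_free_compile_error(compile_err);
 *         return;
 *     }
 *
 *     hs_scratch_t *scratch = NULL; // must be NULL or an existing scratch
 *     if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
 *         hs_free_database(db);
 *         return;
 *     }
 *     // scratch is now large enough for db; passing the same pointer in
 *     // again for another database grows it in place if required.
 */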

HS_PUBLIC_API
hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
                                     hs_scratch_t **dest) {
    if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) {
        return HS_INVALID;
    }

    *dest = NULL;
    hs_error_t ret = alloc_scratch(src, dest);
    if (ret != HS_SUCCESS) {
        *dest = NULL;
        return ret;
    }

    assert(!(*dest)->in_use);
    return HS_SUCCESS;
}
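
/*
 * Illustrative sketch: a scratch region cannot be shared by concurrent scans,
 * so a common pattern is to clone one per worker thread from a single
 * prototype (`prototype` here stands for a hypothetical, already-allocated
 * scratch):
 *
 *     hs_scratch_t *clone = NULL;
 *     if (hs_clone_scratch(prototype, &clone) != HS_SUCCESS) {
 *         // handle allocation failure
 *     }
 *     // hand `clone` to the worker; release it later with hs_free_scratch().
 */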

HS_PUBLIC_API
hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch) {
    if (scratch) {
        /* has to be aligned before we can do anything with it */
        if (!ISALIGNED_CL(scratch)) {
            return HS_INVALID;
        }
        if (scratch->magic != SCRATCH_MAGIC) {
            return HS_INVALID;
        }
        if (markScratchInUse(scratch)) {
            return HS_SCRATCH_IN_USE;
        }

        scratch->magic = 0;
        assert(scratch->scratch_alloc);
        DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
                     scratch->scratch_alloc);
        hs_scratch_free(scratch->scratch_alloc);
    }

    return HS_SUCCESS;
}

HS_PUBLIC_API
hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, size_t *size) {
    if (!size || !scratch || !ISALIGNED_CL(scratch) ||
        scratch->magic != SCRATCH_MAGIC) {
        return HS_INVALID;
    }

    *size = scratch->scratchSize;

    return HS_SUCCESS;
}
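
/*
 * Illustrative sketch: hs_scratch_size() reports the full allocation size of
 * a scratch region, e.g. for memory accounting (variable names are made up
 * for this example):
 *
 *     size_t scratch_bytes = 0;
 *     if (hs_scratch_size(scratch, &scratch_bytes) == HS_SUCCESS) {
 *         printf("scratch uses %zu bytes\n", scratch_bytes); // needs <stdio.h>
 *     }
 */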