| 1 | /* |
| 2 | * Copyright (c) 2015-2019, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Functions for allocating and manipulating scratch space. |
| 31 | */ |
| 32 | |
| 33 | #include <stdlib.h> |
| 34 | #include <string.h> |
| 35 | |
| 36 | #include "allocator.h" |
| 37 | #include "hs_internal.h" |
| 38 | #include "hs_runtime.h" |
| 39 | #include "scratch.h" |
| 40 | #include "state.h" |
| 41 | #include "ue2common.h" |
| 42 | #include "database.h" |
| 43 | #include "nfa/nfa_api_queue.h" |
| 44 | #include "rose/rose_internal.h" |
| 45 | #include "util/fatbit.h" |
| 46 | |
| 47 | /** |
| 48 | * Determine the space required for a correctly aligned array of fatbit |
| 49 | * structure, laid out as: |
| 50 | * |
| 51 | * - an array of num_entries pointers, each to a fatbit. |
| 52 | * - an array of fatbit structures, each of size fatbit_len. |
| 53 | * |
| 54 | * fatbit_len should have been determined at compile time, via the |
| 55 | * fatbit_size() call. |
| 56 | */ |
| 57 | static |
| 58 | size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) { |
| 59 | size_t len = 0; |
| 60 | |
| 61 | // Array of pointers to each fatbit entry. |
| 62 | len += sizeof(struct fatbit *) * num_entries; |
| 63 | |
| 64 | // Fatbit entries themselves. |
| 65 | len = ROUNDUP_N(len, alignof(struct fatbit)); |
| 66 | len += (size_t)fatbit_len * num_entries; |
| 67 | |
| 68 | return ROUNDUP_N(len, 8); // Round up for potential padding. |
| 69 | } |
| 70 | |
/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
 * scratch region from a prototype structure.
 *
 * The header struct and every sub-array live in ONE allocation from
 * hs_scratch_alloc(); the pointer returned to the caller is realigned to a
 * 64-byte (cacheline) boundary, and the raw allocation base is stashed in
 * s->scratch_alloc so it can be handed back to hs_scratch_free() later.
 *
 * The sizing arithmetic in `size` below and the pointer-bumping layout that
 * follows must stay in lockstep: each region carved out of `current` has a
 * matching term in the size computation (plus slack for alignment roundups).
 *
 * On success, *scratch points at the aligned scratch and HS_SUCCESS is
 * returned; on allocation failure, *scratch is set to NULL and the
 * allocator's error code is returned.
 */
static
hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
    /* Pull the sizing parameters out of the prototype up front. */
    u32 queueCount = proto->queueCount;
    u32 activeQueueArraySize = proto->activeQueueArraySize;
    u32 deduperCount = proto->deduper.dkey_count;
    u32 deduperLogSize = proto->deduper.log_size;
    u32 bStateSize = proto->bStateSize;
    u32 tStateSize = proto->tStateSize;
    u32 fullStateSize = proto->fullStateSize;
    u32 anchored_literal_region_len = proto->anchored_literal_region_len;
    u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size;

    /* SOM (start-of-match) bookkeeping: two u64a stores and two fatbits,
     * all sized from the same prototype counts. */
    u32 som_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_now_size = proto->som_fatbit_size;
    u32 som_attempted_size = proto->som_fatbit_size;

    struct hs_scratch *s;
    struct hs_scratch *s_tmp;
    size_t queue_size = queueCount * sizeof(struct mq);
    size_t qmpq_size = queueCount * sizeof(struct queue_match);

    /* al_log_sum is used as a bitmap over the anchored literal log entries,
     * so the region length must fit within its bit width. */
    assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum));

    size_t anchored_literal_region_size = fatbit_array_size(
        anchored_literal_region_len, proto->anchored_literal_fatbit_size);
    size_t delay_region_size =
        fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size);

    // the size is all the allocated stuff, not including the struct itself
    size_t size = queue_size + 63
                  + bStateSize + tStateSize
                  + fullStateSize + 63 /* cacheline padding */
                  + proto->handledKeyFatbitSize /* handled roles */
                  + activeQueueArraySize /* active queue array */
                  + 2 * deduperLogSize /* need odd and even logs */
                  + 2 * deduperLogSize /* ditto som logs */
                  + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
                  + anchored_literal_region_size + qmpq_size
                  + delay_region_size
                  + som_store_size
                  + som_now_size
                  + som_attempted_size
                  + som_attempted_store_size + 15;

    /* the struct plus the allocated stuff plus padding for cacheline
     * alignment */
    const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
    s_tmp = hs_scratch_alloc(alloc_size);
    hs_error_t err = hs_check_alloc(s_tmp);
    if (err != HS_SUCCESS) {
        hs_scratch_free(s_tmp);
        *scratch = NULL;
        return err;
    }

    memset(s_tmp, 0, alloc_size);
    /* Realign to a cacheline; the raw base is retained below in
     * s->scratch_alloc for the eventual free. */
    s = ROUNDUP_PTR(s_tmp, 64);
    DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n" , alloc_size, s_tmp, s);
    DEBUG_PRINTF("sizeof %zu\n" , sizeof(struct hs_scratch));
    /* Copy all sizing fields wholesale from the prototype, then fix up the
     * per-instance fields. */
    *s = *proto;

    s->magic = SCRATCH_MAGIC;
    s->in_use = 0;
    s->scratchSize = alloc_size;
    s->scratch_alloc = (char *)s_tmp;
    s->fdr_conf = NULL;
    s->pure = 0;

    // each of these is at an offset from the previous
    char *current = (char *)s + sizeof(*s);

    // align current so that the following arrays are naturally aligned: this
    // is accounted for in the padding allocated
    current = ROUNDUP_PTR(current, 8);

    /* NFA queue array. */
    s->queues = (struct mq *)current;
    current += queue_size;

    /* SOM location stores (u64a arrays). */
    assert(ISALIGNED_N(current, 8));
    s->som_store = (u64a *)current;
    current += som_store_size;

    s->som_attempted_store = (u64a *)current;
    current += som_attempted_store_size;

    /* Delayed-literal slots: pointer table followed by the fatbit bodies,
     * mirroring the layout computed by fatbit_array_size(). */
    current = ROUNDUP_PTR(current, alignof(struct fatbit *));
    s->delay_slots = (struct fatbit **)current;
    current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT;
    current = ROUNDUP_PTR(current, alignof(struct fatbit));
    for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
        s->delay_slots[i] = (struct fatbit *)current;
        assert(ISALIGNED(s->delay_slots[i]));
        current += proto->delay_fatbit_size;
    }

    /* Anchored literal log: same pointer-table-then-bodies layout. */
    current = ROUNDUP_PTR(current, alignof(struct fatbit *));
    s->al_log = (struct fatbit **)current;
    current += sizeof(struct fatbit *) * anchored_literal_region_len;
    current = ROUNDUP_PTR(current, alignof(struct fatbit));
    for (u32 i = 0; i < anchored_literal_region_len; i++) {
        s->al_log[i] = (struct fatbit *)current;
        assert(ISALIGNED(s->al_log[i]));
        current += anchored_literal_fatbit_size;
    }

    /* Catch-up priority queue entries. */
    current = ROUNDUP_PTR(current, 8);
    s->catchup_pq.qm = (struct queue_match *)current;
    current += qmpq_size;

    /* Block-mode state, then transient state. */
    s->bstate = (char *)current;
    s->bStateSize = bStateSize;
    current += bStateSize;

    s->tstate = (char *)current;
    s->tStateSize = tStateSize;
    current += tStateSize;

    current = ROUNDUP_PTR(current, 64);

    /* Dedupe SOM start-offset logs: two u64a arrays (odd/even). */
    assert(ISALIGNED_N(current, 8));
    s->deduper.som_start_log[0] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    s->deduper.som_start_log[1] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    /* Active queue array and handled-roles fatbits. */
    assert(ISALIGNED_N(current, 8));
    s->aqa = (struct fatbit *)current;
    current += activeQueueArraySize;

    s->handled_roles = (struct fatbit *)current;
    current += proto->handledKeyFatbitSize;

    /* Dedupe logs: odd/even pairs for ordinary and SOM matches. */
    s->deduper.log[0] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.log[1] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.som_log[0] = (struct fatbit *)current;
    current += deduperLogSize;

    s->deduper.som_log[1] = (struct fatbit *)current;
    current += deduperLogSize;

    s->som_set_now = (struct fatbit *)current;
    current += som_now_size;

    s->som_attempted_set = (struct fatbit *)current;
    current += som_attempted_size;

    /* Full NFA state goes last, on its own cacheline. */
    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->fullState = (char *)current;
    s->fullStateSize = fullStateSize;
    current += fullStateSize;

    *scratch = s;

    // Don't get too big for your boots
    assert((size_t)(current - (char *)s) <= alloc_size);

    // Init q->scratch ptr for every queue.
    for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
        qi->scratch = s;
    }

    return HS_SUCCESS;
}
| 243 | |
| 244 | HS_PUBLIC_API |
| 245 | hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, |
| 246 | hs_scratch_t **scratch) { |
| 247 | if (!db || !scratch) { |
| 248 | return HS_INVALID; |
| 249 | } |
| 250 | |
| 251 | /* We need to do some real sanity checks on the database as some users mmap |
| 252 | * in old deserialised databases, so this is the first real opportunity we |
| 253 | * have to make sure it is sane. |
| 254 | */ |
| 255 | hs_error_t rv = dbIsValid(db); |
| 256 | if (rv != HS_SUCCESS) { |
| 257 | return rv; |
| 258 | } |
| 259 | |
| 260 | /* We can also sanity-check the scratch parameter: if it points to an |
| 261 | * existing scratch area, that scratch should have valid magic bits. */ |
| 262 | if (*scratch != NULL) { |
| 263 | /* has to be aligned before we can do anything with it */ |
| 264 | if (!ISALIGNED_CL(*scratch)) { |
| 265 | return HS_INVALID; |
| 266 | } |
| 267 | if ((*scratch)->magic != SCRATCH_MAGIC) { |
| 268 | return HS_INVALID; |
| 269 | } |
| 270 | if (markScratchInUse(*scratch)) { |
| 271 | return HS_SCRATCH_IN_USE; |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | const struct RoseEngine *rose = hs_get_bytecode(db); |
| 276 | int resize = 0; |
| 277 | |
| 278 | hs_scratch_t *proto; |
| 279 | hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256); |
| 280 | hs_error_t proto_ret = hs_check_alloc(proto_tmp); |
| 281 | if (proto_ret != HS_SUCCESS) { |
| 282 | hs_scratch_free(proto_tmp); |
| 283 | hs_scratch_free(*scratch); |
| 284 | *scratch = NULL; |
| 285 | return proto_ret; |
| 286 | } |
| 287 | |
| 288 | proto = ROUNDUP_PTR(proto_tmp, 64); |
| 289 | |
| 290 | if (*scratch) { |
| 291 | *proto = **scratch; |
| 292 | } else { |
| 293 | memset(proto, 0, sizeof(*proto)); |
| 294 | resize = 1; |
| 295 | } |
| 296 | proto->scratch_alloc = (char *)proto_tmp; |
| 297 | |
| 298 | if (rose->anchoredDistance > proto->anchored_literal_region_len) { |
| 299 | resize = 1; |
| 300 | proto->anchored_literal_region_len = rose->anchoredDistance; |
| 301 | } |
| 302 | |
| 303 | if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) { |
| 304 | resize = 1; |
| 305 | proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size; |
| 306 | } |
| 307 | |
| 308 | if (rose->delay_fatbit_size > proto->delay_fatbit_size) { |
| 309 | resize = 1; |
| 310 | proto->delay_fatbit_size = rose->delay_fatbit_size; |
| 311 | } |
| 312 | |
| 313 | if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) { |
| 314 | resize = 1; |
| 315 | proto->handledKeyFatbitSize = rose->handledKeyFatbitSize; |
| 316 | } |
| 317 | |
| 318 | if (rose->tStateSize > proto->tStateSize) { |
| 319 | resize = 1; |
| 320 | proto->tStateSize = rose->tStateSize; |
| 321 | } |
| 322 | |
| 323 | u32 som_store_count = rose->somLocationCount; |
| 324 | if (som_store_count > proto->som_store_count) { |
| 325 | resize = 1; |
| 326 | proto->som_store_count = som_store_count; |
| 327 | } |
| 328 | |
| 329 | if (rose->somLocationFatbitSize > proto->som_fatbit_size) { |
| 330 | resize = 1; |
| 331 | proto->som_fatbit_size = rose->somLocationFatbitSize; |
| 332 | } |
| 333 | |
| 334 | u32 queueCount = rose->queueCount; |
| 335 | if (queueCount > proto->queueCount) { |
| 336 | resize = 1; |
| 337 | proto->queueCount = queueCount; |
| 338 | } |
| 339 | |
| 340 | if (rose->activeQueueArraySize > proto->activeQueueArraySize) { |
| 341 | resize = 1; |
| 342 | proto->activeQueueArraySize = rose->activeQueueArraySize; |
| 343 | } |
| 344 | |
| 345 | u32 bStateSize = 0; |
| 346 | if (rose->mode == HS_MODE_BLOCK) { |
| 347 | bStateSize = rose->stateOffsets.end; |
| 348 | } else if (rose->mode == HS_MODE_VECTORED) { |
| 349 | /* vectoring database require a full stream state (inc header) */ |
| 350 | bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end; |
| 351 | } |
| 352 | |
| 353 | if (bStateSize > proto->bStateSize) { |
| 354 | resize = 1; |
| 355 | proto->bStateSize = bStateSize; |
| 356 | } |
| 357 | |
| 358 | u32 fullStateSize = rose->scratchStateSize; |
| 359 | if (fullStateSize > proto->fullStateSize) { |
| 360 | resize = 1; |
| 361 | proto->fullStateSize = fullStateSize; |
| 362 | } |
| 363 | |
| 364 | if (rose->dkeyCount > proto->deduper.dkey_count) { |
| 365 | resize = 1; |
| 366 | proto->deduper.dkey_count = rose->dkeyCount; |
| 367 | proto->deduper.log_size = rose->dkeyLogSize; |
| 368 | } |
| 369 | |
| 370 | if (resize) { |
| 371 | if (*scratch) { |
| 372 | hs_scratch_free((*scratch)->scratch_alloc); |
| 373 | } |
| 374 | |
| 375 | hs_error_t alloc_ret = alloc_scratch(proto, scratch); |
| 376 | hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ |
| 377 | if (alloc_ret != HS_SUCCESS) { |
| 378 | *scratch = NULL; |
| 379 | return alloc_ret; |
| 380 | } |
| 381 | } else { |
| 382 | hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ |
| 383 | unmarkScratchInUse(*scratch); |
| 384 | } |
| 385 | |
| 386 | assert(!(*scratch)->in_use); |
| 387 | return HS_SUCCESS; |
| 388 | } |
| 389 | |
| 390 | HS_PUBLIC_API |
| 391 | hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src, |
| 392 | hs_scratch_t **dest) { |
| 393 | if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) { |
| 394 | return HS_INVALID; |
| 395 | } |
| 396 | |
| 397 | *dest = NULL; |
| 398 | hs_error_t ret = alloc_scratch(src, dest); |
| 399 | if (ret != HS_SUCCESS) { |
| 400 | *dest = NULL; |
| 401 | return ret; |
| 402 | } |
| 403 | |
| 404 | assert(!(*dest)->in_use); |
| 405 | return HS_SUCCESS; |
| 406 | } |
| 407 | |
| 408 | HS_PUBLIC_API |
| 409 | hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch) { |
| 410 | if (scratch) { |
| 411 | /* has to be aligned before we can do anything with it */ |
| 412 | if (!ISALIGNED_CL(scratch)) { |
| 413 | return HS_INVALID; |
| 414 | } |
| 415 | if (scratch->magic != SCRATCH_MAGIC) { |
| 416 | return HS_INVALID; |
| 417 | } |
| 418 | if (markScratchInUse(scratch)) { |
| 419 | return HS_SCRATCH_IN_USE; |
| 420 | } |
| 421 | |
| 422 | scratch->magic = 0; |
| 423 | assert(scratch->scratch_alloc); |
| 424 | DEBUG_PRINTF("scratch %p is really at %p : freeing\n" , scratch, |
| 425 | scratch->scratch_alloc); |
| 426 | hs_scratch_free(scratch->scratch_alloc); |
| 427 | } |
| 428 | |
| 429 | return HS_SUCCESS; |
| 430 | } |
| 431 | |
| 432 | HS_PUBLIC_API |
| 433 | hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, size_t *size) { |
| 434 | if (!size || !scratch || !ISALIGNED_CL(scratch) || |
| 435 | scratch->magic != SCRATCH_MAGIC) { |
| 436 | return HS_INVALID; |
| 437 | } |
| 438 | |
| 439 | *size = scratch->scratchSize; |
| 440 | |
| 441 | return HS_SUCCESS; |
| 442 | } |
| 443 | |