1/*
2 * Copyright (c) 2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef STREAM_LONG_LIT_H
30#define STREAM_LONG_LIT_H
31
32#include "rose.h"
33#include "rose_common.h"
34#include "rose_internal.h"
35#include "stream_long_lit_hash.h"
36#include "util/compare.h"
37#include "util/copybytes.h"
38
39static really_inline
40const struct RoseLongLitHashEntry *
41getHashTableBase(const struct RoseLongLitTable *ll_table,
42 const struct RoseLongLitSubtable *ll_sub) {
43 assert(ll_sub->hashOffset);
44 return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
45 ll_sub->hashOffset);
46}
47
48// Reads from stream state and unpacks values into stream state table.
49static really_inline
50void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
51 const u8 *ll_state, u32 *state_case,
52 u32 *state_nocase) {
53 assert(ll_table);
54 assert(ll_state);
55 assert(state_case && state_nocase);
56
57 u8 ss_bytes = ll_table->streamStateBytes;
58 u8 ssb = ll_table->caseful.streamStateBits;
59 UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
60 assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
61
62#if defined(ARCH_32_BIT)
63 // On 32-bit hosts, we may be able to avoid having to do any u64a
64 // manipulation at all.
65 if (ss_bytes <= 4) {
66 u32 ssb_mask = (1U << ssb) - 1;
67 u32 streamVal = partial_load_u32(ll_state, ss_bytes);
68 *state_case = (u32)(streamVal & ssb_mask);
69 *state_nocase = (u32)(streamVal >> ssb);
70 return;
71 }
72#endif
73
74 u64a ssb_mask = (1ULL << ssb) - 1;
75 u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
76 *state_case = (u32)(streamVal & ssb_mask);
77 *state_nocase = (u32)(streamVal >> ssb);
78}
79
80static rose_inline
81void loadLongLiteralStateMode(struct hs_scratch *scratch,
82 const struct RoseLongLitTable *ll_table,
83 const struct RoseLongLitSubtable *ll_sub,
84 const u32 state, const char nocase) {
85 if (!state) {
86 DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
87 return;
88 }
89
90 const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
91 const struct RoseLongLitHashEntry *ent = tab + state - 1;
92
93 assert(ent->str_offset + ent->str_len <= ll_table->size);
94 const u8 *found_buf = (const u8 *)ll_table + ent->str_offset;
95 size_t found_sz = ent->str_len;
96
97 struct RoseContext *tctxt = &scratch->tctxt;
98 if (nocase) {
99 tctxt->ll_buf_nocase = found_buf;
100 tctxt->ll_len_nocase = found_sz;
101 } else {
102 tctxt->ll_buf = found_buf;
103 tctxt->ll_len = found_sz;
104 }
105}
106
107static rose_inline
108void loadLongLiteralState(const struct RoseEngine *t, char *state,
109 struct hs_scratch *scratch) {
110 if (!t->longLitTableOffset) {
111 return;
112 }
113
114 // If we don't have any long literals in play, these values must point to
115 // the real history buffer so that CHECK_LONG_LIT instructions examine the
116 // history buffer.
117 scratch->tctxt.ll_buf = scratch->core_info.hbuf;
118 scratch->tctxt.ll_len = scratch->core_info.hlen;
119 scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
120 scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
121
122 if (!scratch->core_info.hlen) {
123 return;
124 }
125
126 const struct RoseLongLitTable *ll_table =
127 getByOffset(t, t->longLitTableOffset);
128 const u8 *ll_state = getLongLitState(t, state);
129
130 u32 state_case;
131 u32 state_nocase;
132 loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
133
134 DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase);
135
136 loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful,
137 state_case, 0);
138 loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase,
139 state_nocase, 1);
140}
141
142static rose_inline
143char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
144 const struct hs_scratch *scratch,
145 const struct RoseLongLitHashEntry *ent,
146 const char nocase) {
147 assert(ent->str_offset + ent->str_len <= ll_table->size);
148 const u8 *s = (const u8 *)ll_table + ent->str_offset;
149 size_t len = ent->str_len;
150 const u8 *buf = scratch->core_info.buf;
151 const size_t buf_len = scratch->core_info.len;
152
153 if (len > buf_len) {
154 const struct RoseContext *tctxt = &scratch->tctxt;
155 const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
156 size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;
157
158 if (len > buf_len + hist_len) {
159 return 0; // Break out - not enough total history
160 }
161
162 size_t overhang = len - buf_len;
163 assert(overhang <= hist_len);
164
165 if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
166 return 0;
167 }
168 s += overhang;
169 len -= overhang;
170 }
171
172 // if we got here, we don't need history or we compared ok out of history
173 assert(len <= buf_len);
174
175 if (cmpForward(buf + buf_len - len, s, len, nocase)) {
176 return 0;
177 }
178
179 return 1;
180}
181
182static rose_inline
183const u8 *prepScanBuffer(const struct core_info *ci,
184 const struct RoseLongLitTable *ll_table, u8 *tempbuf) {
185 const u8 hash_len = ll_table->maxLen;
186 assert(hash_len >= LONG_LIT_HASH_LEN);
187
188 // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
189 // location (end of buffer - hash_len). If this block can be satisfied
190 // entirely from either the current buffer or the history buffer, we pass
191 // in the pointer directly; otherwise we must make a copy.
192
193 const u8 *base;
194
195 if (hash_len > ci->len) {
196 size_t overhang = hash_len - ci->len;
197 if (overhang >= LONG_LIT_HASH_LEN) {
198 // Can read enough to hash from inside the history buffer.
199 assert(overhang <= ci->hlen);
200 base = ci->hbuf + ci->hlen - overhang;
201 } else {
202 // Copy: first chunk from history buffer.
203 assert(overhang <= ci->hlen);
204 copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
205 overhang);
206 // Copy: second chunk from current buffer.
207 size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
208 assert(copy_buf_len <= ci->len);
209 copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
210 // Read from our temporary buffer for the hash.
211 base = tempbuf;
212 }
213 } else {
214 // Can read enough to hash from inside the current buffer.
215 base = ci->buf + ci->len - hash_len;
216 }
217
218 return base;
219}
220
221#ifndef NDEBUG
222// Defensive checking (used in assert) that these table values don't overflow
223// the range available.
224static really_inline
225char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb,
226 u8 ssb_nc) {
227 u32 ssb_mask = (1ULL << (ssb)) - 1;
228 if (state_case & ~ssb_mask) {
229 return 1;
230 }
231 u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
232 if (state_nocase & ~ssb_nc_mask) {
233 return 1;
234 }
235 return 0;
236}
237#endif
238
239// Reads from stream state table and packs values into stream state.
240static rose_inline
241void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
242 u8 *ll_state, u32 state_case, u32 state_nocase) {
243 assert(ll_table);
244 assert(ll_state);
245
246 u8 ss_bytes = ll_table->streamStateBytes;
247 u8 ssb = ll_table->caseful.streamStateBits;
248 UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
249 assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
250 assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
251
252#if defined(ARCH_32_BIT)
253 // On 32-bit hosts, we may be able to avoid having to do any u64a
254 // manipulation at all.
255 if (ss_bytes <= 4) {
256 u32 stagingStreamState = state_case;
257 stagingStreamState |= (state_nocase << ssb);
258 partial_store_u32(ll_state, stagingStreamState, ss_bytes);
259 return;
260 }
261#endif
262
263 u64a stagingStreamState = (u64a)state_case;
264 stagingStreamState |= (u64a)state_nocase << ssb;
265 partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
266}
267
268static really_inline
269char has_bit(const u8 *data, u32 bit) {
270 return (data[bit / 8] >> (bit % 8)) & 1;
271}
272
273static rose_inline
274char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) {
275 return has_bit(bloom, hash & bloom_mask);
276}
277
278static rose_inline
279char checkBloomFilter(const struct RoseLongLitTable *ll_table,
280 const struct RoseLongLitSubtable *ll_sub,
281 const u8 *scan_buf, char nocase) {
282 assert(ll_sub->bloomBits);
283
284 const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset;
285 const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1;
286
287 char v = 1;
288 v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase));
289 v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase));
290 v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase));
291 return v;
292}
293
294/**
295 * \brief Look for a hit in the hash table.
296 *
297 * Returns zero if not found, otherwise returns (bucket + 1).
298 */
299static rose_inline
300u32 checkHashTable(const struct RoseLongLitTable *ll_table,
301 const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf,
302 const struct hs_scratch *scratch, char nocase) {
303 const u32 nbits = ll_sub->hashBits;
304 assert(nbits && nbits < 32);
305 const u32 num_entries = 1U << nbits;
306
307 const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
308
309 u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase);
310 u32 bucket = hash & ((1U << nbits) - 1);
311
312 while (tab[bucket].str_offset != 0) {
313 DEBUG_PRINTF("checking bucket %u\n", bucket);
314 if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) {
315 DEBUG_PRINTF("found hit for bucket %u\n", bucket);
316 return bucket + 1;
317 }
318
319 if (++bucket == num_entries) {
320 bucket = 0;
321 }
322 }
323
324 return 0;
325}
326
327static rose_inline
328void storeLongLiteralState(const struct RoseEngine *t, char *state,
329 struct hs_scratch *scratch) {
330 if (!t->longLitTableOffset) {
331 DEBUG_PRINTF("no table\n");
332 return;
333 }
334
335 struct core_info *ci = &scratch->core_info;
336 const struct RoseLongLitTable *ll_table =
337 getByOffset(t, t->longLitTableOffset);
338 assert(ll_table->maxLen);
339
340 DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len,
341 ci->hlen);
342
343 u32 state_case = 0;
344 u32 state_nocase = 0;
345
346 // If we don't have enough history, we don't need to do anything.
347 if (ll_table->maxLen <= ci->len + ci->hlen) {
348 u8 tempbuf[LONG_LIT_HASH_LEN];
349 const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf);
350
351 if (ll_table->caseful.hashBits &&
352 checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) {
353 state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf,
354 scratch, 0);
355 }
356
357 if (ll_table->nocase.hashBits &&
358 checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) {
359 state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf,
360 scratch, 1);
361 }
362 } else {
363 DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen);
364 }
365
366 DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
367
368 u8 *ll_state = getLongLitState(t, state);
369 storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase);
370}
371
372#endif // STREAM_LONG_LIT_H
373