1/*
2 * Copyright (c) 2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef STREAM_LONG_LIT_HASH_H
30#define STREAM_LONG_LIT_HASH_H
31
32#include "ue2common.h"
33#include "util/bitutils.h"
34#include "util/unaligned.h"
35
36/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
37#define LONG_LIT_HASH_LEN 24
38
39/** \brief Multiplier used by al the hash functions below. */
40#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL
41
42/** \brief Hash function used for long literal table in streaming mode. */
43static really_inline
44u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
45 // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
46 // hash are for strings longer than this.
47 assert(len >= 24);
48
49 u64a v1 = unaligned_load_u64a(ptr);
50 u64a v2 = unaligned_load_u64a(ptr + 8);
51 u64a v3 = unaligned_load_u64a(ptr + 16);
52 if (nocase) {
53 v1 &= OCTO_CASE_CLEAR;
54 v2 &= OCTO_CASE_CLEAR;
55 v3 &= OCTO_CASE_CLEAR;
56 }
57 v1 *= HASH_MULTIPLIER;
58 v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER;
59 v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
60 v1 >>= 32;
61 v2 >>= 32;
62 v3 >>= 32;
63 return v1 ^ v2 ^ v3;
64}
65
66/**
67 * \brief Internal, used by the bloom filter hash functions below. Hashes 16
68 * bytes beginning at (ptr + offset).
69 */
70static really_inline
71u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) {
72 assert(offset + 16 <= LONG_LIT_HASH_LEN);
73
74 u64a v = unaligned_load_u64a(ptr + offset);
75 if (nocase) {
76 v &= OCTO_CASE_CLEAR;
77 }
78 v *= multiplier;
79 return v >> 32;
80}
81
82/*
83 * We ensure that we see every byte the first LONG_LIT_HASH_LEN bytes of input
84 * data (using at least one of the following functions).
85 */
86
87static really_inline
88u32 bloomHash_1(const u8 *ptr, char nocase) {
89 const u64a multiplier = HASH_MULTIPLIER;
90 return bloomHash_i(ptr, 0, multiplier, nocase);
91}
92
93static really_inline
94u32 bloomHash_2(const u8 *ptr, char nocase) {
95 const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER;
96 return bloomHash_i(ptr, 4, multiplier, nocase);
97}
98
99static really_inline
100u32 bloomHash_3(const u8 *ptr, char nocase) {
101 const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
102 return bloomHash_i(ptr, 8, multiplier, nocase);
103}
104
105#endif // STREAM_LONG_LIT_HASH_H
106