1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/* noodle scan parts for SSE */
30
31static really_inline m128 getMask(u8 c, bool noCase) {
32 u8 k = caseClear8(c, noCase);
33 return set16x8(k);
34}
35
36static really_inline m128 getCaseMask(void) {
37 return set16x8(0xdf);
38}
39
/*
 * Scan for a single-byte literal over buf[start, end) where the range is at
 * most 16 bytes. The bytes are copied into a zeroed vector so a full-width
 * SIMD compare can be used without reading past the range.
 *
 * NOTE(review): SINGLE_ZSCAN() is a macro defined elsewhere; it appears to
 * consume the locals z/d/buf/len/n/cbi by name, so those names must not
 * change.
 */
static really_inline
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
                             size_t len, bool noCase, m128 caseMask, m128 mask1,
                             const struct cb_info *cbi, size_t start,
                             size_t end) {
    const u8 *d = buf + start;
    size_t l = end - start;
    DEBUG_PRINTF("l %zu\n", l);
    assert(l <= 16); // v below is only 16 bytes wide
    if (!l) {
        // empty range: nothing to scan
        return HWLM_SUCCESS;
    }
    m128 v = zeroes128();
    // we don't have a clever way of doing this move yet: copy the (possibly
    // partial) range into a zeroed vector rather than doing a masked load
    memcpy(&v, d, l);
    if (noCase) {
        // fold case by clearing bit 5 of every byte (see getCaseMask)
        v = and128(v, caseMask);
    }

    // mask out where we can't match: keep only the low l bits, one per
    // valid byte position
    u32 mask = (0xFFFF >> (16 - l));

    // z has one bit set per position where the byte equals the literal
    u32 z = mask & movemask128(eq128(mask1, v));

    SINGLE_ZSCAN();

    return HWLM_SUCCESS;
}
68
69static really_inline
70hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
71 size_t len, size_t offset, bool noCase,
72 m128 caseMask, m128 mask1,
73 const struct cb_info *cbi, size_t start,
74 size_t end) {
75 const u8 *d = buf + offset;
76 DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
77 const size_t l = end - start;
78
79 m128 v = loadu128(d);
80
81 if (noCase) {
82 v = and128(v, caseMask);
83 }
84
85 u32 buf_off = start - offset;
86 u32 mask = ((1 << l) - 1) << buf_off;
87
88 u32 z = mask & movemask128(eq128(mask1, v));
89
90 DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
91
92 z &= mask;
93
94 SINGLE_ZSCAN();
95
96 return HWLM_SUCCESS;
97}
98
99static really_inline
100hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
101 size_t len, bool noCase, m128 caseMask, m128 mask1,
102 m128 mask2, const struct cb_info *cbi,
103 size_t start, size_t end) {
104 const u8 *d = buf + start;
105 size_t l = end - start;
106 if (!l) {
107 return HWLM_SUCCESS;
108 }
109 assert(l <= 32);
110
111 DEBUG_PRINTF("d %zu\n", d - buf);
112 m128 v = zeroes128();
113 memcpy(&v, d, l);
114 if (noCase) {
115 v = and128(v, caseMask);
116 }
117
118 u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
119 eq128(mask2, v)));
120
121 // mask out where we can't match
122 u32 mask = (0xFFFF >> (16 - l));
123 z &= mask;
124
125 DOUBLE_ZSCAN();
126
127 return HWLM_SUCCESS;
128}
129
/*
 * Scan for a two-byte literal over buf[start, end) using one unaligned
 * 16-byte load taken at buf + offset. Bit i of z is set when byte i matches
 * the second literal byte and byte i-1 matches the first.
 *
 * NOTE(review): assumes offset <= start and end - offset <= 16 so the load
 * covers the whole range — confirm against callers.
 * DOUBLE_ZSCAN() consumes the locals z/d/buf/len/n/cbi by name, so those
 * names must not change.
 */
static really_inline
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
                                 size_t len, size_t offset, bool noCase,
                                 m128 caseMask, m128 mask1, m128 mask2,
                                 const struct cb_info *cbi, size_t start,
                                 size_t end) {
    const u8 *d = buf + offset;
    DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
    size_t l = end - start;

    m128 v = loadu128(d);

    if (noCase) {
        // fold case by clearing bit 5 of every byte (see getCaseMask)
        v = and128(v, caseMask);
    }

    // Pair up adjacent matches: shift first-byte matches up by one byte and
    // AND with second-byte matches.
    u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
                               eq128(mask2, v)));

    // mask out where we can't match: l contiguous bits, shifted up to the
    // range's offset within the loaded vector
    u32 buf_off = start - offset;
    u32 mask = ((1 << l) - 1) << buf_off;
    DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
    z &= mask;

    DOUBLE_ZSCAN();

    return HWLM_SUCCESS;
}
159
/*
 * Main single-byte scanning loop: process buf[start, end) 16 bytes at a
 * time with aligned loads.
 *
 * NOTE(review): load128 suggests an aligned load — presumably the caller
 * guarantees buf + start and buf + end are 16-byte aligned; confirm.
 * SINGLE_ZSCAN() consumes the locals z/d/buf/len/n/cbi by name.
 */
static really_inline
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
                            size_t len, bool noCase, m128 caseMask, m128 mask1,
                            const struct cb_info *cbi, size_t start,
                            size_t end) {
    const u8 *d = buf + start, *e = buf + end;
    assert(d < e);

    for (; d < e; d += 16) {
        // fold case on load when scanning caselessly (see getCaseMask)
        m128 v = noCase ? and128(load128(d), caseMask) : load128(d);

        // one bit per byte position that equals the literal byte
        u32 z = movemask128(eq128(mask1, v));

        // On large packet buffers, this prefetch appears to get us about 2%.
        __builtin_prefetch(d + 128);

        SINGLE_ZSCAN();
    }
    return HWLM_SUCCESS;
}
180
/*
 * Main two-byte scanning loop: process buf[start, end) 16 bytes at a time
 * with aligned loads. lastz1 carries the previous block's first-byte match
 * vector so a pair straddling a 16-byte boundary is still found: palignr
 * with shift 15 aligns each position's first-byte match (from the byte
 * before, possibly in the previous block) with its second-byte match.
 *
 * NOTE(review): lastz1 starts as all-zero, so a pair whose first byte sits
 * at buf[start - 1] is presumably handled by the caller; confirm.
 * DOUBLE_ZSCAN() consumes the locals z/d/buf/len/n/cbi by name.
 */
static really_inline
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
                            size_t len, bool noCase, m128 caseMask, m128 mask1,
                            m128 mask2, const struct cb_info *cbi, size_t start,
                            size_t end) {
    const u8 *d = buf + start, *e = buf + end;
    assert(d < e);
    m128 lastz1 = zeroes128();

    for (; d < e; d += 16) {
        // fold case on load when scanning caselessly (see getCaseMask)
        m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
        m128 z1 = eq128(mask1, v);  // first-byte matches in this block
        m128 z2 = eq128(mask2, v);  // second-byte matches in this block
        // align first-byte matches with the following byte's second-byte
        // matches, pulling the carried byte in from lastz1
        u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2));
        lastz1 = z1;

        // On large packet buffers, this prefetch appears to get us about 2%.
        __builtin_prefetch(d + 128);
        DEBUG_PRINTF("z 0x%08x\n", z);
        DOUBLE_ZSCAN();
    }
    return HWLM_SUCCESS;
}
204