uniform_ops.h source code [ClickHouse/contrib/hyperscan/src/util/uniform_ops.h]

1	/*
2	* Copyright (c) 2015-2016, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** \file
30	* \brief Uniformly-named primitives named by target type.
31	*
32	* The following are a set of primitives named by target type, so that we can
33	* macro the hell out of all our NFA implementations. Hurrah!
34	*/
35
36	#ifndef UNIFORM_OPS_H
37	#define UNIFORM_OPS_H
38
39	#include "ue2common.h"
40	#include "simd_utils.h"
41	#include "unaligned.h"
42
// Aligned loads
//
// load_<type>(a) reads one value of <type> from address a. The scalar
// variants dereference a directly through a pointer cast, so a must be
// suitably aligned for the target type (use the loadu_* family otherwise).
// The vector variants forward to the loadNNN helpers, which are not defined
// in this file (presumably provided by simd_utils.h).
#define load_u8(a)          (*(const u8 *)(a))
#define load_u16(a)         (*(const u16 *)(a))
#define load_u32(a)         (*(const u32 *)(a))
#define load_u64a(a)        (*(const u64a *)(a))
#define load_m128(a)        load128(a)
#define load_m256(a)        load256(a)
#define load_m384(a)        load384(a)
#define load_m512(a)        load512(a)
52
// Unaligned loads
//
// loadu_<type>(a) reads one value of <type> from an address with no alignment
// requirement. A single byte is always aligned, so loadu_u8 is a plain
// dereference. The wider scalar variants pass the address, cast to
// const u8 *, to the unaligned_load_* helpers; the vector variants forward to
// the loaduNNN helpers. Neither set of helpers is defined in this file
// (presumably they come from unaligned.h and simd_utils.h respectively).
#define loadu_u8(a)         (*(const u8 *)(a))
#define loadu_u16(a)        unaligned_load_u16((const u8 *)(a))
#define loadu_u32(a)        unaligned_load_u32((const u8 *)(a))
#define loadu_u64a(a)       unaligned_load_u64a((const u8 *)(a))
#define loadu_m128(a)       loadu128(a)
#define loadu_m256(a)       loadu256(a)
#define loadu_m384(a)       loadu384(a)
#define loadu_m512(a)       loadu512(a)
62
// Aligned stores
//
// store_<type>(ptr, a) writes the value a to the address ptr. The scalar
// variants assign through a pointer cast, so ptr must be suitably aligned for
// the target type; they are wrapped in do { } while (0) so that each expands
// to a single statement and is safe in an unbraced if/else. The vector
// variants forward to the storeNNN helpers, which are not defined in this
// file (presumably provided by simd_utils.h).
#define store_u8(ptr, a)    do { *(u8 *)(ptr) = (a); } while(0)
#define store_u16(ptr, a)   do { *(u16 *)(ptr) = (a); } while(0)
#define store_u32(ptr, a)   do { *(u32 *)(ptr) = (a); } while(0)
#define store_u64a(ptr, a)  do { *(u64a *)(ptr) = (a); } while(0)
#define store_m128(ptr, a)  store128(ptr, a)
#define store_m256(ptr, a)  store256(ptr, a)
#define store_m384(ptr, a)  store384(ptr, a)
#define store_m512(ptr, a)  store512(ptr, a)
72
// Unaligned stores
//
// storeu_<type>(ptr, a) writes the value a to an address with no alignment
// requirement. A single byte is always aligned, so storeu_u8 is a plain
// assignment through a pointer cast. The wider scalar variants forward to the
// unaligned_store_* helpers and the 128-bit variant to storeu128; none of
// these helpers is defined in this file. No 256/384/512-bit unaligned store
// is provided here.
#define storeu_u8(ptr, a)   do { *(u8 *)(ptr) = (a); } while(0)
#define storeu_u16(ptr, a)  unaligned_store_u16(ptr, a)
#define storeu_u32(ptr, a)  unaligned_store_u32(ptr, a)
#define storeu_u64a(ptr, a) unaligned_store_u64a(ptr, a)
#define storeu_m128(ptr, a) storeu128(ptr, a)
79
// All-bits-clear value for each state type. The scalar variants are the
// literal 0; the vector variants call the zeroesNNN() helpers, which are not
// defined in this file (presumably provided by simd_utils.h).
#define zero_u8             0
#define zero_u32            0
#define zero_u64a           0
#define zero_m128           zeroes128()
#define zero_m256           zeroes256()
#define zero_m384           zeroes384()
#define zero_m512           zeroes512()
87
// All-bits-set value for each state type. The scalar variants are integer
// literals of the matching width; the vector variants call the onesNNN()
// helpers, which are not defined in this file (presumably provided by
// simd_utils.h).
#define ones_u8             0xff
#define ones_u32            0xfffffffful
#define ones_u64a           0xffffffffffffffffull
#define ones_m128           ones128()
#define ones_m256           ones256()
#define ones_m384           ones384()
#define ones_m512           ones512()
95
// Bitwise OR of two values of the same state type. The scalar variants use
// the | operator with both operands parenthesised; the vector variants
// forward to the orNNN helpers, which are not defined in this file.
#define or_u8(a, b)         ((a) | (b))
#define or_u32(a, b)        ((a) | (b))
#define or_u64a(a, b)       ((a) | (b))
#define or_m128(a, b)       (or128(a, b))
#define or_m256(a, b)       (or256(a, b))
#define or_m384(a, b)       (or384(a, b))
#define or_m512(a, b)       (or512(a, b))
103
// Bitwise AND of two values of the same state type. The scalar variants use
// the & operator with both operands parenthesised; the vector variants
// forward to the andNNN helpers, which are not defined in this file.
#define and_u8(a, b)        ((a) & (b))
#define and_u32(a, b)       ((a) & (b))
#define and_u64a(a, b)      ((a) & (b))
#define and_m128(a, b)      (and128(a, b))
#define and_m256(a, b)      (and256(a, b))
#define and_m384(a, b)      (and384(a, b))
#define and_m512(a, b)      (and512(a, b))
111
// Bitwise complement of a value of the given state type. The vector variants
// forward to the notNNN helpers, which are not defined in this file.
//
// NOTE(review): the scalar variants apply ~ directly, so an operand narrower
// than int (as u8 presumably is) is promoted first and the result has the
// bits above the operand's width set. Convert the result back to the narrow
// type (e.g. by assigning it to a u8) before comparing it with zero.
#define not_u8(a)           (~(a))
#define not_u32(a)          (~(a))
#define not_u64a(a)         (~(a))
#define not_m128(a)         (not128(a))
#define not_m256(a)         (not256(a))
#define not_m384(a)         (not384(a))
#define not_m512(a)         (not512(a))
119
// AND-NOT: clears from b every bit that is set in a, i.e. (~a) & b. Note the
// operand order: it is the FIRST argument that is complemented. The vector
// variants forward to the andnotNNN helpers, which are not defined in this
// file.
#define andnot_u8(a, b)     ((~(a)) & (b))
#define andnot_u32(a, b)    ((~(a)) & (b))
#define andnot_u64a(a, b)   ((~(a)) & (b))
#define andnot_m128(a, b)   (andnot128(a, b))
#define andnot_m256(a, b)   (andnot256(a, b))
#define andnot_m384(a, b)   (andnot384(a, b))
#define andnot_m512(a, b)   (andnot512(a, b))
127
// Left shift of a by b bit positions. For the scalar variants b must be less
// than the bit width of a, as a larger shift count is undefined in C. The
// vector variants forward to the lshift64_mNNN helpers, which are not defined
// in this file (the name suggests the shift is applied per 64-bit lane --
// confirm against simd_utils.h). No u8 variant is provided.
#define lshift_u32(a, b)    ((a) << (b))
#define lshift_u64a(a, b)   ((a) << (b))
#define lshift_m128(a, b)   (lshift64_m128(a, b))
#define lshift_m256(a, b)   (lshift64_m256(a, b))
#define lshift_m384(a, b)   (lshift64_m384(a, b))
#define lshift_m512(a, b)   (lshift64_m512(a, b))
134
// Evaluates to non-zero when no bit of a is set. The scalar variants compare
// against 0; the vector variants negate the isnonzeroNNN helpers, which are
// not defined in this file.
#define isZero_u8(a)        ((a) == 0)
#define isZero_u32(a)       ((a) == 0)
#define isZero_u64a(a)      ((a) == 0)
#define isZero_m128(a)      (!isnonzero128(a))
#define isZero_m256(a)      (!isnonzero256(a))
#define isZero_m384(a)      (!isnonzero384(a))
#define isZero_m512(a)      (!isnonzero512(a))
142
// Evaluates to non-zero when at least one bit of a is set. The scalar
// variants compare against 0; the vector variants forward to the
// isnonzeroNNN helpers, which are not defined in this file.
#define isNonZero_u8(a)     ((a) != 0)
#define isNonZero_u32(a)    ((a) != 0)
#define isNonZero_u64a(a)   ((a) != 0)
#define isNonZero_m128(a)   (isnonzero128(a))
#define isNonZero_m256(a)   (isnonzero256(a))
#define isNonZero_m384(a)   (isnonzero384(a))
#define isNonZero_m512(a)   (isnonzero512(a))
150
// "Rich" difference of two values of the same state type.
//  - u32:  1 when a and b differ, 0 otherwise.
//  - u64a: 3 when a and b differ, 0 otherwise. Per the TODO below this is a
//    coarse stand-in for a 32-bit-granularity result (presumably one bit per
//    32-bit word, with both words reported as changed -- confirm with
//    callers).
//  - vectors: forwarded to the diffrichNNN helpers, which are not defined in
//    this file.
#define diffrich_u32(a, b)  ((a) != (b))
#define diffrich_u64a(a, b) ((a) != (b) ? 3 : 0) //TODO: impl 32bit granularity
#define diffrich_m128(a, b) (diffrich128(a, b))
#define diffrich_m256(a, b) (diffrich256(a, b))
#define diffrich_m384(a, b) (diffrich384(a, b))
#define diffrich_m512(a, b) (diffrich512(a, b))
157
// "Rich" difference at 64-bit granularity. For both scalar types the result
// is 1 when a and b differ and 0 when they are equal. The vector variants
// forward to the diffrich64_NNN helpers, which are not defined in this file.
#define diffrich64_u32(a, b)  ((a) != (b))
#define diffrich64_u64a(a, b) ((a) != (b) ? 1 : 0)
#define diffrich64_m128(a, b) (diffrich64_128(a, b))
#define diffrich64_m256(a, b) (diffrich64_256(a, b))
#define diffrich64_m384(a, b) (diffrich64_384(a, b))
#define diffrich64_m512(a, b) (diffrich64_512(a, b))
164
// Inequality test: non-zero when a and b differ. The scalar variants use the
// != operator; the vector variants forward to the diffNNN helpers, which are
// not defined in this file.
#define noteq_u8(a, b)      ((a) != (b))
#define noteq_u32(a, b)     ((a) != (b))
#define noteq_u64a(a, b)    ((a) != (b))
#define noteq_m128(a, b)    (diff128(a, b))
#define noteq_m256(a, b)    (diff256(a, b))
#define noteq_m384(a, b)    (diff384(a, b))
#define noteq_m512(a, b)    (diff512(a, b))
172
// Partial vector stores: forward (ptr, v, sz) unchanged to the storebytesNNN
// helpers, which are not defined in this file. Presumably only the first sz
// bytes of v are written to ptr -- confirm against the helper definitions.
#define partial_store_m128(ptr, v, sz) storebytes128(ptr, v, sz)
#define partial_store_m256(ptr, v, sz) storebytes256(ptr, v, sz)
#define partial_store_m384(ptr, v, sz) storebytes384(ptr, v, sz)
#define partial_store_m512(ptr, v, sz) storebytes512(ptr, v, sz)

// Partial vector loads: forward (ptr, sz) unchanged to the loadbytesNNN
// helpers, which are not defined in this file. Presumably sz bytes are read
// from ptr -- confirm against the helper definitions.
#define partial_load_m128(ptr, sz) loadbytes128(ptr, sz)
#define partial_load_m256(ptr, sz) loadbytes256(ptr, sz)
#define partial_load_m384(ptr, sz) loadbytes384(ptr, sz)
#define partial_load_m512(ptr, sz) loadbytes512(ptr, sz)
182
// Compressed stores: forward (ptr, x, m, len) unchanged to the
// storecompressedNN helpers, which are not defined in this file. Presumably
// x is packed under mask m into len bytes at ptr -- confirm against the
// helper definitions.
#define store_compressed_u32(ptr, x, m, len)  storecompressed32(ptr, x, m, len)
#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len)
#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len)
#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len)
#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len)
#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len)

// Compressed loads: forward (x, ptr, m, len) unchanged to the
// loadcompressedNN helpers, which are not defined in this file. Note the
// argument order differs from the store family: the destination x comes
// first here.
#define load_compressed_u32(x, ptr, m, len)  loadcompressed32(x, ptr, m, len)
#define load_compressed_u64a(x, ptr, m, len) loadcompressed64(x, ptr, m, len)
#define load_compressed_m128(x, ptr, m, len) loadcompressed128(x, ptr, m, len)
#define load_compressed_m256(x, ptr, m, len) loadcompressed256(x, ptr, m, len)
#define load_compressed_m384(x, ptr, m, len) loadcompressed384(x, ptr, m, len)
#define load_compressed_m512(x, ptr, m, len) loadcompressed512(x, ptr, m, len)
196
197	static really_inline
198	void clearbit_u32(u32 *p, u32 n) {
199	assert(n < sizeof(p) `8`);
200	*p &= ~(`1U` << n);
201	}
202
203	static really_inline
204	void clearbit_u64a(u64a *p, u32 n) {
205	assert(n < sizeof(p) `8`);
206	*p &= ~(`1ULL` << n);
207	}
208
// Vector counterparts of clearbit_u32/clearbit_u64a: forward (ptr, n)
// unchanged to the clearbitNNN helpers, which are not defined in this file.
#define clearbit_m128(ptr, n) (clearbit128(ptr, n))
#define clearbit_m256(ptr, n) (clearbit256(ptr, n))
#define clearbit_m384(ptr, n) (clearbit384(ptr, n))
#define clearbit_m512(ptr, n) (clearbit512(ptr, n))
213
214	static really_inline
215	char testbit_u32(u32 val, u32 n) {
216	assert(n < sizeof(val) * `8`);
217	return !!(val & (`1U` << n));
218	}
219
220	static really_inline
221	char testbit_u64a(u64a val, u32 n) {
222	assert(n < sizeof(val) * `8`);
223	return !!(val & (`1ULL` << n));
224	}
225
// Vector counterparts of testbit_u32/testbit_u64a: forward (val, n) unchanged
// to the testbitNNN helpers, which are not defined in this file.
#define testbit_m128(val, n) (testbit128(val, n))
#define testbit_m256(val, n) (testbit256(val, n))
#define testbit_m384(val, n) (testbit384(val, n))
#define testbit_m512(val, n) (testbit512(val, n))
230
231	#endif
232

Browse the source code of ClickHouse/contrib/hyperscan/src/util/uniform_ops.h