limex_shuffle.h source code [ClickHouse/contrib/hyperscan/src/nfa/limex_shuffle.h]

1	/*
2	* Copyright (c) 2015-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
/** \file
 * \brief Naive dynamic shuffles.
 *
 * These are written with the assumption that the provided masks are sparsely
 * populated and never contain more than 32 on bits. Other implementations will
 * be faster and actually correct if these assumptions don't hold true.
 */
36
37	#ifndef LIMEX_SHUFFLE_H
38	#define LIMEX_SHUFFLE_H
39
40	#include "ue2common.h"
41	#include "util/arch.h"
42	#include "util/bitutils.h"
43	#include "util/simd_utils.h"
44
45	static really_inline
46	u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
47	m128 shuffled = pshufb_m128(s, permute);
48	m128 compared = and128(shuffled, compare);
49	u16 rv = ~movemask128(eq128(compared, shuffled));
50	return (u32)rv;
51	}
52
53	#if defined(HAVE_AVX2)
54	static really_inline
55	u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
56	// vpshufb doesn't cross lanes, so this is a bit of a cheat
57	m256 shuffled = pshufb_m256(s, permute);
58	m256 compared = and256(shuffled, compare);
59	u32 rv = ~movemask256(eq256(compared, shuffled));
60	// stitch the lane-wise results back together
61	return (u32)((rv >> `16`) \| (rv & `0xffffU`));
62	}
63	#endif // AVX2
64
65	#if defined(HAVE_AVX512)
66	static really_inline
67	u32 packedExtract512(m512 s, const m512 permute, const m512 compare) {
68	// vpshufb doesn't cross lanes, so this is a bit of a cheat
69	m512 shuffled = pshufb_m512(s, permute);
70	m512 compared = and512(shuffled, compare);
71	u64a rv = ~eq512mask(compared, shuffled);
72	// stitch the lane-wise results back together
73	rv = rv >> `32` \| rv;
74	return (u32)(((rv >> `16`) \| rv) & `0xffffU`);
75	}
76	#endif // AVX512
77
78	#endif // LIMEX_SHUFFLE_H
79

Browse the source code of ClickHouse/contrib/hyperscan/src/nfa/limex_shuffle.h