/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef MASKED_MOVE_H
#define MASKED_MOVE_H

#include "arch.h"

#if defined(HAVE_AVX2)

#include "unaligned.h"
#include "simd_utils.h"

#ifdef __cplusplus
extern "C" {
#endif
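/*
 * Lookup tables defined outside this header. The descriptions below are
 * inferred from how the tables are used in this file: mm_mask_mask supplies
 * the base value for the AVX2 maskload mask built by _get_mm_mask_end();
 * mm_shuffle_end, indexed by len - 4, supplies the pshufb control that
 * positions the final dword loaded by masked_move256_len().
 */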
extern const u32 mm_mask_mask[16];
extern const u32 mm_shuffle_end[32][8];
#ifdef __cplusplus
}
#endif

/* load mask for len bytes from start of buffer */
static really_inline m256
_get_mm_mask_end(u32 len) {
    assert(len <= 32);
    const u8 *masky = (const u8 *)mm_mask_mask;
    m256 mask = load256(masky + 32);
    mask = _mm256_sll_epi32(mask, _mm_cvtsi32_si128(8 - (len >> 2)));
    return mask;
}
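/*
 * Worked example (a sketch; it assumes mm_mask_mask is laid out so that the
 * shift above leaves the sign bit set only in the lanes covering the first
 * len bytes): for len = 12, the shift count is 8 - (12 >> 2) = 5 bits, so
 * only dword lanes 0..2 keep bit 31 set, and the subsequent maskload in
 * masked_move256_len() reads just bytes 0..11 of the buffer.
 */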

/*
 * masked_move256_len: loads len bytes from *buf into an m256.
 * _______________________________
 * |0<----len---->|            32|
 * -------------------------------
 */
static really_inline m256
masked_move256_len(const u8 *buf, const u32 len) {
    assert(len >= 4);

    m256 lmask = _get_mm_mask_end(len);

    /* Load the final dword of the region and broadcast the 64-bit lane
     * containing it across the vector so the shuffle below can place it
     * anywhere. */
    u32 end = unaligned_load_u32(buf + len - 4);
    m256 preshufend = _mm256_broadcastq_epi64(_mm_cvtsi32_si128(end));

    /* The masked load covers the leading whole dwords of the region; the
     * shuffle positions the tail dword at bytes [len - 4, len). */
    m256 v = _mm256_maskload_epi32((const int *)buf, lmask);
    m256 shufend = pshufb_m256(preshufend,
                               loadu256(&mm_shuffle_end[len - 4]));
    m256 target = or256(v, shufend);

    return target;
}
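/*
 * Usage sketch. The names 'buf' and 'buf_len' below are illustrative, not
 * part of this API; the point is that the function's memory reads stay
 * within [buf, buf + buf_len), which makes it suitable for pulling a short
 * buffer tail into a vector:
 *
 *     if (buf_len >= 4 && buf_len <= 32) {
 *         m256 data = masked_move256_len(buf, buf_len);
 *         // bytes [0, buf_len) of 'data' now hold the buffer contents
 *     }
 */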

#endif /* AVX2 */
#endif /* MASKED_MOVE_H */