extract_bits.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/extract_bits.h]

1	/*  Copyright (C) 2011-2017 Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_EXTRACT_BITS_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_EXTRACT_BITS_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/bit_and.h>
17	#include <simdpp/core/bit_or.h>
18	#include <simdpp/core/extract.h>
19	#include <simdpp/core/i_shift_l.h>
20	#include <simdpp/core/i_sub.h>
21	#include <simdpp/core/make_uint.h>
22	#include <simdpp/core/move_l.h>
23
24	namespace simdpp {
25	namespace SIMDPP_ARCH_NAMESPACE {
26	namespace detail {
27	namespace insn {
28
29	SIMDPP_INL uint16_t i_extract_bits_any(const uint8<`16`>& ca)
30	{
31	uint8<`16`> a = ca;
32	#if SIMDPP_USE_NULL
33	uint16_t r = `0`;
34	for (unsigned i = `0`; i < a.length; i++) {
35	uint8_t x = ca.el(i);
36	x = x & `1`;
37	r = (r >> `1`) \| (uint16_t(x) << `15`);
38	}
39	return r;
40	#elif SIMDPP_USE_SSE2
41	// Note that i_extract_bits depends on the exact implementation of this
42	// function.
43	return _mm_movemask_epi8(a.native());
44	#elif SIMDPP_USE_NEON
45	uint8x16 mask = make_uint(`0x01`,`0x02`,`0x04`,`0x08`,`0x10`,`0x20`,`0x40`,`0x80`);
46
47	a = bit_and(a, mask);
48	uint16<`8`> a16 = vpaddlq_u8(a.native());
49	uint32<`4`> a32 = vpaddlq_u16(a16.native());
50	uint8<`16`> a8 = vreinterpretq_u8_u64(vpaddlq_u32(a32.native()));
51	uint8x8_t r = vzip_u8(vget_low_u8(a8.native()), vget_high_u8(a8.native())).val[`0`];
52	return vget_lane_u16(vreinterpret_u16_u8(r), `0`);
53	#elif SIMDPP_USE_ALTIVEC
54	uint8x16 mask = make_uint(`0x01`,`0x02`,`0x04`,`0x08`,`0x10`,`0x20`,`0x40`,`0x80`);
55	a = bit_and(a, mask);
56	uint32<`4`> zero = make_zero();
57	uint32x4 s = vec_sum4s(a.native(), zero.native());
58	uint32x4 shifts = make_uint(`0`, `0`, `8`, `8`);
59	s = (__vector uint32_t) vec_sl(s.native(), shifts.native());
60	s = (int32x4)vec_sums((__vector int32_t)s.native(),
61	(__vector int32_t)zero.native());
62	#if SIMDPP_BIG_ENDIAN
63	return extract<`7`>(uint16x8(s));
64	#else
65	return extract<`6`>(uint16x8(s));
66	#endif
67	#elif SIMDPP_USE_MSA
68	// Note: the implementation of extract_bits depends of the exact behavior
69	// of this function
70	uint8x16 mask = make_uint(`0x01`,`0x02`,`0x04`,`0x08`,`0x10`,`0x20`,`0x40`,`0x80`);
71
72	a = bit_and(a, mask);
73	uint16<`8`> a16 = __msa_hadd_u_h(a.native(), a.native());
74	uint32<`4`> a32 = __msa_hadd_u_w(a16.native(), a16.native());
75	a = (v16u8) __msa_hadd_u_d(a32.native(), a32.native());
76	a = bit_or(a, move16_l<`7`>(a));
77	return extract<`0`>((uint16<`8`>)a);
78	#endif
79	}
80
81	SIMDPP_INL uint32_t i_extract_bits_any(const uint8<`32`>& ca)
82	{
83	uint8<`32`> a = ca;
84	#if SIMDPP_USE_AVX2
85	return _mm256_movemask_epi8(a.native());
86	#else
87	uint8<`16`> lo, hi;
88	split(a, lo, hi);
89	return i_extract_bits_any(lo) \| (i_extract_bits_any(hi) << `16`);
90	#endif
91	}
92
93	template<unsigned id> SIMDPP_INL
94	uint16_t i_extract_bits(const uint8<`16`>& ca)
95	{
96	uint8<`16`> a = ca;
97	#if SIMDPP_USE_NULL
98	uint16_t r = `0`;
99	for (unsigned i = `0`; i < a.length; i++) {
100	uint8_t x = ca.el(i);
101	x = (x >> id) & `1`;
102	r = (r >> `1`) \| (uint16_t(x) << `15`);
103	}
104	return r;
105	#elif SIMDPP_USE_SSE2
106	a = shift_l<`7`-id>((uint16x8) a);
107	return i_extract_bits_any(a);
108	#elif SIMDPP_USE_NEON
109	int8x16 shift_mask = make_int(`0`-int(id), `1`-int(id), `2`-int(id), `3`-int(id),
110	`4`-int(id), `5`-int(id), `6`-int(id), `7`-int(id));
111
112	a = vshlq_u8(a.native(), shift_mask.native());
113	return i_extract_bits_any(a);
114	#elif SIMDPP_USE_ALTIVEC
115	uint8x16 rot_mask = make_int(`0`-int(id), `1`-int(id), `2`-int(id), `3`-int(id),
116	`4`-int(id), `5`-int(id), `6`-int(id), `7`-int(id));
117	a = vec_rl(a.native(), rot_mask.native());
118	return i_extract_bits_any(a);
119	#elif SIMDPP_USE_MSA
120	int8x16 shifts = make_int(`0`-int(id), `1`-int(id), `2`-int(id), `3`-int(id),
121	`4`-int(id), `5`-int(id), `6`-int(id), `7`-int(id));
122	uint8<`16`> a_l = (v16u8) __msa_sll_b((v16i8) a.native(), shifts.native());
123	shifts = sub((int8<`16`>) make_zero(), shifts);
124	uint8<`16`> a_r = (v16u8) __msa_srl_b((v16i8) a.native(), shifts.native());
125	a = bit_or(a_l, a_r);
126	return i_extract_bits_any(a);
127	#endif
128	}
129
130	template<unsigned id> SIMDPP_INL
131	uint32_t i_extract_bits(const uint8<`32`>& ca)
132	{
133	uint8<`32`> a = ca;
134	#if SIMDPP_USE_AVX2
135	a = shift_l<`7`-id>((uint16<`16`>) a);
136	return i_extract_bits_any(a);
137	#else
138	uint8<`16`> lo, hi;
139	split(a, lo, hi);
140	return i_extract_bits<id>(lo) \| (i_extract_bits<id>(hi) << `16`);
141	#endif
142	}
143
144	} // namespace insn
145	} // namespace detail
146	} // namespace SIMDPP_ARCH_NAMESPACE
147	} // namespace simdpp
148
149	#endif
150
151
152

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/extract_bits.h