combine.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/combine.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_COMBINE_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_COMBINE_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16
17	namespace simdpp {
18	namespace SIMDPP_ARCH_NAMESPACE {
19	namespace detail {
20	namespace insn {
21
#if SIMDPP_USE_AVX2
// Joins two 16-lane uint8 vectors into a single 32-lane vector.
// `a` supplies the low 128 bits of the result and `b` the high 128 bits.
// `Dummy` is not referenced in the body; presumably it lets callers use the
// same explicit-template-argument call form as the generic overload
// (TODO confirm against the call sites).
template<class Dummy> SIMDPP_INL
uint8<32> i_combine(const uint8<16>& a, const uint8<16>& b)
{
    uint8<32> joined;
    // Widen `a` to 256 bits, then overwrite 128-bit lane 1 with `b`.
    joined = _mm256_inserti128_si256(_mm256_castsi128_si256(a.native()),
                                     b.native(), 1);
    return joined;
}
#endif
32
#if SIMDPP_USE_AVX512BW
// Joins two 32-lane uint8 vectors into a single 64-lane vector.
// `a` supplies the low 256 bits of the result and `b` the high 256 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint8<64> i_combine(const uint8<32>& a, const uint8<32>& b)
{
    uint8<64> joined;
    // Widen `a` to 512 bits, then overwrite 256-bit lane 1 with `b`.
    joined = _mm512_inserti64x4(_mm512_castsi256_si512(a.native()),
                                b.native(), 1);
    return joined;
}
#endif
43
44	// -----------------------------------------------------------------------------
45
#if SIMDPP_USE_AVX2
// Joins two 8-lane uint16 vectors into a single 16-lane vector.
// `a` supplies the low 128 bits of the result and `b` the high 128 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint16<16> i_combine(const uint16<8>& a, const uint16<8>& b)
{
    uint16<16> joined;
    // Widen `a` to 256 bits, then overwrite 128-bit lane 1 with `b`.
    joined = _mm256_inserti128_si256(_mm256_castsi128_si256(a.native()),
                                     b.native(), 1);
    return joined;
}
#endif
56
#if SIMDPP_USE_AVX512BW
// Joins two 16-lane uint16 vectors into a single 32-lane vector.
// `a` supplies the low 256 bits of the result and `b` the high 256 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint16<32> i_combine(const uint16<16>& a, const uint16<16>& b)
{
    uint16<32> joined;
    // Widen `a` to 512 bits, then overwrite 256-bit lane 1 with `b`.
    joined = _mm512_inserti64x4(_mm512_castsi256_si512(a.native()),
                                b.native(), 1);
    return joined;
}
#endif
67
68	// -----------------------------------------------------------------------------
69
#if SIMDPP_USE_AVX2
// Joins two 4-lane uint32 vectors into a single 8-lane vector.
// `a` supplies the low 128 bits of the result and `b` the high 128 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint32<8> i_combine(const uint32<4>& a, const uint32<4>& b)
{
    uint32<8> joined;
    // Widen `a` to 256 bits, then overwrite 128-bit lane 1 with `b`.
    joined = _mm256_inserti128_si256(_mm256_castsi128_si256(a.native()),
                                     b.native(), 1);
    return joined;
}
#endif
80
#if SIMDPP_USE_AVX512F
// Joins two 8-lane uint32 vectors into a single 16-lane vector.
// `a` supplies the low 256 bits of the result and `b` the high 256 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint32<16> i_combine(const uint32<8>& a, const uint32<8>& b)
{
    uint32<16> joined;
    // Widen `a` to 512 bits, then overwrite 256-bit lane 1 with `b`.
    joined = _mm512_inserti64x4(_mm512_castsi256_si512(a.native()),
                                b.native(), 1);
    return joined;
}
#endif
91
92	// -----------------------------------------------------------------------------
93
#if SIMDPP_USE_AVX2
// Joins two 2-lane uint64 vectors into a single 4-lane vector.
// `a` supplies the low 128 bits of the result and `b` the high 128 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint64<4> i_combine(const uint64<2>& a, const uint64<2>& b)
{
    uint64<4> joined;
    // Widen `a` to 256 bits, then overwrite 128-bit lane 1 with `b`.
    joined = _mm256_inserti128_si256(_mm256_castsi128_si256(a.native()),
                                     b.native(), 1);
    return joined;
}
#endif
104
#if SIMDPP_USE_AVX512F
// Joins two 4-lane uint64 vectors into a single 8-lane vector.
// `a` supplies the low 256 bits of the result and `b` the high 256 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
uint64<8> i_combine(const uint64<4>& a, const uint64<4>& b)
{
    uint64<8> joined;
    // Widen `a` to 512 bits, then overwrite 256-bit lane 1 with `b`.
    joined = _mm512_inserti64x4(_mm512_castsi256_si512(a.native()),
                                b.native(), 1);
    return joined;
}
#endif
115
116	// -----------------------------------------------------------------------------
117
#if SIMDPP_USE_AVX
// Joins two 4-lane float32 vectors into a single 8-lane vector.
// `a` supplies the low 128 bits of the result and `b` the high 128 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
float32<8> i_combine(const float32<4>& a, const float32<4>& b)
{
    float32<8> joined;
    // Widen `a` to 256 bits, then overwrite 128-bit lane 1 with `b`.
    joined = _mm256_insertf128_ps(_mm256_castps128_ps256(a.native()),
                                  b.native(), 1);
    return joined;
}
#endif
128
#if SIMDPP_USE_AVX512F
// Joins two 8-lane float32 vectors into a single 16-lane vector.
// `a` supplies the low 256 bits of the result and `b` the high 256 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
float32<16> i_combine(const float32<8>& a, const float32<8>& b)
{
    float32<16> joined;
    // The 256-bit insert is done through the double-precision intrinsic, so
    // both operands are reinterpreted as pd for the insert and the result is
    // reinterpreted back to ps. The casts only change the static type.
    joined = _mm512_castpd_ps(
        _mm512_insertf64x4(_mm512_castps_pd(_mm512_castps256_ps512(a.native())),
                           _mm256_castps_pd(b.native()), 1));
    return joined;
}
#endif
140
141	// -----------------------------------------------------------------------------
142
#if SIMDPP_USE_AVX
// Joins two 2-lane float64 vectors into a single 4-lane vector.
// `a` supplies the low 128 bits of the result and `b` the high 128 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
float64<4> i_combine(const float64<2>& a, const float64<2>& b)
{
    float64<4> joined;
    // Widen `a` to 256 bits, then overwrite 128-bit lane 1 with `b`.
    joined = _mm256_insertf128_pd(_mm256_castpd128_pd256(a.native()),
                                  b.native(), 1);
    return joined;
}
#endif
153
#if SIMDPP_USE_AVX512F
// Joins two 4-lane float64 vectors into a single 8-lane vector.
// `a` supplies the low 256 bits of the result and `b` the high 256 bits.
// `Dummy` is not referenced in the body.
template<class Dummy> SIMDPP_INL
float64<8> i_combine(const float64<4>& a, const float64<4>& b)
{
    float64<8> joined;
    // Widen `a` to 512 bits, then overwrite 256-bit lane 1 with `b`.
    joined = _mm512_insertf64x4(_mm512_castpd256_pd512(a.native()),
                                b.native(), 1);
    return joined;
}
#endif
164
165	// -----------------------------------------------------------------------------
166	// generic implementation
167	template<class V, class H> SIMDPP_INL
168	V i_combine(const H& a1, const H& a2)
169	{
170	V r;
171	unsigned h = H::vec_length;
172	for (unsigned i = `0`; i < h; ++i) { r.vec(i) = a1.vec(i); }
173	for (unsigned i = `0`; i < h; ++i) { r.vec(i+h) = a2.vec(i); }
174	return r;
175	}
176
177	} // namespace insn
178	} // namespace detail
179	} // namespace SIMDPP_ARCH_NAMESPACE
180	} // namespace simdpp
181
182	#endif
183

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/combine.h