i_mul_hi.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_mul_hi.h]

1	/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MUL_HI_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MUL_HI_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/i_mull.h>
17	#include <simdpp/core/unzip_hi.h>
18	#include <simdpp/detail/null/math.h>
19	#include <simdpp/detail/vector_array_macros.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26	static SIMDPP_INL
27	int16<`8`> i_mul_hi(const int16<`8`>& a, const int16<`8`>& b)
28	{
29	#if SIMDPP_USE_NULL
30	uint16<`8`> r;
31	for (unsigned i = `0`; i < a.length; i++) {
32	r.el(i) = (int32_t(a.el(i)) * b.el(i)) >> `16`;
33	}
34	return r;
35	#elif SIMDPP_USE_SSE2
36	return _mm_mulhi_epi16(a.native(), b.native());
37	#elif SIMDPP_USE_NEON
38	int32x4 lo = vmull_s16(vget_low_s16(a.native()), vget_low_s16(b.native()));
39	int32x4 hi = vmull_s16(vget_high_s16(a.native()), vget_high_s16(b.native()));
40	return unzip8_hi(int16x8(lo), int16x8(hi));
41	#elif SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
42	#if SIMDPP_BIG_ENDIAN
43	int16<`16`> ab;
44	ab = mull(a, b);
45	return unzip8_lo(ab.vec(`0`), ab.vec(`1`));
46	#else
47	int16<`16`> ab;
48	ab = mull(a, b);
49	return unzip8_hi(ab.vec(`0`), ab.vec(`1`));
50	#endif
51	#endif
52	}
53
#if SIMDPP_USE_AVX2
// AVX2 overload for sixteen signed 16-bit elements; the whole operation maps
// onto the single _mm256_mulhi_epi16 intrinsic.
static SIMDPP_INL
int16<16> i_mul_hi(const int16<16>& a, const int16<16>& b)
{
    int16<16> hi_words = _mm256_mulhi_epi16(a.native(), b.native());
    return hi_words;
}
#endif
61
#if SIMDPP_USE_AVX512BW
// AVX-512BW overload for thirty-two signed 16-bit elements; forwards to the
// single _mm512_mulhi_epi16 intrinsic.
static SIMDPP_INL
int16<32> i_mul_hi(const int16<32>& a, const int16<32>& b)
{
    int16<32> hi_words = _mm512_mulhi_epi16(a.native(), b.native());
    return hi_words;
}
#endif
69
70	// -----------------------------------------------------------------------------
71
72	static SIMDPP_INL
73	uint16<`8`> i_mul_hi(const uint16<`8`>& a, const uint16<`8`>& b)
74	{
75	#if SIMDPP_USE_NULL
76	uint16<`8`> r;
77	for (unsigned i = `0`; i < a.length; i++) {
78	r.el(i) = (uint32_t(a.el(i)) * b.el(i)) >> `16`;
79	}
80	return r;
81	#elif SIMDPP_USE_SSE2
82	return _mm_mulhi_epu16(a.native(), b.native());
83	#elif SIMDPP_USE_NEON
84	uint32x4 lo = vmull_u16(vget_low_u16(a.native()), vget_low_u16(b.native()));
85	uint32x4 hi = vmull_u16(vget_high_u16(a.native()), vget_high_u16(b.native()));
86	return unzip8_hi(uint16x8(lo), uint16x8(hi));
87	#elif SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN
88	uint16<`16`> ab;
89	ab = mull(a, b);
90	return unzip8_lo(ab.vec(`0`), ab.vec(`1`));
91	#elif (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN) \|\| SIMDPP_USE_MSA
92	uint16<`16`> ab;
93	ab = mull(a, b);
94	return unzip8_hi(ab.vec(`0`), ab.vec(`1`));
95	#endif
96	}
97
#if SIMDPP_USE_AVX2
// AVX2 overload for sixteen unsigned 16-bit elements; forwards to the single
// _mm256_mulhi_epu16 intrinsic.
static SIMDPP_INL
uint16<16> i_mul_hi(const uint16<16>& a, const uint16<16>& b)
{
    uint16<16> hi_words = _mm256_mulhi_epu16(a.native(), b.native());
    return hi_words;
}
#endif
105
#if SIMDPP_USE_AVX512BW
// AVX-512BW overload for thirty-two unsigned 16-bit elements; forwards to the
// single _mm512_mulhi_epu16 intrinsic.
static SIMDPP_INL
uint16<32> i_mul_hi(const uint16<32>& a, const uint16<32>& b)
{
    uint16<32> hi_words = _mm512_mulhi_epu16(a.native(), b.native());
    return hi_words;
}
#endif
113
114	// -----------------------------------------------------------------------------
115
116	template<class V> SIMDPP_INL
117	V i_mul_hi(const V& a, const V& b)
118	{
119	SIMDPP_VEC_ARRAY_IMPL2(V, i_mul_hi, a, b)
120	}
121
122	} // namespace insn
123	} // namespace detail
124	} // namespace SIMDPP_ARCH_NAMESPACE
125	} // namespace simdpp
126
127	#endif
128
129

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_mul_hi.h