1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_I_MUL_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_I_MUL_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/expr/i_mul.h> |
17 | #include <simdpp/core/detail/get_expr_uint.h> |
18 | #include <simdpp/core/detail/scalar_arg_impl.h> |
19 | #include <simdpp/core/detail/get_expr_uint.h> |
20 | |
21 | namespace simdpp { |
22 | namespace SIMDPP_ARCH_NAMESPACE { |
23 | |
24 | // no 8 bit multiplications in SSE |
25 | /** Multiplies 16-bit values and returns the lower part of the multiplication |
26 | |
27 | @code |
28 | r0 = low(a0 * b0) |
29 | ... |
30 | rN = low(aN * bN) |
31 | @endcode |
32 | |
33 | @par 256-bit version: |
34 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
35 | */ |
36 | template<unsigned N, class V1, class V2> SIMDPP_INL |
37 | typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type |
38 | mul_lo(const any_int16<N,V1>& a, |
39 | const any_int16<N,V2>& b) |
40 | { |
41 | return { { a.wrapped(), b.wrapped() } }; |
42 | } |
43 | |
44 | SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int16, int16) |
45 | |
46 | /** Multiplies signed 16-bit values and returns the higher half of the result. |
47 | |
48 | @code |
49 | r0 = high(a0 * b0) |
50 | ... |
51 | rN = high(aN * bN) |
52 | @endcode |
53 | |
54 | @par 128-bit version: |
55 | @icost{NEON, ALTIVEC, 3} |
56 | |
57 | @par 256-bit version: |
58 | @icost{SSE2-AVX, 2} |
59 | @icost{NEON, ALTIVEC, 6} |
60 | */ |
61 | template<unsigned N, class E1, class E2> SIMDPP_INL |
62 | int16<N, expr_mul_hi<int16<N,E1>, |
63 | int16<N,E2>>> mul_hi(const int16<N,E1>& a, |
64 | const int16<N,E2>& b) |
65 | { |
66 | return { { a, b } }; |
67 | } |
68 | |
69 | SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, int16, int16) |
70 | |
71 | /** Multiplies unsigned 16-bit values and returns the higher half of the result. |
72 | |
73 | @code |
74 | r0 = high(a0 * b0) |
75 | ... |
76 | rN = high(aN * bN) |
77 | @endcode |
78 | |
79 | @par 128-bit version: |
80 | @icost{NEON, ALTIVEC, 3} |
81 | |
82 | @par 256-bit version: |
83 | @icost{SSE2-AVX, 2} |
84 | @icost{NEON, ALTIVEC, 6} |
85 | */ |
86 | template<unsigned N, class E1, class E2> SIMDPP_INL |
87 | uint16<N, expr_mul_hi<uint16<N,E1>, |
88 | uint16<N,E2>>> mul_hi(const uint16<N,E1>& a, |
89 | const uint16<N,E2>& b) |
90 | { |
91 | return { { a, b } }; |
92 | } |
93 | |
94 | SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, uint16, uint16) |
95 | |
96 | |
97 | /** Multiplies 32-bit values and returns the lower half of the result. |
98 | |
99 | @code |
100 | r0 = low(a0 * b0) |
101 | ... |
102 | rN = low(aN * bN) |
103 | @endcode |
104 | |
105 | @par 128-bit version: |
106 | @icost{SSE2-SSSE3, 6} |
107 | @icost{ALTIVEC, 8} |
108 | |
109 | @par 256-bit version: |
110 | @icost{SSE2-SSSE3, 12} |
111 | @icost{SSE4.1, AVX, NEON, 2} |
112 | @icost{ALTIVEC, 16} |
113 | */ |
114 | template<unsigned N, class V1, class V2> SIMDPP_INL |
115 | typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type |
116 | mul_lo(const any_int32<N,V1>& a, |
117 | const any_int32<N,V2>& b) |
118 | { |
119 | return { { a.wrapped(), b.wrapped() } }; |
120 | } |
121 | |
122 | SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int32, int32) |
123 | |
124 | |
125 | } // namespace SIMDPP_ARCH_NAMESPACE |
126 | } // namespace simdpp |
127 | |
128 | #endif |
129 | |
130 | |