1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_I_MUL_H
9#define LIBSIMDPP_SIMDPP_CORE_I_MUL_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/expr/i_mul.h>
17#include <simdpp/core/detail/get_expr_uint.h>
18#include <simdpp/core/detail/scalar_arg_impl.h>
19#include <simdpp/core/detail/get_expr_uint.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23
24// no 8 bit multiplications in SSE
25/** Multiplies 16-bit values and returns the lower part of the multiplication
26
27 @code
28 r0 = low(a0 * b0)
29 ...
30 rN = low(aN * bN)
31 @endcode
32
33 @par 256-bit version:
34 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
35*/
36template<unsigned N, class V1, class V2> SIMDPP_INL
37typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type
38 mul_lo(const any_int16<N,V1>& a,
39 const any_int16<N,V2>& b)
40{
41 return { { a.wrapped(), b.wrapped() } };
42}
43
44SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int16, int16)
45
46/** Multiplies signed 16-bit values and returns the higher half of the result.
47
48 @code
49 r0 = high(a0 * b0)
50 ...
51 rN = high(aN * bN)
52 @endcode
53
54 @par 128-bit version:
55 @icost{NEON, ALTIVEC, 3}
56
57 @par 256-bit version:
58 @icost{SSE2-AVX, 2}
59 @icost{NEON, ALTIVEC, 6}
60*/
61template<unsigned N, class E1, class E2> SIMDPP_INL
62int16<N, expr_mul_hi<int16<N,E1>,
63 int16<N,E2>>> mul_hi(const int16<N,E1>& a,
64 const int16<N,E2>& b)
65{
66 return { { a, b } };
67}
68
69SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, int16, int16)
70
71/** Multiplies unsigned 16-bit values and returns the higher half of the result.
72
73 @code
74 r0 = high(a0 * b0)
75 ...
76 rN = high(aN * bN)
77 @endcode
78
79 @par 128-bit version:
80 @icost{NEON, ALTIVEC, 3}
81
82 @par 256-bit version:
83 @icost{SSE2-AVX, 2}
84 @icost{NEON, ALTIVEC, 6}
85*/
86template<unsigned N, class E1, class E2> SIMDPP_INL
87uint16<N, expr_mul_hi<uint16<N,E1>,
88 uint16<N,E2>>> mul_hi(const uint16<N,E1>& a,
89 const uint16<N,E2>& b)
90{
91 return { { a, b } };
92}
93
94SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, uint16, uint16)
95
96
97/** Multiplies 32-bit values and returns the lower half of the result.
98
99 @code
100 r0 = low(a0 * b0)
101 ...
102 rN = low(aN * bN)
103 @endcode
104
105 @par 128-bit version:
106 @icost{SSE2-SSSE3, 6}
107 @icost{ALTIVEC, 8}
108
109 @par 256-bit version:
110 @icost{SSE2-SSSE3, 12}
111 @icost{SSE4.1, AVX, NEON, 2}
112 @icost{ALTIVEC, 16}
113*/
114template<unsigned N, class V1, class V2> SIMDPP_INL
115typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type
116 mul_lo(const any_int32<N,V1>& a,
117 const any_int32<N,V2>& b)
118{
119 return { { a.wrapped(), b.wrapped() } };
120}
121
122SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int32, int32)
123
124
125} // namespace SIMDPP_ARCH_NAMESPACE
126} // namespace simdpp
127
128#endif
129
130