1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_I_AVG_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_I_AVG_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/i_avg.h> |
17 | #include <simdpp/core/detail/scalar_arg_impl.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | |
22 | |
23 | /** Computes rounded average of the unsigned 8-bit values. |
24 | |
25 | @code |
26 | r0 = (a0 + b0 + 1) / 2 |
27 | ... |
28 | rN = (aN + bN + 1) / 2 |
29 | @endcode |
30 | |
31 | @par 256-bit version: |
32 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
33 | */ |
34 | template<unsigned N, class E1, class E2> SIMDPP_INL |
35 | uint8<N,expr_empty> avg(const uint8<N,E1>& a, const uint8<N,E2>& b) |
36 | { |
37 | return detail::insn::i_avg(a.eval(), b.eval()); |
38 | } |
39 | |
40 | SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint8, uint8) |
41 | |
42 | /** Computes rounded average of signed 8-bit values. |
43 | |
44 | @code |
45 | r0 = (a0 + b0 + 1) / 2 |
46 | ... |
47 | rN = (aN + bN + 1) / 2 |
48 | @endcode |
49 | |
50 | @par 128-bit version: |
51 | @icost{SSE2-AVX2, 4-5} |
52 | |
53 | @par 256-bit version: |
54 | @icost{SSE2-AVX, 8-9} |
55 | @icost{AVX2, 4-5} |
56 | @icost{NEON, ALTIVEC, 2} |
57 | */ |
58 | template<unsigned N, class E1, class E2> SIMDPP_INL |
59 | int8<N,expr_empty> avg(const int8<N,E1>& a, const int8<N,E2>& b) |
60 | { |
61 | return detail::insn::i_avg(a.eval(), b.eval()); |
62 | } |
63 | |
64 | SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int8, int8) |
65 | |
66 | /** Computes rounded average of unsigned 16-bit values. |
67 | |
68 | @code |
69 | r0 = (a0 + b0 + 1) / 2 |
70 | ... |
71 | rN = (aN + bN + 1) / 2 |
72 | @endcode |
73 | |
74 | @par 256-bit version: |
75 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
76 | */ |
77 | template<unsigned N, class E1, class E2> SIMDPP_INL |
78 | uint16<N,expr_empty> avg(const uint16<N,E1>& a, const uint16<N,E2>& b) |
79 | { |
80 | return detail::insn::i_avg(a.eval(), b.eval()); |
81 | } |
82 | |
83 | SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint16, uint16) |
84 | |
85 | /** Computes rounded average of signed 16-bit values. |
86 | |
87 | @code |
88 | r0 = (a0 + b0 + 1) / 2 |
89 | ... |
90 | rN = (aN + bN + 1) / 2 |
91 | @endcode |
92 | |
93 | @par 128-bit version: |
94 | @icost{SSE2-AVX2, 4-5} |
95 | |
96 | @par 256-bit version: |
97 | @icost{SSE2-AVX, 8-9} |
98 | @icost{AVX2, 4-5} |
99 | @icost{NEON, ALTIVEC, 2} |
100 | */ |
101 | template<unsigned N, class E1, class E2> SIMDPP_INL |
102 | int16<N,expr_empty> avg(const int16<N,E1>& a, const int16<N,E2>& b) |
103 | { |
104 | return detail::insn::i_avg(a.eval(), b.eval()); |
105 | } |
106 | |
107 | SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int16, int16) |
108 | |
109 | /** Computes rounded average of unsigned 32-bit values. |
110 | |
111 | @code |
112 | r0 = (a0 + b0 + 1) / 2 |
113 | ... |
114 | rN = (aN + bN + 1) / 2 |
115 | @endcode |
116 | |
117 | @par 128-bit version: |
118 | @icost{SSE2-AVX2, 6-7} |
119 | |
120 | @par 256-bit version: |
121 | @icost{SSE2-AVX, 12-13} |
122 | @icost{AVX2, 6-7} |
123 | @icost{NEON, ALTIVEC, 2} |
124 | */ |
125 | template<unsigned N, class E1, class E2> SIMDPP_INL |
126 | uint32<N,expr_empty> avg(const uint32<N,E1>& a, const uint32<N,E2>& b) |
127 | { |
128 | return detail::insn::i_avg(a.eval(), b.eval()); |
129 | } |
130 | |
131 | SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint32, uint32) |
132 | |
133 | /** Computes rounded average of signed 32-bit values. |
134 | |
135 | @code |
136 | r0 = (a0 + b0 + 1) / 2 |
137 | ... |
138 | rN = (aN + bN + 1) / 2 |
139 | @endcode |
140 | |
141 | @par 128-bit version: |
142 | @icost{SSE2-AVX2, 9-10} |
143 | @icost{NEON, 1} |
144 | |
145 | @par 256-bit version: |
146 | @icost{SSE2-AVX, 18-19} |
147 | @icost{AVX2, 9-10} |
148 | @icost{NEON, ALTIVEC, 2} |
149 | */ |
150 | template<unsigned N, class E1, class E2> SIMDPP_INL |
151 | int32<N,expr_empty> avg(const int32<N,E1>& a, const int32<N,E2>& b) |
152 | { |
153 | return detail::insn::i_avg(a.eval(), b.eval()); |
154 | } |
155 | |
156 | SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int32, int32) |
157 | |
158 | } // namespace SIMDPP_ARCH_NAMESPACE |
159 | } // namespace simdpp |
160 | |
161 | #endif |
162 | |
163 | |