1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_CMP_EQ_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_CMP_EQ_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/cmp_eq.h> |
17 | #include <simdpp/core/detail/scalar_arg_impl.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | |
22 | |
23 | /** Compares 8-bit values for equality. |
24 | |
25 | @code |
26 | r0 = (a0 == b0) ? 0xff : 0x0 |
27 | ... |
28 | rN = (aN == bN) ? 0xff : 0x0 |
29 | @endcode |
30 | |
31 | @par 256-bit version: |
32 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
33 | */ |
34 | template<unsigned N, class V1, class V2> SIMDPP_INL |
35 | mask_int8<N,expr_empty> cmp_eq(const any_int8<N,V1>& a, |
36 | const any_int8<N,V2>& b) |
37 | { |
38 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
39 | ra = a.wrapped().eval(); |
40 | rb = b.wrapped().eval(); |
41 | return detail::insn::i_cmp_eq(ra, rb); |
42 | } |
43 | |
44 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int8, any_int8) |
45 | |
46 | /** Compares 16-bit values for equality. |
47 | |
48 | @code |
49 | r0 = (a0 == b0) ? 0xffff : 0x0 |
50 | ... |
51 | rN = (aN == bN) ? 0xffff : 0x0 |
52 | @endcode |
53 | |
54 | @par 256-bit version: |
55 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
56 | */ |
57 | template<unsigned N, class V1, class V2> SIMDPP_INL |
58 | mask_int16<N,expr_empty> cmp_eq(const any_int16<N,V1>& a, |
59 | const any_int16<N,V2>& b) |
60 | { |
61 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
62 | ra = a.wrapped().eval(); |
63 | rb = b.wrapped().eval(); |
64 | return detail::insn::i_cmp_eq(ra, rb); |
65 | } |
66 | |
67 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int16, any_int16) |
68 | |
69 | /** Compares the values of two int32x4 vectors for equality |
70 | |
71 | @code |
72 | r0 = (a0 == b0) ? 0xffffffff : 0x0 |
73 | ... |
74 | rN = (aN == bN) ? 0xffffffff : 0x0 |
75 | @endcode |
76 | |
77 | @par 256-bit version: |
78 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
79 | */ |
80 | template<unsigned N, class V1, class V2> SIMDPP_INL |
81 | mask_int32<N,expr_empty> cmp_eq(const any_int32<N,V1>& a, |
82 | const any_int32<N,V2>& b) |
83 | { |
84 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
85 | ra = a.wrapped().eval(); |
86 | rb = b.wrapped().eval(); |
87 | return detail::insn::i_cmp_eq(ra, rb); |
88 | } |
89 | |
90 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int32, any_int32) |
91 | |
92 | /** Compares the values of two int64x2 vectors for equality |
93 | |
94 | @code |
95 | r0 = (a0 == b0) ? 0xffffffffffffffff : 0x0 |
96 | ... |
97 | rN = (aN == bN) ? 0xffffffffffffffff : 0x0 |
98 | @endcode |
99 | |
100 | @par 128-bit version: |
101 | @icost{SSE2-SSSE3, 5} |
102 | @icost{XOP, 1} |
103 | @icost{NEON, 3} |
104 | @icost{ALTIVEC, 3-4} |
105 | |
106 | @par 256-bit version: |
107 | @icost{SSE2-SSSE3, AVX, 10} |
108 | @icost{XOP, SSE4.1, 2} |
109 | @icost{NEON, 6} |
110 | @icost{ALTIVEC, 6-7} |
111 | */ |
112 | template<unsigned N, class V1, class V2> SIMDPP_INL |
113 | mask_int64<N,expr_empty> cmp_eq(const any_int64<N,V1>& a, |
114 | const any_int64<N,V2>& b) |
115 | { |
116 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
117 | ra = a.wrapped().eval(); |
118 | rb = b.wrapped().eval(); |
119 | return detail::insn::i_cmp_eq(ra, rb); |
120 | } |
121 | |
122 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int64, any_int64) |
123 | |
124 | /** Compares the values of two float32x4 vectors for equality |
125 | |
126 | @code |
127 | r0 = (a0 == b0) ? 0xffffffff : 0x0 |
128 | ... |
129 | rN = (aN == bN) ? 0xffffffff : 0x0 |
130 | @endcode |
131 | |
132 | @par 256-bit version: |
133 | @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2} |
134 | */ |
135 | template<unsigned N, class V1, class V2> SIMDPP_INL |
136 | mask_float32<N,expr_empty> cmp_eq(const any_float32<N,V1>& a, |
137 | const any_float32<N,V2>& b) |
138 | { |
139 | return detail::insn::i_cmp_eq(a.wrapped().eval(), b.wrapped().eval()); |
140 | } |
141 | |
142 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_float32, any_float32) |
143 | |
144 | /** Compares the values of two float64x2 vectors for equality |
145 | |
146 | @code |
147 | r0 = (a0 == b0) ? 0xffffffffffffffff : 0x0 |
148 | ... |
149 | rN = (aN == bN) ? 0xffffffffffffffff : 0x0 |
150 | @endcode |
151 | |
152 | @par 128-bit version: |
153 | @novec{NEON, ALTIVEC} |
154 | |
155 | @par 256-bit version: |
156 | @novec{NEON, ALTIVEC} |
157 | @icost{SSE2-SSE4.1, 2} |
158 | */ |
159 | template<unsigned N, class V1, class V2> SIMDPP_INL |
160 | mask_float64<N,expr_empty> cmp_eq(const any_float64<N,V1>& a, |
161 | const any_float64<N,V2>& b) |
162 | { |
163 | return detail::insn::i_cmp_eq(a.wrapped().eval(), b.wrapped().eval()); |
164 | } |
165 | |
166 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_float64, any_float64) |
167 | |
168 | |
169 | } // namespace SIMDPP_ARCH_NAMESPACE |
170 | } // namespace simdpp |
171 | |
172 | #endif |
173 | |
174 | |