1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_CMP_NEQ_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_CMP_NEQ_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/cmp_neq.h> |
17 | #include <simdpp/core/detail/scalar_arg_impl.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | |
22 | |
23 | /** Compares the values of two int8x16 vectors for inequality |
24 | |
25 | @code |
26 | r0 = (a0 != b0) ? 0xff : 0x0 |
27 | ... |
28 | rN = (aN != bN) ? 0xff : 0x0 |
29 | @endcode |
30 | |
31 | @par 128-bit version: |
32 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
33 | @icost{XOP, 1} |
34 | |
    @par 256-bit version:
36 | @icost{SSE2-AVX, NEON, ALTIVEC, 4} |
37 | @icost{AVX2, 2} |
38 | @icost{XOP, 2} |
39 | */ |
40 | template<unsigned N, class V1, class V2> SIMDPP_INL |
41 | mask_int8<N,expr_empty> cmp_neq(const any_int8<N,V1>& a, |
42 | const any_int8<N,V2>& b) |
43 | { |
44 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
45 | ra = a.wrapped().eval(); |
46 | rb = b.wrapped().eval(); |
47 | return detail::insn::i_cmp_neq(ra, rb); |
48 | } |
49 | |
50 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int8, any_int8) |
51 | |
52 | /** Compares the values of two int16x8 vectors for inequality |
53 | |
54 | @code |
55 | r0 = (a0 != b0) ? 0xffff : 0x0 |
56 | ... |
57 | rN = (aN != bN) ? 0xffff : 0x0 |
58 | @endcode |
59 | |
60 | @par 128-bit version: |
61 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
62 | @icost{XOP, 1} |
63 | |
    @par 256-bit version:
65 | @icost{SSE2-AVX, NEON, ALTIVEC, 4} |
66 | @icost{AVX2, 2} |
67 | @icost{XOP, 2} |
68 | */ |
69 | template<unsigned N, class V1, class V2> SIMDPP_INL |
70 | mask_int16<N,expr_empty> cmp_neq(const any_int16<N,V1>& a, |
71 | const any_int16<N,V2>& b) |
72 | { |
73 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
74 | ra = a.wrapped().eval(); |
75 | rb = b.wrapped().eval(); |
76 | return detail::insn::i_cmp_neq(ra, rb); |
77 | } |
78 | |
79 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int16, any_int16) |
80 | |
81 | /** Compares the values of two int32x4 vectors for inequality |
82 | |
83 | @code |
84 | r0 = (a0 != b0) ? 0xffffffff : 0x0 |
85 | ... |
86 | rN = (aN != bN) ? 0xffffffff : 0x0 |
87 | @endcode |
88 | |
89 | @par 128-bit version: |
90 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
91 | @icost{XOP, 1} |
92 | |
    @par 256-bit version:
94 | @icost{SSE2-AVX, NEON, ALTIVEC, 4} |
95 | @icost{AVX2, 2} |
96 | @icost{XOP, 2} |
97 | */ |
98 | template<unsigned N, class V1, class V2> SIMDPP_INL |
99 | mask_int32<N,expr_empty> cmp_neq(const any_int32<N,V1>& a, |
100 | const any_int32<N,V2>& b) |
101 | { |
102 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
103 | ra = a.wrapped().eval(); |
104 | rb = b.wrapped().eval(); |
105 | return detail::insn::i_cmp_neq(ra, rb); |
106 | } |
107 | |
108 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int32, any_int32) |
109 | |
110 | /** Compares the values of two int64x2 vectors for inequality |
111 | |
112 | @code |
113 | r0 = (a0 != b0) ? 0xffffffffffffffff : 0x0 |
114 | ... |
115 | rN = (aN != bN) ? 0xffffffffffffffff : 0x0 |
116 | @endcode |
117 | |
118 | @par 128-bit version: |
119 | @icost{SSE2-SSSE3, 5} |
120 | @icost{SSE4.1, AVX, 2} |
121 | @icost{XOP, 1} |
122 | @icost{NEON, 4} |
123 | @icost{ALTIVEC, 3-5} |
124 | |
125 | @par 256-bit version: |
    @icost{SSE2-SSSE3, 10}
    @icost{SSE4.1, AVX, 4}
128 | @icost{AVX2, XOP, 2} |
129 | @icost{NEON, 8} |
130 | @icost{ALTIVEC, 6-8} |
131 | */ |
132 | template<unsigned N, class V1, class V2> SIMDPP_INL |
133 | mask_int64<N,expr_empty> cmp_neq(const any_int64<N,V1>& a, |
134 | const any_int64<N,V2>& b) |
135 | { |
136 | typename detail::get_expr2_nosign<V1, V2>::type ra, rb; |
137 | ra = a.wrapped().eval(); |
138 | rb = b.wrapped().eval(); |
139 | return detail::insn::i_cmp_neq(ra, rb); |
140 | } |
141 | |
142 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int64, any_int64) |
143 | |
144 | /** Compares the values of two float32x4 vectors for inequality |
145 | |
146 | @code |
147 | r0 = (a0 != b0) ? 0xffffffff : 0x0 |
148 | ... |
149 | rN = (aN != bN) ? 0xffffffff : 0x0 |
150 | @endcode |
151 | |
152 | @par 128-bit version: |
153 | @icost{NEON, ALTIVEC, 2} |
154 | |
    @par 256-bit version:
156 | @icost{SSE2-SSE4.1, 2} |
157 | @icost{NEON, ALTIVEC, 4} |
158 | */ |
159 | template<unsigned N, class V1, class V2> SIMDPP_INL |
160 | mask_float32<N,expr_empty> cmp_neq(const any_float32<N,V1>& a, |
161 | const any_float32<N,V2>& b) |
162 | { |
163 | return detail::insn::i_cmp_neq(a.wrapped().eval(), b.wrapped().eval()); |
164 | } |
165 | |
166 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_float32, any_float32) |
167 | |
168 | /** Compares the values of two float64x2 vectors for inequality |
169 | |
170 | @code |
171 | r0 = (a0 != b0) ? 0xffffffffffffffff : 0x0 |
172 | ... |
173 | rN = (aN != bN) ? 0xffffffffffffffff : 0x0 |
174 | @endcode |
175 | |
176 | @par 128-bit version: |
177 | @novec{NEON, ALTIVEC} |
178 | |
179 | @par 256-bit version: |
180 | @novec{NEON, ALTIVEC} |
181 | @icost{SSE2-SSE4.1, 2} |
182 | */ |
183 | template<unsigned N, class V1, class V2> SIMDPP_INL |
184 | mask_float64<N,expr_empty> cmp_neq(const any_float64<N,V1>& a, |
185 | const any_float64<N,V2>& b) |
186 | { |
187 | return detail::insn::i_cmp_neq(a.wrapped().eval(), b.wrapped().eval()); |
188 | } |
189 | |
190 | SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_float64, any_float64) |
191 | |
192 | } // namespace SIMDPP_ARCH_NAMESPACE |
193 | } // namespace simdpp |
194 | |
195 | #endif |
196 | |
197 | |