1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_NEQ_H
9#define LIBSIMDPP_SIMDPP_CORE_CMP_NEQ_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/cmp_neq.h>
17#include <simdpp/core/detail/scalar_arg_impl.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21
22
23/** Compares the values of two int8x16 vectors for inequality
24
25 @code
26 r0 = (a0 != b0) ? 0xff : 0x0
27 ...
28 rN = (aN != bN) ? 0xff : 0x0
29 @endcode
30
31 @par 128-bit version:
32 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
33 @icost{XOP, 1}
34
35 @par 256-bit version
36 @icost{SSE2-AVX, NEON, ALTIVEC, 4}
37 @icost{AVX2, 2}
38 @icost{XOP, 2}
39*/
40template<unsigned N, class V1, class V2> SIMDPP_INL
41mask_int8<N,expr_empty> cmp_neq(const any_int8<N,V1>& a,
42 const any_int8<N,V2>& b)
43{
44 typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
45 ra = a.wrapped().eval();
46 rb = b.wrapped().eval();
47 return detail::insn::i_cmp_neq(ra, rb);
48}
49
50SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int8, any_int8)
51
52/** Compares the values of two int16x8 vectors for inequality
53
54 @code
55 r0 = (a0 != b0) ? 0xffff : 0x0
56 ...
57 rN = (aN != bN) ? 0xffff : 0x0
58 @endcode
59
60 @par 128-bit version:
61 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
62 @icost{XOP, 1}
63
64 @par 256-bit version
65 @icost{SSE2-AVX, NEON, ALTIVEC, 4}
66 @icost{AVX2, 2}
67 @icost{XOP, 2}
68*/
69template<unsigned N, class V1, class V2> SIMDPP_INL
70mask_int16<N,expr_empty> cmp_neq(const any_int16<N,V1>& a,
71 const any_int16<N,V2>& b)
72{
73 typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
74 ra = a.wrapped().eval();
75 rb = b.wrapped().eval();
76 return detail::insn::i_cmp_neq(ra, rb);
77}
78
79SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int16, any_int16)
80
81/** Compares the values of two int32x4 vectors for inequality
82
83 @code
84 r0 = (a0 != b0) ? 0xffffffff : 0x0
85 ...
86 rN = (aN != bN) ? 0xffffffff : 0x0
87 @endcode
88
89 @par 128-bit version:
90 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
91 @icost{XOP, 1}
92
93 @par 256-bit version
94 @icost{SSE2-AVX, NEON, ALTIVEC, 4}
95 @icost{AVX2, 2}
96 @icost{XOP, 2}
97*/
98template<unsigned N, class V1, class V2> SIMDPP_INL
99mask_int32<N,expr_empty> cmp_neq(const any_int32<N,V1>& a,
100 const any_int32<N,V2>& b)
101{
102 typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
103 ra = a.wrapped().eval();
104 rb = b.wrapped().eval();
105 return detail::insn::i_cmp_neq(ra, rb);
106}
107
108SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int32, any_int32)
109
110/** Compares the values of two int64x2 vectors for inequality
111
112 @code
113 r0 = (a0 != b0) ? 0xffffffffffffffff : 0x0
114 ...
115 rN = (aN != bN) ? 0xffffffffffffffff : 0x0
116 @endcode
117
118 @par 128-bit version:
119 @icost{SSE2-SSSE3, 5}
120 @icost{SSE4.1, AVX, 2}
121 @icost{XOP, 1}
122 @icost{NEON, 4}
123 @icost{ALTIVEC, 3-5}
124
125 @par 256-bit version:
126 @icost{SSE2-SSSE3, AVX, 10}
127 @icost{SSE4.1, NEON, 4}
128 @icost{AVX2, XOP, 2}
129 @icost{NEON, 8}
130 @icost{ALTIVEC, 6-8}
131*/
132template<unsigned N, class V1, class V2> SIMDPP_INL
133mask_int64<N,expr_empty> cmp_neq(const any_int64<N,V1>& a,
134 const any_int64<N,V2>& b)
135{
136 typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
137 ra = a.wrapped().eval();
138 rb = b.wrapped().eval();
139 return detail::insn::i_cmp_neq(ra, rb);
140}
141
142SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int64, any_int64)
143
144/** Compares the values of two float32x4 vectors for inequality
145
146 @code
147 r0 = (a0 != b0) ? 0xffffffff : 0x0
148 ...
149 rN = (aN != bN) ? 0xffffffff : 0x0
150 @endcode
151
152 @par 128-bit version:
153 @icost{NEON, ALTIVEC, 2}
154
155 @par 256-bit version
156 @icost{SSE2-SSE4.1, 2}
157 @icost{NEON, ALTIVEC, 4}
158*/
159template<unsigned N, class V1, class V2> SIMDPP_INL
160mask_float32<N,expr_empty> cmp_neq(const any_float32<N,V1>& a,
161 const any_float32<N,V2>& b)
162{
163 return detail::insn::i_cmp_neq(a.wrapped().eval(), b.wrapped().eval());
164}
165
166SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_float32, any_float32)
167
168/** Compares the values of two float64x2 vectors for inequality
169
170 @code
171 r0 = (a0 != b0) ? 0xffffffffffffffff : 0x0
172 ...
173 rN = (aN != bN) ? 0xffffffffffffffff : 0x0
174 @endcode
175
176 @par 128-bit version:
177 @novec{NEON, ALTIVEC}
178
179 @par 256-bit version:
180 @novec{NEON, ALTIVEC}
181 @icost{SSE2-SSE4.1, 2}
182*/
183template<unsigned N, class V1, class V2> SIMDPP_INL
184mask_float64<N,expr_empty> cmp_neq(const any_float64<N,V1>& a,
185 const any_float64<N,V2>& b)
186{
187 return detail::insn::i_cmp_neq(a.wrapped().eval(), b.wrapped().eval());
188}
189
190SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_float64, any_float64)
191
192} // namespace SIMDPP_ARCH_NAMESPACE
193} // namespace simdpp
194
195#endif
196
197