1 | /* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SUB_SAT_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SUB_SAT_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/null/math.h> |
17 | #include <simdpp/detail/vector_array_macros.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | namespace detail { |
22 | namespace insn { |
23 | |
24 | static SIMDPP_INL |
25 | int8<16> i_sub_sat(const int8<16>& a, const int8<16>& b) |
26 | { |
27 | #if SIMDPP_USE_NULL |
28 | return detail::null::sub_sat(a, b); |
29 | #elif SIMDPP_USE_SSE2 |
30 | return _mm_subs_epi8(a.native(), b.native()); |
31 | #elif SIMDPP_USE_NEON |
32 | return vqsubq_s8(a.native(), b.native()); |
33 | #elif SIMDPP_USE_ALTIVEC |
34 | return vec_subs(a.native(), b.native()); |
35 | #elif SIMDPP_USE_MSA |
36 | return __msa_subs_s_b(a.native(), b.native()); |
37 | #endif |
38 | } |
39 | |
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) for int8<32>.
// Only compiled when AVX2 is enabled; forwards to _mm256_subs_epi8.
static SIMDPP_INL int8<32> i_sub_sat(const int8<32>& a, const int8<32>& b)
{
    const __m256i lhs = a.native();
    const __m256i rhs = b.native();
    return _mm256_subs_epi8(lhs, rhs);
}
#endif
47 | |
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) for int8<64>.
// Only compiled when AVX512BW is enabled; forwards to _mm512_subs_epi8.
static SIMDPP_INL int8<64> i_sub_sat(const int8<64>& a, const int8<64>& b)
{
    const __m512i lhs = a.native();
    const __m512i rhs = b.native();
    return _mm512_subs_epi8(lhs, rhs);
}
#endif
55 | |
56 | // ----------------------------------------------------------------------------- |
57 | |
58 | static SIMDPP_INL |
59 | int16<8> i_sub_sat(const int16<8>& a, const int16<8>& b) |
60 | { |
61 | #if SIMDPP_USE_NULL |
62 | return detail::null::sub_sat(a, b); |
63 | #elif SIMDPP_USE_SSE2 |
64 | return _mm_subs_epi16(a.native(), b.native()); |
65 | #elif SIMDPP_USE_NEON |
66 | return vqsubq_s16(a.native(), b.native()); |
67 | #elif SIMDPP_USE_ALTIVEC |
68 | return vec_subs(a.native(), b.native()); |
69 | #elif SIMDPP_USE_MSA |
70 | return __msa_subs_s_h(a.native(), b.native()); |
71 | #endif |
72 | } |
73 | |
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) for int16<16>.
// Only compiled when AVX2 is enabled; forwards to _mm256_subs_epi16.
static SIMDPP_INL int16<16> i_sub_sat(const int16<16>& a, const int16<16>& b)
{
    const __m256i lhs = a.native();
    const __m256i rhs = b.native();
    return _mm256_subs_epi16(lhs, rhs);
}
#endif
81 | |
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) for int16<32>.
// Only compiled when AVX512BW is enabled; forwards to _mm512_subs_epi16.
static SIMDPP_INL int16<32> i_sub_sat(const int16<32>& a, const int16<32>& b)
{
    const __m512i lhs = a.native();
    const __m512i rhs = b.native();
    return _mm512_subs_epi16(lhs, rhs);
}
#endif
89 | |
90 | // ----------------------------------------------------------------------------- |
91 | |
92 | static SIMDPP_INL |
93 | uint8<16> i_sub_sat(const uint8<16>& a, const uint8<16>& b) |
94 | { |
95 | #if SIMDPP_USE_NULL |
96 | return detail::null::sub_sat(a, b); |
97 | #elif SIMDPP_USE_SSE2 |
98 | return _mm_subs_epu8(a.native(), b.native()); |
99 | #elif SIMDPP_USE_NEON |
100 | return vqsubq_u8(a.native(), b.native()); |
101 | #elif SIMDPP_USE_ALTIVEC |
102 | return vec_subs(a.native(), b.native()); |
103 | #elif SIMDPP_USE_MSA |
104 | return __msa_subs_u_b(a.native(), b.native()); |
105 | #endif |
106 | } |
107 | |
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) for uint8<32>.
// Only compiled when AVX2 is enabled; forwards to _mm256_subs_epu8.
static SIMDPP_INL uint8<32> i_sub_sat(const uint8<32>& a, const uint8<32>& b)
{
    const __m256i lhs = a.native();
    const __m256i rhs = b.native();
    return _mm256_subs_epu8(lhs, rhs);
}
#endif
115 | |
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) for uint8<64>.
// Only compiled when AVX512BW is enabled; forwards to _mm512_subs_epu8.
static SIMDPP_INL uint8<64> i_sub_sat(const uint8<64>& a, const uint8<64>& b)
{
    const __m512i lhs = a.native();
    const __m512i rhs = b.native();
    return _mm512_subs_epu8(lhs, rhs);
}
#endif
123 | |
124 | // ----------------------------------------------------------------------------- |
125 | |
126 | static SIMDPP_INL |
127 | uint16<8> i_sub_sat(const uint16<8>& a, const uint16<8>& b) |
128 | { |
129 | #if SIMDPP_USE_NULL |
130 | return detail::null::sub_sat(a, b); |
131 | #elif SIMDPP_USE_SSE2 |
132 | return _mm_subs_epu16(a.native(), b.native()); |
133 | #elif SIMDPP_USE_NEON |
134 | return vqsubq_u16(a.native(), b.native()); |
135 | #elif SIMDPP_USE_ALTIVEC |
136 | return vec_subs(a.native(), b.native()); |
137 | #elif SIMDPP_USE_MSA |
138 | return __msa_subs_u_h(a.native(), b.native()); |
139 | #endif |
140 | } |
141 | |
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) for uint16<16>.
// Only compiled when AVX2 is enabled; forwards to _mm256_subs_epu16.
static SIMDPP_INL uint16<16> i_sub_sat(const uint16<16>& a, const uint16<16>& b)
{
    const __m256i lhs = a.native();
    const __m256i rhs = b.native();
    return _mm256_subs_epu16(lhs, rhs);
}
#endif
149 | |
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) for uint16<32>.
// Only compiled when AVX512BW is enabled; forwards to _mm512_subs_epu16.
static SIMDPP_INL uint16<32> i_sub_sat(const uint16<32>& a, const uint16<32>& b)
{
    const __m512i lhs = a.native();
    const __m512i rhs = b.native();
    return _mm512_subs_epu16(lhs, rhs);
}
#endif
157 | |
158 | // ----------------------------------------------------------------------------- |
159 | |
160 | template<class V> SIMDPP_INL |
161 | V i_sub_sat(const V& a, const V& b) |
162 | { |
163 | SIMDPP_VEC_ARRAY_IMPL2(V, i_sub_sat, a, b) |
164 | } |
165 | |
166 | } // namespace insn |
167 | } // namespace detail |
168 | } // namespace SIMDPP_ARCH_NAMESPACE |
169 | } // namespace simdpp |
170 | |
171 | #endif |
172 | |
173 | |