1/* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SUB_SAT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SUB_SAT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/null/math.h>
17#include <simdpp/detail/vector_array_macros.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21namespace detail {
22namespace insn {
23
24static SIMDPP_INL
25int8<16> i_sub_sat(const int8<16>& a, const int8<16>& b)
26{
27#if SIMDPP_USE_NULL
28 return detail::null::sub_sat(a, b);
29#elif SIMDPP_USE_SSE2
30 return _mm_subs_epi8(a.native(), b.native());
31#elif SIMDPP_USE_NEON
32 return vqsubq_s8(a.native(), b.native());
33#elif SIMDPP_USE_ALTIVEC
34 return vec_subs(a.native(), b.native());
35#elif SIMDPP_USE_MSA
36 return __msa_subs_s_b(a.native(), b.native());
37#endif
38}
39
#if SIMDPP_USE_AVX2
// Saturating subtraction (a - b) for int8<32> vectors; only available when
// AVX2 is enabled. Signed 8-bit differences saturate instead of wrapping.
static SIMDPP_INL int8<32> i_sub_sat(const int8<32>& a, const int8<32>& b)
{
    int8<32> diff = _mm256_subs_epi8(a.native(), b.native());
    return diff;
}
#endif
47
#if SIMDPP_USE_AVX512BW
// Saturating subtraction (a - b) for int8<64> vectors; only available when
// AVX512BW is enabled. Signed 8-bit differences saturate instead of wrapping.
static SIMDPP_INL int8<64> i_sub_sat(const int8<64>& a, const int8<64>& b)
{
    int8<64> diff = _mm512_subs_epi8(a.native(), b.native());
    return diff;
}
#endif
55
56// -----------------------------------------------------------------------------
57
58static SIMDPP_INL
59int16<8> i_sub_sat(const int16<8>& a, const int16<8>& b)
60{
61#if SIMDPP_USE_NULL
62 return detail::null::sub_sat(a, b);
63#elif SIMDPP_USE_SSE2
64 return _mm_subs_epi16(a.native(), b.native());
65#elif SIMDPP_USE_NEON
66 return vqsubq_s16(a.native(), b.native());
67#elif SIMDPP_USE_ALTIVEC
68 return vec_subs(a.native(), b.native());
69#elif SIMDPP_USE_MSA
70 return __msa_subs_s_h(a.native(), b.native());
71#endif
72}
73
#if SIMDPP_USE_AVX2
// Saturating subtraction (a - b) for int16<16> vectors; only available when
// AVX2 is enabled. Signed 16-bit differences saturate instead of wrapping.
static SIMDPP_INL int16<16> i_sub_sat(const int16<16>& a, const int16<16>& b)
{
    int16<16> diff = _mm256_subs_epi16(a.native(), b.native());
    return diff;
}
#endif
81
#if SIMDPP_USE_AVX512BW
// Saturating subtraction (a - b) for int16<32> vectors; only available when
// AVX512BW is enabled. Signed 16-bit differences saturate instead of wrapping.
static SIMDPP_INL int16<32> i_sub_sat(const int16<32>& a, const int16<32>& b)
{
    int16<32> diff = _mm512_subs_epi16(a.native(), b.native());
    return diff;
}
#endif
89
90// -----------------------------------------------------------------------------
91
92static SIMDPP_INL
93uint8<16> i_sub_sat(const uint8<16>& a, const uint8<16>& b)
94{
95#if SIMDPP_USE_NULL
96 return detail::null::sub_sat(a, b);
97#elif SIMDPP_USE_SSE2
98 return _mm_subs_epu8(a.native(), b.native());
99#elif SIMDPP_USE_NEON
100 return vqsubq_u8(a.native(), b.native());
101#elif SIMDPP_USE_ALTIVEC
102 return vec_subs(a.native(), b.native());
103#elif SIMDPP_USE_MSA
104 return __msa_subs_u_b(a.native(), b.native());
105#endif
106}
107
#if SIMDPP_USE_AVX2
// Saturating subtraction (a - b) for uint8<32> vectors; only available when
// AVX2 is enabled. Unsigned 8-bit differences saturate instead of wrapping.
static SIMDPP_INL uint8<32> i_sub_sat(const uint8<32>& a, const uint8<32>& b)
{
    uint8<32> diff = _mm256_subs_epu8(a.native(), b.native());
    return diff;
}
#endif
115
#if SIMDPP_USE_AVX512BW
// Saturating subtraction (a - b) for uint8<64> vectors; only available when
// AVX512BW is enabled. Unsigned 8-bit differences saturate instead of wrapping.
static SIMDPP_INL uint8<64> i_sub_sat(const uint8<64>& a, const uint8<64>& b)
{
    uint8<64> diff = _mm512_subs_epu8(a.native(), b.native());
    return diff;
}
#endif
123
124// -----------------------------------------------------------------------------
125
126static SIMDPP_INL
127uint16<8> i_sub_sat(const uint16<8>& a, const uint16<8>& b)
128{
129#if SIMDPP_USE_NULL
130 return detail::null::sub_sat(a, b);
131#elif SIMDPP_USE_SSE2
132 return _mm_subs_epu16(a.native(), b.native());
133#elif SIMDPP_USE_NEON
134 return vqsubq_u16(a.native(), b.native());
135#elif SIMDPP_USE_ALTIVEC
136 return vec_subs(a.native(), b.native());
137#elif SIMDPP_USE_MSA
138 return __msa_subs_u_h(a.native(), b.native());
139#endif
140}
141
#if SIMDPP_USE_AVX2
// Saturating subtraction (a - b) for uint16<16> vectors; only available when
// AVX2 is enabled. Unsigned 16-bit differences saturate instead of wrapping.
static SIMDPP_INL uint16<16> i_sub_sat(const uint16<16>& a, const uint16<16>& b)
{
    uint16<16> diff = _mm256_subs_epu16(a.native(), b.native());
    return diff;
}
#endif
149
#if SIMDPP_USE_AVX512BW
// Saturating subtraction (a - b) for uint16<32> vectors; only available when
// AVX512BW is enabled. Unsigned 16-bit differences saturate instead of
// wrapping.
static SIMDPP_INL uint16<32> i_sub_sat(const uint16<32>& a, const uint16<32>& b)
{
    uint16<32> diff = _mm512_subs_epu16(a.native(), b.native());
    return diff;
}
#endif
157
158// -----------------------------------------------------------------------------
159
160template<class V> SIMDPP_INL
161V i_sub_sat(const V& a, const V& b)
162{
163 SIMDPP_VEC_ARRAY_IMPL2(V, i_sub_sat, a, b)
164}
165
166} // namespace insn
167} // namespace detail
168} // namespace SIMDPP_ARCH_NAMESPACE
169} // namespace simdpp
170
171#endif
172
173