1/* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ADD_SAT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ADD_SAT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/expr.h>
17#include <simdpp/detail/null/math.h>
18#include <simdpp/detail/vector_array_macros.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace insn {
24
25static SIMDPP_INL
26int8<16> i_add_sat(const int8<16>& a, const int8<16>& b)
27{
28#if SIMDPP_USE_NULL
29 return detail::null::add_sat(a, b);
30#elif SIMDPP_USE_SSE2
31 return _mm_adds_epi8(a.native(), b.native());
32#elif SIMDPP_USE_NEON
33 return vqaddq_s8(a.native(), b.native());
34#elif SIMDPP_USE_ALTIVEC
35 return vec_adds(a.native(), b.native());
36#elif SIMDPP_USE_MSA
37 return __msa_adds_s_b(a.native(), b.native());
38#endif
39}
40
#if SIMDPP_USE_AVX2
/** Element-wise saturating addition of two vectors holding thirty-two signed
    8-bit integers. Only available when SIMDPP_USE_AVX2 is set.

    @param a first operand
    @param b second operand
    @return the result of _mm256_adds_epi8 on the native registers
*/
static SIMDPP_INL
int8<32> i_add_sat(const int8<32>& a, const int8<32>& b)
{
    int8<32> sum = _mm256_adds_epi8(a.native(), b.native());
    return sum;
}
#endif
48
#if SIMDPP_USE_AVX512BW
/** Element-wise saturating addition of two vectors holding sixty-four signed
    8-bit integers. Only available when SIMDPP_USE_AVX512BW is set.

    @param a first operand
    @param b second operand
    @return the result of _mm512_adds_epi8 on the native registers
*/
static SIMDPP_INL
int8<64> i_add_sat(const int8<64>& a, const int8<64>& b)
{
    int8<64> sum = _mm512_adds_epi8(a.native(), b.native());
    return sum;
}
#endif
56
57// -----------------------------------------------------------------------------
58
59static SIMDPP_INL
60int16<8> i_add_sat(const int16<8>& a, const int16<8>& b)
61{
62#if SIMDPP_USE_NULL
63 return detail::null::add_sat(a, b);
64#elif SIMDPP_USE_SSE2
65 return _mm_adds_epi16(a.native(), b.native());
66#elif SIMDPP_USE_NEON
67 return vqaddq_s16(a.native(), b.native());
68#elif SIMDPP_USE_ALTIVEC
69 return vec_adds(a.native(), b.native());
70#elif SIMDPP_USE_MSA
71 return __msa_adds_s_h(a.native(), b.native());
72#endif
73}
74
#if SIMDPP_USE_AVX2
/** Element-wise saturating addition of two vectors holding sixteen signed
    16-bit integers. Only available when SIMDPP_USE_AVX2 is set.

    @param a first operand
    @param b second operand
    @return the result of _mm256_adds_epi16 on the native registers
*/
static SIMDPP_INL
int16<16> i_add_sat(const int16<16>& a, const int16<16>& b)
{
    int16<16> sum = _mm256_adds_epi16(a.native(), b.native());
    return sum;
}
#endif
82
#if SIMDPP_USE_AVX512BW
/** Element-wise saturating addition of two vectors holding thirty-two signed
    16-bit integers. Only available when SIMDPP_USE_AVX512BW is set.

    @param a first operand
    @param b second operand
    @return the result of _mm512_adds_epi16 on the native registers
*/
static SIMDPP_INL
int16<32> i_add_sat(const int16<32>& a, const int16<32>& b)
{
    int16<32> sum = _mm512_adds_epi16(a.native(), b.native());
    return sum;
}
#endif
90
91// -----------------------------------------------------------------------------
92
93static SIMDPP_INL
94uint8<16> i_add_sat(const uint8<16>& a, const uint8<16>& b)
95{
96#if SIMDPP_USE_NULL
97 return detail::null::add_sat(a, b);
98#elif SIMDPP_USE_SSE2
99 return _mm_adds_epu8(a.native(), b.native());
100#elif SIMDPP_USE_NEON
101 return vqaddq_u8(a.native(), b.native());
102#elif SIMDPP_USE_ALTIVEC
103 return vec_adds(a.native(), b.native());
104#elif SIMDPP_USE_MSA
105 return __msa_adds_u_b(a.native(), b.native());
106#endif
107}
108
#if SIMDPP_USE_AVX2
/** Element-wise saturating addition of two vectors holding thirty-two
    unsigned 8-bit integers. Only available when SIMDPP_USE_AVX2 is set.

    @param a first operand
    @param b second operand
    @return the result of _mm256_adds_epu8 on the native registers
*/
static SIMDPP_INL
uint8<32> i_add_sat(const uint8<32>& a, const uint8<32>& b)
{
    uint8<32> sum = _mm256_adds_epu8(a.native(), b.native());
    return sum;
}
#endif
116
#if SIMDPP_USE_AVX512BW
/** Element-wise saturating addition of two vectors holding sixty-four
    unsigned 8-bit integers. Only available when SIMDPP_USE_AVX512BW is set.

    @param a first operand
    @param b second operand
    @return the result of _mm512_adds_epu8 on the native registers
*/
static SIMDPP_INL
uint8<64> i_add_sat(const uint8<64>& a, const uint8<64>& b)
{
    uint8<64> sum = _mm512_adds_epu8(a.native(), b.native());
    return sum;
}
#endif
124
125// -----------------------------------------------------------------------------
126
127static SIMDPP_INL
128uint16<8> i_add_sat(const uint16<8>& a, const uint16<8>& b)
129{
130#if SIMDPP_USE_NULL
131 return detail::null::add_sat(a, b);
132#elif SIMDPP_USE_SSE2
133 return _mm_adds_epu16(a.native(), b.native());
134#elif SIMDPP_USE_NEON
135 return vqaddq_u16(a.native(), b.native());
136#elif SIMDPP_USE_ALTIVEC
137 return vec_adds(a.native(), b.native());
138#elif SIMDPP_USE_MSA
139 return __msa_adds_u_h(a.native(), b.native());
140#endif
141}
142
#if SIMDPP_USE_AVX2
/** Element-wise saturating addition of two vectors holding sixteen unsigned
    16-bit integers. Only available when SIMDPP_USE_AVX2 is set.

    @param a first operand
    @param b second operand
    @return the result of _mm256_adds_epu16 on the native registers
*/
static SIMDPP_INL
uint16<16> i_add_sat(const uint16<16>& a, const uint16<16>& b)
{
    uint16<16> sum = _mm256_adds_epu16(a.native(), b.native());
    return sum;
}
#endif
150
#if SIMDPP_USE_AVX512BW
/** Element-wise saturating addition of two vectors holding thirty-two
    unsigned 16-bit integers. Only available when SIMDPP_USE_AVX512BW is set.

    @param a first operand
    @param b second operand
    @return the result of _mm512_adds_epu16 on the native registers
*/
static SIMDPP_INL
uint16<32> i_add_sat(const uint16<32>& a, const uint16<32>& b)
{
    uint16<32> sum = _mm512_adds_epu16(a.native(), b.native());
    return sum;
}
#endif
158
159template<class R, unsigned N, class E1, class E2> SIMDPP_INL
160uint16<N> i_add_sat(const uint16<N>& a, const uint16<N>& b)
161{
162 SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, add_sat, a, b);
163}
164
165// -----------------------------------------------------------------------------
166
167template<class V> SIMDPP_INL
168V i_add_sat(const V& a, const V& b)
169{
170 SIMDPP_VEC_ARRAY_IMPL2(V, i_add_sat, a, b)
171}
172
173} // namespace insn
174} // namespace detail
175} // namespace SIMDPP_ARCH_NAMESPACE
176} // namespace simdpp
177
178#endif
179
180