1 | /* Copyright (C) 2012 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_DETAIL_NEON_MATH_INT_H |
9 | #define LIBSIMDPP_DETAIL_NEON_MATH_INT_H |
10 | #if SIMDPP_USE_NEON |
11 | |
12 | #ifndef LIBSIMDPP_SIMD_H |
13 | #error "This file must be included through simd.h" |
14 | #endif |
15 | |
16 | #include <simdpp/types.h> |
17 | |
18 | namespace simdpp { |
19 | namespace SIMDPP_ARCH_NAMESPACE { |
20 | namespace detail { |
21 | namespace neon { |
22 | |
23 | /** Multiplies the values of two int8x16 vectors and returns the low |
24 | part of the multiplication |
25 | |
26 | @code |
27 | r0 = low(a0 * b0) |
28 | ... |
29 | r15 = low(a15 * b15) |
30 | @endcode |
31 | */ |
32 | static SIMDPP_INL |
33 | uint8x16 mul_lo(const uint8x16& a, const uint8x16& b) |
34 | { |
35 | return vmulq_u8(a.native(), b.native()); |
36 | } |
37 | |
38 | /** Multiplies the first 8 values of two signed int8x16 vectors and expands the |
39 | results to 16 bits. |
40 | |
41 | @code |
42 | r0 = a0 * b0 |
43 | ... |
44 | r7 = a7 * b7 |
45 | @endcode |
46 | */ |
47 | static SIMDPP_INL |
48 | int16x8 mull_lo(const int8x16& a, const int8x16& b) |
49 | { |
50 | return vmull_s8(vget_low_s8(a.native()), vget_low_s8(b.native())); |
51 | } |
52 | |
53 | /** Multiplies the first 8 values of two unsigned int8x16 vectors and expands the |
54 | results to 16 bits. |
55 | |
56 | @code |
57 | r0 = a0 * b0 |
58 | ... |
59 | r7 = a7 * b7 |
60 | @endcode |
61 | */ |
62 | static SIMDPP_INL |
63 | uint16x8 mull_lo(const uint8x16& a, const uint8x16& b) |
64 | { |
65 | return vmull_u8(vget_low_u8(a.native()), vget_low_u8(b.native())); |
66 | } |
67 | |
68 | /** Multiplies the last 8 values of two signed int8x16 vectors and expands the |
69 | results to 16 bits. |
70 | |
71 | @code |
72 | r0 = a8 * b8 |
73 | ... |
74 | r7 = a15 * b15 |
75 | @endcode |
76 | */ |
77 | static SIMDPP_INL |
78 | int16x8 mull_hi(const int8x16& a, const int8x16& b) |
79 | { |
80 | return vmull_s8(vget_high_s8(a.native()), vget_high_s8(b.native())); |
81 | } |
82 | |
83 | |
84 | /** Multiplies the last 8 values of two unsigned int8x16 vectors and expands |
85 | the results to 16 bits. |
86 | |
87 | @code |
88 | r0 = a8 * b8 |
89 | ... |
90 | r7 = a15 * b15 |
91 | @endcode |
92 | */ |
93 | static SIMDPP_INL |
94 | uint16x8 mull_hi(const uint8x16& a, const uint8x16& b) |
95 | { |
96 | return vmull_u8(vget_high_u8(a.native()), vget_high_u8(b.native())); |
97 | } |
98 | |
99 | } // namespace neon |
100 | } // namespace detail |
101 | } // namespace SIMDPP_ARCH_NAMESPACE |
102 | } // namespace simdpp |
103 | |
104 | #endif |
105 | #endif |
106 | |