1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_COMBINE_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_COMBINE_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/combine.h> |
17 | |
18 | namespace simdpp { |
19 | namespace SIMDPP_ARCH_NAMESPACE { |
20 | |
21 | /** Combines two vectors into one twice as large. This function is useful when |
22 | the ISA supports multiple vector sizes and the user does some operations |
23 | with vectors that are narrower than the widest native vector. |
24 | |
25 | For example, on AVX, two __m128 vectors can be combined into a __m256 |
26 | vector. |
27 | |
28 | @todo icost |
29 | */ |
30 | template<unsigned N, class E1, class E2> SIMDPP_INL |
31 | uint8<N*2> combine(const uint8<N,E1>& a1, const uint8<N,E2>& a2) |
32 | { |
33 | return detail::insn::i_combine<uint8<N*2>>(a1.eval(), a2.eval()); |
34 | } |
35 | |
36 | template<unsigned N, class E1, class E2> SIMDPP_INL |
37 | uint16<N*2> combine(const uint16<N,E1>& a1, const uint16<N,E2>& a2) |
38 | { |
39 | return detail::insn::i_combine<uint16<N*2>>(a1.eval(), a2.eval()); |
40 | } |
41 | |
42 | template<unsigned N, class E1, class E2> SIMDPP_INL |
43 | uint32<N*2> combine(const uint32<N,E1>& a1, const uint32<N,E2>& a2) |
44 | { |
45 | return detail::insn::i_combine<uint32<N*2>>(a1.eval(), a2.eval()); |
46 | } |
47 | |
48 | template<unsigned N, class E1, class E2> SIMDPP_INL |
49 | uint64<N*2> combine(const uint64<N,E1>& a1, const uint64<N,E2>& a2) |
50 | { |
51 | return detail::insn::i_combine<uint64<N*2>>(a1.eval(), a2.eval()); |
52 | } |
53 | |
54 | template<unsigned N, class E1, class E2> SIMDPP_INL |
55 | int8<N*2> combine(const int8<N,E1>& a1, const int8<N,E2>& a2) |
56 | { |
57 | return detail::insn::i_combine<uint8<N*2>>(uint8<N>(a1.eval()), |
58 | uint8<N>(a2.eval())); |
59 | } |
60 | |
61 | template<unsigned N, class E1, class E2> SIMDPP_INL |
62 | int16<N*2> combine(const int16<N,E1>& a1, const int16<N,E2>& a2) |
63 | { |
64 | return detail::insn::i_combine<uint16<N*2>>(uint16<N>(a1.eval()), |
65 | uint16<N>(a2.eval())); |
66 | } |
67 | |
68 | template<unsigned N, class E1, class E2> SIMDPP_INL |
69 | int32<N*2> combine(const int32<N,E1>& a1, const int32<N,E2>& a2) |
70 | { |
71 | return detail::insn::i_combine<uint32<N*2>>(uint32<N>(a1.eval()), |
72 | uint32<N>(a2.eval())); |
73 | } |
74 | |
75 | template<unsigned N, class E1, class E2> SIMDPP_INL |
76 | int64<N*2> combine(const int64<N,E1>& a1, const int64<N,E2>& a2) |
77 | { |
78 | return detail::insn::i_combine<uint64<N*2>>(uint64<N>(a1.eval()), |
79 | uint64<N>(a2.eval())); |
80 | } |
81 | |
82 | template<unsigned N, class E1, class E2> SIMDPP_INL |
83 | float32<N*2> combine(const float32<N,E1>& a1, const float32<N,E2>& a2) |
84 | { |
85 | return detail::insn::i_combine<float32<N*2>>(a1.eval(), a2.eval()); |
86 | } |
87 | |
88 | template<unsigned N, class E1, class E2> SIMDPP_INL |
89 | float64<N*2> combine(const float64<N,E1>& a1, const float64<N,E2>& a2) |
90 | { |
91 | return detail::insn::i_combine<float64<N*2>>(a1.eval(), a2.eval()); |
92 | } |
93 | |
94 | } // namespace SIMDPP_ARCH_NAMESPACE |
95 | } // namespace simdpp |
96 | |
97 | #endif |
98 | |