1/* Copyright (C) 2012 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_NEON_MEMORY_STORE_H
9#define LIBSIMDPP_NEON_MEMORY_STORE_H
10#if SIMDPP_USE_NEON
11
12#ifndef LIBSIMDPP_SIMD_H
13 #error "This file must be included through simd.h"
14#endif
15
16#include <simdpp/types.h>
17
18namespace simdpp {
19namespace SIMDPP_ARCH_NAMESPACE {
20namespace detail {
21namespace neon {
22
23/** Stores @a n elements of a 128-bit vector starting from the @a pos-th one.
24 @a p must be aligned to 16 bytes.
25
26 @a n must be a power of 2. @a pos must be a multiple of @a n. The behavior is
27 undefined if @a pos+n exceeds the number of elements in the specified vector.
28*/
29template<unsigned pos, unsigned n> SIMDPP_INL
30void store_lane(char* p, const uint8x16& a)
31{
32 static_assert(n == 1 || n == 2 || n == 4 || n == 8, "Size not supported");
33 static_assert(pos%n == 0, "pos must be a multiple of n");
34 static_assert(pos+n <= 16, "Index out of bounds");
35 switch (n) {
36 case 1:
37 vst1q_lane_u8(reinterpret_cast<uint8_t*>(p), a.native(), pos);
38 return;
39 case 2:
40 vst1q_lane_u16(reinterpret_cast<uint16_t*>(p), vreinterpretq_u16_u8(a.native()), pos/2);
41 return;
42 case 4:
43 vst1q_lane_u32(reinterpret_cast<uint32_t*>(p), vreinterpretq_u32_u8(a.native()), pos/4);
44 return;
45 case 8:
46 vst1q_lane_u64(reinterpret_cast<uint64_t*>(p), vreinterpretq_u64_u8(a.native()), pos/8);
47 return;
48 }
49}
50
51template<unsigned pos, unsigned n> SIMDPP_INL
52void store_lane(char* p, const uint16x8& a)
53{
54 static_assert(n == 1 || n == 2 || n == 4, "Size not supported");
55 static_assert(pos%n == 0, "pos must be a multiple of n");
56 static_assert(pos+n <= 8, "Index out of bounds");
57 switch (n) {
58 case 1:
59 vst1q_lane_u16(reinterpret_cast<uint16_t*>(p), a.native(), pos);
60 return;
61 case 2:
62 vst1q_lane_u32(reinterpret_cast<uint32_t*>(p),
63 vreinterpretq_u32_u16(a.native()), pos/2);
64 return;
65 case 4:
66 vst1q_lane_u64(reinterpret_cast<uint64_t*>(p),
67 vreinterpretq_u64_u16(a.native()), pos/4);
68 return;
69 }
70}
71
72template<unsigned pos, unsigned n> SIMDPP_INL
73void store_lane(char* p, const uint32x4& a)
74{
75 static_assert(n == 1 || n == 2, "Size not supported");
76 static_assert(pos%n == 0, "pos must be a multiple of n");
77 static_assert(pos+n <= 4, "Index out of bounds");
78 switch (n) {
79 case 1:
80 vst1q_lane_u32(reinterpret_cast<uint32_t*>(p), a.native(), pos);
81 return;
82 case 2:
83 vst1q_lane_u64(reinterpret_cast<uint64_t*>(p),
84 vreinterpretq_u64_u32(a.native()), pos/2);
85 return;
86 }
87}
88
89template<unsigned pos, unsigned n> SIMDPP_INL
90void store_lane(char* p, const uint64x2& a)
91{
92 static_assert(n == 1, "Size not supported");
93 static_assert(pos%n == 0, "pos must be a multiple of n");
94 static_assert(pos+n <= 2, "Index out of bounds");
95 vst1q_lane_u64(reinterpret_cast<uint64_t*>(p), a.native(), pos);
96}
97
98template<unsigned pos, unsigned n> SIMDPP_INL
99void store_lane(char* p, const float32x4& a)
100{
101 store_lane<pos,n>(p, uint32x4(a));
102}
103
104} // namespace neon
105} // namespace detail
106} // namespace SIMDPP_ARCH_NAMESPACE
107} // namespace simdpp
108
109#endif
110#endif
111