1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5// From llvm-3.9/clang-3.9.1 xmmintrin.h:
6
7/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
8*
9* Permission is hereby granted, free of charge, to any person obtaining a copy
10* of this software and associated documentation files (the "Software"), to deal
11* in the Software without restriction, including without limitation the rights
12* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13* copies of the Software, and to permit persons to whom the Software is
14* furnished to do so, subject to the following conditions:
15*
16* The above copyright notice and this permission notice shall be included in
17* all copies or substantial portions of the Software.
18*
19* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25* THE SOFTWARE.
26*
27*===-----------------------------------------------------------------------===
28*/
29
30#ifdef __clang__
31
/* 128-bit SIMD vector type: a GCC/Clang vector-extension type that is 16 bytes
   wide with float elements, i.e. the [4 x float] vector used by the intrinsics
   below. */
typedef float __m128 __attribute__((__vector_size__(16)));

/* Define the default attributes for the functions in this file: each
   intrinsic is always inlined into its caller (__always_inline__) and emits
   no debug information of its own (__nodebug__). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
36
37/// \brief Loads a 128-bit floating-point vector of [4 x float] from an aligned
38/// memory location.
39///
40/// \headerfile <x86intrin.h>
41///
42/// This intrinsic corresponds to the \c VMOVAPS / MOVAPS instruction.
43///
44/// \param __p
45/// A pointer to a 128-bit memory location. The address of the memory
46/// location has to be 128-bit aligned.
47/// \returns A 128-bit vector of [4 x float] containing the loaded valus.
48static __inline__ __m128 __DEFAULT_FN_ATTRS
49_mm_load_ps(const float *__p)
50{
51 return *(__m128*)__p;
52}
53
54/// \brief Loads a 128-bit floating-point vector of [4 x float] from an
55/// unaligned memory location.
56///
57/// \headerfile <x86intrin.h>
58///
59/// This intrinsic corresponds to the \c VMOVUPS / MOVUPS instruction.
60///
61/// \param __p
62/// A pointer to a 128-bit memory location. The address of the memory
63/// location does not have to be aligned.
64/// \returns A 128-bit vector of [4 x float] containing the loaded values.
65static __inline__ __m128 __DEFAULT_FN_ATTRS
66_mm_loadu_ps(const float *__p)
67{
68 struct __loadu_ps
69 {
70 __m128 __v;
71 } __attribute__((__packed__, __may_alias__));
72 return ((struct __loadu_ps*)__p)->__v;
73}
74
75/// \brief Stores float values from a 128-bit vector of [4 x float] to an
76/// unaligned memory location.
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the \c VMOVUPS / MOVUPS instruction.
81///
82/// \param __p
83/// A pointer to a 128-bit memory location. The address of the memory
84/// location does not have to be aligned.
85/// \param __a
86/// A 128-bit vector of [4 x float] containing the values to be stored.
87static __inline__ void __DEFAULT_FN_ATTRS
88_mm_storeu_ps(float *__p, __m128 __a)
89{
90 struct __storeu_ps
91 {
92 __m128 __v;
93 } __attribute__((__packed__, __may_alias__));
94 ((struct __storeu_ps*)__p)->__v = __a;
95}
96
97/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] into
98/// four contiguous elements in an aligned memory location.
99///
100/// \headerfile <x86intrin.h>
101///
102/// This intrinsic corresponds to \c VMOVAPS / MOVAPS + \c shuffling
103/// instruction.
104///
105/// \param __p
106/// A pointer to a 128-bit memory location.
107/// \param __a
108/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
109/// of the four contiguous elements pointed by __p.
110static __inline__ void __DEFAULT_FN_ATTRS
111_mm_store_ps(float *__p, __m128 __a)
112{
113 *(__m128*)__p = __a;
114}
115
116#endif // __clang__
117