1 | // This file is part of Eigen, a lightweight C++ template library |
2 | // for linear algebra. |
3 | // |
4 | // Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com> |
5 | // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> |
6 | // |
7 | // This Source Code Form is subject to the terms of the Mozilla |
8 | // Public License v. 2.0. If a copy of the MPL was not distributed |
9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10 | |
11 | #ifndef EIGEN_GEOMETRY_SSE_H |
12 | #define EIGEN_GEOMETRY_SSE_H |
13 | |
14 | namespace Eigen { |
15 | |
16 | namespace internal { |
17 | |
18 | template<class Derived, class OtherDerived> |
19 | struct quat_product<Architecture::SSE, Derived, OtherDerived, float> |
20 | { |
21 | enum { |
22 | AAlignment = traits<Derived>::Alignment, |
23 | BAlignment = traits<OtherDerived>::Alignment, |
24 | ResAlignment = traits<Quaternion<float> >::Alignment |
25 | }; |
26 | static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) |
27 | { |
28 | Quaternion<float> res; |
29 | const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); |
30 | __m128 a = _a.coeffs().template packet<AAlignment>(0); |
31 | __m128 b = _b.coeffs().template packet<BAlignment>(0); |
32 | __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); |
33 | __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); |
34 | pstoret<float,Packet4f,ResAlignment>( |
35 | &res.x(), |
36 | _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), |
37 | _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), |
38 | vec4f_swizzle1(b,1,2,0,0))), |
39 | _mm_xor_ps(mask,_mm_add_ps(s1,s2)))); |
40 | |
41 | return res; |
42 | } |
43 | }; |
44 | |
45 | template<class Derived> |
46 | struct quat_conj<Architecture::SSE, Derived, float> |
47 | { |
48 | enum { |
49 | ResAlignment = traits<Quaternion<float> >::Alignment |
50 | }; |
51 | static inline Quaternion<float> run(const QuaternionBase<Derived>& q) |
52 | { |
53 | Quaternion<float> res; |
54 | const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); |
55 | pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0))); |
56 | return res; |
57 | } |
58 | }; |
59 | |
60 | |
61 | template<typename VectorLhs,typename VectorRhs> |
62 | struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> |
63 | { |
64 | enum { |
65 | ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment |
66 | }; |
67 | static inline typename plain_matrix_type<VectorLhs>::type |
68 | run(const VectorLhs& lhs, const VectorRhs& rhs) |
69 | { |
70 | __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0); |
71 | __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0); |
72 | __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); |
73 | __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); |
74 | typename plain_matrix_type<VectorLhs>::type res; |
75 | pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2)); |
76 | return res; |
77 | } |
78 | }; |
79 | |
80 | |
81 | |
82 | |
83 | template<class Derived, class OtherDerived> |
84 | struct quat_product<Architecture::SSE, Derived, OtherDerived, double> |
85 | { |
86 | enum { |
87 | BAlignment = traits<OtherDerived>::Alignment, |
88 | ResAlignment = traits<Quaternion<double> >::Alignment |
89 | }; |
90 | |
91 | static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) |
92 | { |
93 | const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); |
94 | |
95 | Quaternion<double> res; |
96 | |
97 | const double* a = _a.coeffs().data(); |
98 | Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0); |
99 | Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2); |
100 | Packet2d a_xx = pset1<Packet2d>(a[0]); |
101 | Packet2d a_yy = pset1<Packet2d>(a[1]); |
102 | Packet2d a_zz = pset1<Packet2d>(a[2]); |
103 | Packet2d a_ww = pset1<Packet2d>(a[3]); |
104 | |
105 | // two temporaries: |
106 | Packet2d t1, t2; |
107 | |
108 | /* |
109 | * t1 = ww*xy + yy*zw |
110 | * t2 = zz*xy - xx*zw |
111 | * res.xy = t1 +/- swap(t2) |
112 | */ |
113 | t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw)); |
114 | t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); |
115 | #ifdef EIGEN_VECTORIZE_SSE3 |
116 | EIGEN_UNUSED_VARIABLE(mask) |
117 | pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2))); |
118 | #else |
119 | pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2)))); |
120 | #endif |
121 | |
122 | /* |
123 | * t1 = ww*zw - yy*xy |
124 | * t2 = zz*zw + xx*xy |
125 | * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2) |
126 | */ |
127 | t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy)); |
128 | t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); |
129 | #ifdef EIGEN_VECTORIZE_SSE3 |
130 | EIGEN_UNUSED_VARIABLE(mask) |
131 | pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); |
132 | #else |
133 | pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2)))); |
134 | #endif |
135 | |
136 | return res; |
137 | } |
138 | }; |
139 | |
140 | template<class Derived> |
141 | struct quat_conj<Architecture::SSE, Derived, double> |
142 | { |
143 | enum { |
144 | ResAlignment = traits<Quaternion<double> >::Alignment |
145 | }; |
146 | static inline Quaternion<double> run(const QuaternionBase<Derived>& q) |
147 | { |
148 | Quaternion<double> res; |
149 | const __m128d mask0 = _mm_setr_pd(-0.,-0.); |
150 | const __m128d mask2 = _mm_setr_pd(-0.,0.); |
151 | pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0))); |
152 | pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2))); |
153 | return res; |
154 | } |
155 | }; |
156 | |
157 | } // end namespace internal |
158 | |
159 | } // end namespace Eigen |
160 | |
161 | #endif // EIGEN_GEOMETRY_SSE_H |
162 | |