1 | // This file is part of Eigen, a lightweight C++ template library |
2 | // for linear algebra. |
3 | // |
4 | // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> |
5 | // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> |
6 | // Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk> |
7 | // |
8 | // This Source Code Form is subject to the terms of the Mozilla |
9 | // Public License v. 2.0. If a copy of the MPL was not distributed |
10 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11 | |
12 | |
13 | #ifndef EIGEN_PRODUCTEVALUATORS_H |
14 | #define EIGEN_PRODUCTEVALUATORS_H |
15 | |
16 | namespace Eigen { |
17 | |
18 | namespace internal { |
19 | |
20 | /** \internal |
21 | * Evaluator of a product expression. |
 * Since products require special treatment to handle all possible cases,
 * we simply defer the evaluation logic to a product_evaluator class,
 * which offers more partial specialization possibilities.
25 | * |
26 | * \sa class product_evaluator |
27 | */ |
28 | template<typename Lhs, typename Rhs, int Options> |
29 | struct evaluator<Product<Lhs, Rhs, Options> > |
30 | : public product_evaluator<Product<Lhs, Rhs, Options> > |
31 | { |
32 | typedef Product<Lhs, Rhs, Options> XprType; |
33 | typedef product_evaluator<XprType> Base; |
34 | |
35 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} |
36 | }; |
37 | |
38 | // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" |
39 | // TODO we should apply that rule only if that's really helpful |
40 | template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1> |
41 | struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, |
42 | const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, |
43 | const Product<Lhs, Rhs, DefaultProduct> > > |
44 | { |
45 | static const bool value = true; |
46 | }; |
47 | template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1> |
48 | struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, |
49 | const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, |
50 | const Product<Lhs, Rhs, DefaultProduct> > > |
51 | : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > |
52 | { |
53 | typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, |
54 | const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, |
55 | const Product<Lhs, Rhs, DefaultProduct> > XprType; |
56 | typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base; |
57 | |
58 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) |
59 | : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) |
60 | {} |
61 | }; |
62 | |
63 | |
64 | template<typename Lhs, typename Rhs, int DiagIndex> |
65 | struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > |
66 | : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > |
67 | { |
68 | typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType; |
69 | typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base; |
70 | |
71 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) |
72 | : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>( |
73 | Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), |
74 | xpr.index() )) |
75 | {} |
76 | }; |
77 | |
78 | |
79 | // Helper class to perform a matrix product with the destination at hand. |
80 | // Depending on the sizes of the factors, there are different evaluation strategies |
81 | // as controlled by internal::product_type. |
82 | template< typename Lhs, typename Rhs, |
83 | typename LhsShape = typename evaluator_traits<Lhs>::Shape, |
84 | typename RhsShape = typename evaluator_traits<Rhs>::Shape, |
85 | int ProductType = internal::product_type<Lhs,Rhs>::value> |
86 | struct generic_product_impl; |
87 | |
88 | template<typename Lhs, typename Rhs> |
89 | struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > { |
90 | static const bool value = true; |
91 | }; |
92 | |
93 | // This is the default evaluator implementation for products: |
// It creates a temporary and calls generic_product_impl
95 | template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape> |
96 | struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape> |
97 | : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> |
98 | { |
99 | typedef Product<Lhs, Rhs, Options> XprType; |
100 | typedef typename XprType::PlainObject PlainObject; |
101 | typedef evaluator<PlainObject> Base; |
102 | enum { |
103 | Flags = Base::Flags | EvalBeforeNestingBit |
104 | }; |
105 | |
106 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
107 | explicit product_evaluator(const XprType& xpr) |
108 | : m_result(xpr.rows(), xpr.cols()) |
109 | { |
110 | ::new (static_cast<Base*>(this)) Base(m_result); |
111 | |
    // FIXME shall we handle nested_eval here?
    // If so, then we must take care of removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
114 | // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; |
115 | // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; |
116 | // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; |
117 | // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; |
118 | // |
119 | // const LhsNested lhs(xpr.lhs()); |
120 | // const RhsNested rhs(xpr.rhs()); |
121 | // |
122 | // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs); |
123 | |
124 | generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs()); |
125 | } |
126 | |
127 | protected: |
128 | PlainObject m_result; |
129 | }; |
130 | |
// The following three shortcuts are enabled only if the scalar types match exactly.
132 | // TODO: we could enable them for different scalar types when the product is not vectorized. |
133 | |
134 | // Dense = Product |
135 | template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> |
136 | struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense, |
137 | typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> |
138 | { |
139 | typedef Product<Lhs,Rhs,Options> SrcXprType; |
140 | static EIGEN_STRONG_INLINE |
141 | void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &) |
142 | { |
143 | Index dstRows = src.rows(); |
144 | Index dstCols = src.cols(); |
145 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
146 | dst.resize(dstRows, dstCols); |
147 | // FIXME shall we handle nested_eval here? |
148 | generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs()); |
149 | } |
150 | }; |
151 | |
152 | // Dense += Product |
153 | template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> |
154 | struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense, |
155 | typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> |
156 | { |
157 | typedef Product<Lhs,Rhs,Options> SrcXprType; |
158 | static EIGEN_STRONG_INLINE |
159 | void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &) |
160 | { |
161 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
162 | // FIXME shall we handle nested_eval here? |
163 | generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs()); |
164 | } |
165 | }; |
166 | |
167 | // Dense -= Product |
168 | template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> |
169 | struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense, |
170 | typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> |
171 | { |
172 | typedef Product<Lhs,Rhs,Options> SrcXprType; |
173 | static EIGEN_STRONG_INLINE |
174 | void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &) |
175 | { |
176 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
177 | // FIXME shall we handle nested_eval here? |
178 | generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs()); |
179 | } |
180 | }; |
181 | |
182 | |
183 | // Dense ?= scalar * Product |
// TODO we should apply that rule only if that's really helpful
185 | // for instance, this is not good for inner products |
186 | template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> |
187 | struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>, |
188 | const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense> |
189 | { |
190 | typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, |
191 | const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>, |
192 | const Product<Lhs,Rhs,DefaultProduct> > SrcXprType; |
193 | static EIGEN_STRONG_INLINE |
194 | void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) |
195 | { |
196 | call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); |
197 | } |
198 | }; |
199 | |
200 | //---------------------------------------- |
201 | // Catch "Dense ?= xpr + Product<>" expression to save one temporary |
202 | // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct |
203 | |
204 | template<typename OtherXpr, typename Lhs, typename Rhs> |
205 | struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, |
206 | const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { |
207 | static const bool value = true; |
208 | }; |
209 | |
210 | template<typename OtherXpr, typename Lhs, typename Rhs> |
211 | struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, |
212 | const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { |
213 | static const bool value = true; |
214 | }; |
215 | |
216 | template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> |
217 | struct assignment_from_xpr_op_product |
218 | { |
219 | template<typename SrcXprType, typename InitialFunc> |
220 | static EIGEN_STRONG_INLINE |
221 | void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) |
222 | { |
223 | call_assignment_no_alias(dst, src.lhs(), Func1()); |
224 | call_assignment_no_alias(dst, src.rhs(), Func2()); |
225 | } |
226 | }; |
227 | |
228 | #define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \ |
229 | template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \ |
230 | struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \ |
231 | const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \ |
232 | : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \ |
233 | {} |
234 | |
235 | EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op); |
236 | EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op); |
237 | EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op); |
238 | |
239 | EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op); |
240 | EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op); |
241 | EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op); |
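
// For illustration, a sketch of what the rules above buy us:
//   C = D + A * B; // evaluated as: C = D; C += A * B;
//   C = D - A * B; // evaluated as: C = D; C -= A * B;
// i.e., the product is accumulated directly into C and no temporary is
// allocated for A * B.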
242 | |
243 | //---------------------------------------- |
244 | |
245 | template<typename Lhs, typename Rhs> |
246 | struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct> |
247 | { |
248 | template<typename Dst> |
249 | static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
250 | { |
251 | dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); |
252 | } |
253 | |
254 | template<typename Dst> |
255 | static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
256 | { |
257 | dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); |
258 | } |
259 | |
260 | template<typename Dst> |
261 | static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
262 | { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } |
263 | }; |
264 | |
265 | |
266 | /*********************************************************************** |
* Implementation of the outer product of two dense vectors
268 | ***********************************************************************/ |
269 | |
270 | // Column major result |
271 | template<typename Dst, typename Lhs, typename Rhs, typename Func> |
272 | void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) |
273 | { |
274 | evaluator<Rhs> rhsEval(rhs); |
275 | typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs); |
276 | // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored |
277 | // FIXME not very good if rhs is real and lhs complex while alpha is real too |
278 | const Index cols = dst.cols(); |
279 | for (Index j=0; j<cols; ++j) |
280 | func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs); |
281 | } |
282 | |
283 | // Row major result |
284 | template<typename Dst, typename Lhs, typename Rhs, typename Func> |
285 | void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) |
286 | { |
287 | evaluator<Lhs> lhsEval(lhs); |
288 | typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs); |
289 | // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored |
290 | // FIXME not very good if lhs is real and rhs complex while alpha is real too |
291 | const Index rows = dst.rows(); |
292 | for (Index i=0; i<rows; ++i) |
293 | func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs); |
294 | } |
295 | |
296 | template<typename Lhs, typename Rhs> |
297 | struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct> |
298 | { |
299 | template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; |
300 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
301 | |
302 | // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose |
303 | struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; |
304 | struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; |
305 | struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; |
306 | struct adds { |
307 | Scalar m_scale; |
308 | explicit adds(const Scalar& s) : m_scale(s) {} |
309 | template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { |
310 | dst.const_cast_derived() += m_scale * src; |
311 | } |
312 | }; |
313 | |
314 | template<typename Dst> |
315 | static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
316 | { |
317 | internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>()); |
318 | } |
319 | |
320 | template<typename Dst> |
321 | static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
322 | { |
323 | internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>()); |
324 | } |
325 | |
326 | template<typename Dst> |
327 | static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
328 | { |
329 | internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>()); |
330 | } |
331 | |
332 | template<typename Dst> |
333 | static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
334 | { |
335 | internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>()); |
336 | } |
337 | |
338 | }; |
339 | |
340 | |
// This base class provides default implementations for evalTo, addTo, and subTo in terms of scaleAndAddTo
342 | template<typename Lhs, typename Rhs, typename Derived> |
343 | struct generic_product_impl_base |
344 | { |
345 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
346 | |
347 | template<typename Dst> |
348 | static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
349 | { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } |
350 | |
351 | template<typename Dst> |
352 | static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
353 | { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } |
354 | |
355 | template<typename Dst> |
356 | static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
357 | { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } |
358 | |
359 | template<typename Dst> |
360 | static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
361 | { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } |
362 | |
363 | }; |
364 | |
365 | template<typename Lhs, typename Rhs> |
366 | struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> |
367 | : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> > |
368 | { |
369 | typedef typename nested_eval<Lhs,1>::type LhsNested; |
370 | typedef typename nested_eval<Rhs,1>::type RhsNested; |
371 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
372 | enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; |
373 | typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType; |
374 | |
375 | template<typename Dest> |
376 | static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
377 | { |
378 | LhsNested actual_lhs(lhs); |
379 | RhsNested actual_rhs(rhs); |
380 | internal::gemv_dense_selector<Side, |
381 | (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, |
382 | bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess) |
383 | >::run(actual_lhs, actual_rhs, dst, alpha); |
384 | } |
385 | }; |
386 | |
387 | template<typename Lhs, typename Rhs> |
388 | struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> |
389 | { |
390 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
391 | |
392 | template<typename Dst> |
393 | static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
394 | { |
395 | // Same as: dst.noalias() = lhs.lazyProduct(rhs); |
396 | // but easier on the compiler side |
397 | call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>()); |
398 | } |
399 | |
400 | template<typename Dst> |
401 | static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
402 | { |
403 | // dst.noalias() += lhs.lazyProduct(rhs); |
404 | call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>()); |
405 | } |
406 | |
407 | template<typename Dst> |
408 | static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) |
409 | { |
410 | // dst.noalias() -= lhs.lazyProduct(rhs); |
411 | call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>()); |
412 | } |
413 | |
414 | // template<typename Dst> |
415 | // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
416 | // { dst.noalias() += alpha * lhs.lazyProduct(rhs); } |
417 | }; |
418 | |
419 | // This specialization enforces the use of a coefficient-based evaluation strategy |
420 | template<typename Lhs, typename Rhs> |
421 | struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode> |
422 | : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {}; |
423 | |
424 | // Case 2: Evaluate coeff by coeff |
425 | // |
426 | // This is mostly taken from CoeffBasedProduct.h |
427 | // The main difference is that we add an extra argument to the etor_product_*_impl::run() function |
// for the inner dimension of the product, because evaluator objects do not know their size.
429 | |
430 | template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> |
431 | struct etor_product_coeff_impl; |
432 | |
433 | template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> |
434 | struct etor_product_packet_impl; |
435 | |
436 | template<typename Lhs, typename Rhs, int ProductTag> |
437 | struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape> |
438 | : evaluator_base<Product<Lhs, Rhs, LazyProduct> > |
439 | { |
440 | typedef Product<Lhs, Rhs, LazyProduct> XprType; |
441 | typedef typename XprType::Scalar Scalar; |
442 | typedef typename XprType::CoeffReturnType CoeffReturnType; |
443 | |
444 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
445 | explicit product_evaluator(const XprType& xpr) |
446 | : m_lhs(xpr.lhs()), |
447 | m_rhs(xpr.rhs()), |
448 | m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! |
449 | m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, |
                                // or perhaps declare them on the fly in the packet method... We have to experiment to check what's best.
451 | m_innerDim(xpr.lhs().cols()) |
452 | { |
453 | EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); |
454 | EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost); |
455 | EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); |
456 | #if 0 |
457 | std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n" ; |
458 | std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n" ; |
459 | std::cerr << "LhsAlignment= " << LhsAlignment << "\n" ; |
460 | std::cerr << "RhsAlignment= " << RhsAlignment << "\n" ; |
461 | std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n" ; |
462 | std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n" ; |
463 | std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n" ; |
464 | std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n" ; |
465 | std::cerr << "Alignment= " << Alignment << "\n" ; |
466 | std::cerr << "Flags= " << Flags << "\n" ; |
467 | #endif |
468 | } |
469 | |
470 | // Everything below here is taken from CoeffBasedProduct.h |
471 | |
472 | typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; |
473 | typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; |
474 | |
475 | typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; |
476 | typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; |
477 | |
478 | typedef evaluator<LhsNestedCleaned> LhsEtorType; |
479 | typedef evaluator<RhsNestedCleaned> RhsEtorType; |
480 | |
481 | enum { |
482 | RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, |
483 | ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, |
484 | InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), |
485 | MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, |
486 | MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime |
487 | }; |
488 | |
489 | typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType; |
490 | typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType; |
491 | |
492 | enum { |
493 | |
494 | LhsCoeffReadCost = LhsEtorType::CoeffReadCost, |
495 | RhsCoeffReadCost = RhsEtorType::CoeffReadCost, |
496 | CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost |
497 | : InnerSize == Dynamic ? HugeCost |
498 | : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) |
499 | + (InnerSize - 1) * NumTraits<Scalar>::AddCost, |
500 | |
501 | Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, |
502 | |
503 | LhsFlags = LhsEtorType::Flags, |
504 | RhsFlags = RhsEtorType::Flags, |
505 | |
506 | LhsRowMajor = LhsFlags & RowMajorBit, |
507 | RhsRowMajor = RhsFlags & RowMajorBit, |
508 | |
509 | LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size, |
510 | RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size, |
511 | |
512 | // Here, we don't care about alignment larger than the usable packet size. |
513 | LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), |
514 | RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), |
515 | |
516 | SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, |
517 | |
518 | CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), |
519 | CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), |
520 | |
521 | EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 |
522 | : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 |
523 | : (bool(RhsRowMajor) && !CanVectorizeLhs), |
524 | |
525 | Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) |
526 | | (EvalToRowMajor ? RowMajorBit : 0) |
527 | // TODO enable vectorization for mixed types |
528 | | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
529 | | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), |
530 | |
531 | LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), |
532 | RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), |
533 | |
534 | Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) |
535 | : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) |
536 | : 0, |
537 | |
538 | /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside |
539 | * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner |
540 | * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect |
541 | * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. |
542 | */ |
543 | CanVectorizeInner = SameType |
544 | && LhsRowMajor |
545 | && (!RhsRowMajor) |
546 | && (LhsFlags & RhsFlags & ActualPacketAccessBit) |
547 | && (InnerSize % packet_traits<Scalar>::size == 0) |
548 | }; |
549 | |
550 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const |
551 | { |
552 | return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); |
553 | } |
554 | |
  /* Allow index-based non-packet access. It is impossible though to allow index-based packet access,
556 | * which is why we don't set the LinearAccessBit. |
557 | * TODO: this seems possible when the result is a vector |
558 | */ |
559 | EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const |
560 | { |
561 | const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; |
562 | const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; |
563 | return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); |
564 | } |
565 | |
566 | template<int LoadMode, typename PacketType> |
567 | const PacketType packet(Index row, Index col) const |
568 | { |
569 | PacketType res; |
570 | typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor, |
571 | Unroll ? int(InnerSize) : Dynamic, |
572 | LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl; |
573 | PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); |
574 | return res; |
575 | } |
576 | |
577 | template<int LoadMode, typename PacketType> |
578 | const PacketType packet(Index index) const |
579 | { |
580 | const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; |
581 | const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; |
582 | return packet<LoadMode,PacketType>(row,col); |
583 | } |
584 | |
585 | protected: |
586 | typename internal::add_const_on_value_type<LhsNested>::type m_lhs; |
587 | typename internal::add_const_on_value_type<RhsNested>::type m_rhs; |
588 | |
589 | LhsEtorType m_lhsImpl; |
590 | RhsEtorType m_rhsImpl; |
591 | |
592 | // TODO: Get rid of m_innerDim if known at compile time |
593 | Index m_innerDim; |
594 | }; |
595 | |
596 | template<typename Lhs, typename Rhs> |
597 | struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape> |
598 | : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape> |
599 | { |
600 | typedef Product<Lhs, Rhs, DefaultProduct> XprType; |
601 | typedef Product<Lhs, Rhs, LazyProduct> BaseProduct; |
602 | typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base; |
603 | enum { |
604 | Flags = Base::Flags | EvalBeforeNestingBit |
605 | }; |
606 | EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) |
607 | : Base(BaseProduct(xpr.lhs(),xpr.rhs())) |
608 | {} |
609 | }; |
610 | |
611 | /**************************************** |
612 | *** Coeff based product, Packet path *** |
613 | ****************************************/ |
614 | |
615 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> |
616 | struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> |
617 | { |
618 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) |
619 | { |
620 | etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); |
621 | res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res); |
622 | } |
623 | }; |
624 | |
625 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> |
626 | struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> |
627 | { |
628 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) |
629 | { |
630 | etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); |
631 | res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res); |
632 | } |
633 | }; |
634 | |
635 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode> |
636 | struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> |
637 | { |
638 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) |
639 | { |
640 | res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col)); |
641 | } |
642 | }; |
643 | |
644 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode> |
645 | struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> |
646 | { |
647 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) |
648 | { |
649 | res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col))); |
650 | } |
651 | }; |
652 | |
653 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode> |
654 | struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> |
655 | { |
656 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) |
657 | { |
658 | res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); |
659 | } |
660 | }; |
661 | |
662 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode> |
663 | struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> |
664 | { |
665 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) |
666 | { |
667 | res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); |
668 | } |
669 | }; |
670 | |
671 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode> |
672 | struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> |
673 | { |
674 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) |
675 | { |
676 | res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); |
677 | for(Index i = 0; i < innerDim; ++i) |
678 | res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res); |
679 | } |
680 | }; |
681 | |
682 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode> |
683 | struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> |
684 | { |
685 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) |
686 | { |
687 | res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); |
688 | for(Index i = 0; i < innerDim; ++i) |
689 | res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); |
690 | } |
691 | }; |
692 | |
693 | |
694 | /*************************************************************************** |
695 | * Triangular products |
696 | ***************************************************************************/ |
697 | template<int Mode, bool LhsIsTriangular, |
698 | typename Lhs, bool LhsIsVector, |
699 | typename Rhs, bool RhsIsVector> |
700 | struct triangular_product_impl; |
701 | |
702 | template<typename Lhs, typename Rhs, int ProductTag> |
703 | struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> |
704 | : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> > |
705 | { |
706 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
707 | |
708 | template<typename Dest> |
709 | static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
710 | { |
711 | triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1> |
712 | ::run(dst, lhs.nestedExpression(), rhs, alpha); |
713 | } |
714 | }; |
715 | |
716 | template<typename Lhs, typename Rhs, int ProductTag> |
717 | struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> |
718 | : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> > |
719 | { |
720 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
721 | |
722 | template<typename Dest> |
723 | static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
724 | { |
725 | triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha); |
726 | } |
727 | }; |
728 | |
729 | |
730 | /*************************************************************************** |
731 | * SelfAdjoint products |
732 | ***************************************************************************/ |
733 | template <typename Lhs, int LhsMode, bool LhsIsVector, |
734 | typename Rhs, int RhsMode, bool RhsIsVector> |
735 | struct selfadjoint_product_impl; |
736 | |
737 | template<typename Lhs, typename Rhs, int ProductTag> |
738 | struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> |
739 | : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> > |
740 | { |
741 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
742 | |
743 | template<typename Dest> |
744 | static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
745 | { |
746 | selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha); |
747 | } |
748 | }; |
749 | |
750 | template<typename Lhs, typename Rhs, int ProductTag> |
751 | struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> |
752 | : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> > |
753 | { |
754 | typedef typename Product<Lhs,Rhs>::Scalar Scalar; |
755 | |
756 | template<typename Dest> |
757 | static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) |
758 | { |
759 | selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha); |
760 | } |
761 | }; |
762 | |
763 | |
764 | /*************************************************************************** |
765 | * Diagonal products |
766 | ***************************************************************************/ |
767 | |
768 | template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder> |
769 | struct diagonal_product_evaluator_base |
770 | : evaluator_base<Derived> |
771 | { |
772 | typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar; |
773 | public: |
774 | enum { |
775 | CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost, |
776 | |
777 | MatrixFlags = evaluator<MatrixType>::Flags, |
778 | DiagFlags = evaluator<DiagonalType>::Flags, |
779 | _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, |
780 | _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) |
781 | ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), |
782 | _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value, |
783 | // FIXME currently we need same types, but in the future the next rule should be the one |
784 | //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), |
785 | _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), |
786 | _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, |
787 | Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), |
788 | Alignment = evaluator<MatrixType>::Alignment, |
789 | |
790 | AsScalarProduct = (DiagonalType::SizeAtCompileTime==1) |
791 | || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) |
792 | || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) |
793 | }; |
794 | |
795 | diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) |
796 | : m_diagImpl(diag), m_matImpl(mat) |
797 | { |
798 | EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); |
799 | EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); |
800 | } |
801 | |
802 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const |
803 | { |
804 | if(AsScalarProduct) |
805 | return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); |
806 | else |
807 | return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); |
808 | } |
809 | |
810 | protected: |
811 | template<int LoadMode,typename PacketType> |
812 | EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const |
813 | { |
814 | return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col), |
815 | internal::pset1<PacketType>(m_diagImpl.coeff(id))); |
816 | } |
817 | |
818 | template<int LoadMode,typename PacketType> |
819 | EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const |
820 | { |
821 | enum { |
822 | InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, |
823 | DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!! |
824 | }; |
825 | return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col), |
826 | m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id)); |
827 | } |
828 | |
829 | evaluator<DiagonalType> m_diagImpl; |
830 | evaluator<MatrixType> m_matImpl; |
831 | }; |
832 | |
833 | // diagonal * dense |
834 | template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> |
835 | struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape> |
836 | : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> |
837 | { |
838 | typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base; |
839 | using Base::m_diagImpl; |
840 | using Base::m_matImpl; |
841 | using Base::coeff; |
842 | typedef typename Base::Scalar Scalar; |
843 | |
844 | typedef Product<Lhs, Rhs, ProductKind> XprType; |
845 | typedef typename XprType::PlainObject PlainObject; |
846 | |
847 | enum { |
848 | StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor |
849 | }; |
850 | |
851 | EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) |
852 | : Base(xpr.rhs(), xpr.lhs().diagonal()) |
853 | { |
854 | } |
855 | |
856 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const |
857 | { |
858 | return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); |
859 | } |
860 | |
861 | #ifndef __CUDACC__ |
862 | template<int LoadMode,typename PacketType> |
863 | EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const |
864 | { |
865 | // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. |
866 | // See also similar calls below. |
867 | return this->template packet_impl<LoadMode,PacketType>(row,col, row, |
868 | typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type()); |
869 | } |
870 | |
871 | template<int LoadMode,typename PacketType> |
872 | EIGEN_STRONG_INLINE PacketType packet(Index idx) const |
873 | { |
874 | return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); |
875 | } |
876 | #endif |
877 | }; |
878 | |
879 | // dense * diagonal |
880 | template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> |
881 | struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape> |
882 | : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> |
883 | { |
884 | typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base; |
885 | using Base::m_diagImpl; |
886 | using Base::m_matImpl; |
887 | using Base::coeff; |
888 | typedef typename Base::Scalar Scalar; |
889 | |
890 | typedef Product<Lhs, Rhs, ProductKind> XprType; |
891 | typedef typename XprType::PlainObject PlainObject; |
892 | |
893 | enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; |
894 | |
895 | EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) |
896 | : Base(xpr.lhs(), xpr.rhs().diagonal()) |
897 | { |
898 | } |
899 | |
900 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const |
901 | { |
902 | return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); |
903 | } |
904 | |
905 | #ifndef __CUDACC__ |
906 | template<int LoadMode,typename PacketType> |
907 | EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const |
908 | { |
909 | return this->template packet_impl<LoadMode,PacketType>(row,col, col, |
910 | typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type()); |
911 | } |
912 | |
913 | template<int LoadMode,typename PacketType> |
914 | EIGEN_STRONG_INLINE PacketType packet(Index idx) const |
915 | { |
916 | return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); |
917 | } |
918 | #endif |
919 | }; |
920 | |
921 | /*************************************************************************** |
922 | * Products with permutation matrices |
923 | ***************************************************************************/ |
924 | |
925 | /** \internal |
926 | * \class permutation_matrix_product |
927 | * Internal helper class implementing the product between a permutation matrix and a matrix. |
928 | * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h |
929 | */ |
930 | template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape> |
931 | struct permutation_matrix_product; |
932 | |
933 | template<typename ExpressionType, int Side, bool Transposed> |
934 | struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> |
935 | { |
936 | typedef typename nested_eval<ExpressionType, 1>::type MatrixType; |
937 | typedef typename remove_all<MatrixType>::type MatrixTypeCleaned; |
938 | |
939 | template<typename Dest, typename PermutationType> |
940 | static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) |
941 | { |
942 | MatrixType mat(xpr); |
943 | const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); |
    // FIXME we need an is_same for expressions that is not sensitive to constness. For instance
945 | // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true. |
946 | //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)) |
947 | if(is_same_dense(dst, mat)) |
948 | { |
949 | // apply the permutation inplace |
950 | Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size()); |
951 | mask.fill(false); |
952 | Index r = 0; |
953 | while(r < perm.size()) |
954 | { |
955 | // search for the next seed |
956 | while(r<perm.size() && mask[r]) r++; |
957 | if(r>=perm.size()) |
958 | break; |
959 | // we got one, let's follow it until we are back to the seed |
960 | Index k0 = r++; |
961 | Index kPrev = k0; |
962 | mask.coeffRef(k0) = true; |
963 | for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) |
964 | { |
965 | Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k) |
966 | .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> |
967 | (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); |
968 | |
969 | mask.coeffRef(k) = true; |
970 | kPrev = k; |
971 | } |
972 | } |
973 | } |
974 | else |
975 | { |
976 | for(Index i = 0; i < n; ++i) |
977 | { |
978 | Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> |
979 | (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) |
980 | |
981 | = |
982 | |
983 | Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime> |
984 | (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); |
985 | } |
986 | } |
987 | } |
988 | }; |
989 | |
990 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
991 | struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> |
992 | { |
993 | template<typename Dest> |
994 | static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) |
995 | { |
996 | permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs); |
997 | } |
998 | }; |
999 | |
1000 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1001 | struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> |
1002 | { |
1003 | template<typename Dest> |
1004 | static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) |
1005 | { |
1006 | permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs); |
1007 | } |
1008 | }; |
1009 | |
1010 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1011 | struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> |
1012 | { |
1013 | template<typename Dest> |
1014 | static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) |
1015 | { |
1016 | permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs); |
1017 | } |
1018 | }; |
1019 | |
1020 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1021 | struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> |
1022 | { |
1023 | template<typename Dest> |
1024 | static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) |
1025 | { |
1026 | permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs); |
1027 | } |
1028 | }; |
1029 | |
1030 | |
1031 | /*************************************************************************** |
* Products with transposition matrices
1033 | ***************************************************************************/ |
1034 | |
1035 | // FIXME could we unify Transpositions and Permutation into a single "shape"?? |
1036 | |
1037 | /** \internal |
1038 | * \class transposition_matrix_product |
 * Internal helper class implementing the product between a transposition matrix and a matrix.
1040 | */ |
1041 | template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape> |
1042 | struct transposition_matrix_product |
1043 | { |
1044 | typedef typename nested_eval<ExpressionType, 1>::type MatrixType; |
1045 | typedef typename remove_all<MatrixType>::type MatrixTypeCleaned; |
1046 | |
1047 | template<typename Dest, typename TranspositionType> |
1048 | static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) |
1049 | { |
1050 | MatrixType mat(xpr); |
1051 | typedef typename TranspositionType::StorageIndex StorageIndex; |
1052 | const Index size = tr.size(); |
1053 | StorageIndex j = 0; |
1054 | |
1055 | if(!is_same_dense(dst,mat)) |
1056 | dst = mat; |
1057 | |
1058 | for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k) |
1059 | if(Index(j=tr.coeff(k))!=k) |
1060 | { |
1061 | if(Side==OnTheLeft) dst.row(k).swap(dst.row(j)); |
1062 | else if(Side==OnTheRight) dst.col(k).swap(dst.col(j)); |
1063 | } |
1064 | } |
1065 | }; |
1066 | |
1067 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1068 | struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> |
1069 | { |
1070 | template<typename Dest> |
1071 | static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) |
1072 | { |
1073 | transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs); |
1074 | } |
1075 | }; |
1076 | |
1077 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1078 | struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> |
1079 | { |
1080 | template<typename Dest> |
1081 | static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) |
1082 | { |
1083 | transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs); |
1084 | } |
1085 | }; |
1086 | |
1087 | |
1088 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1089 | struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> |
1090 | { |
1091 | template<typename Dest> |
1092 | static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) |
1093 | { |
1094 | transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs); |
1095 | } |
1096 | }; |
1097 | |
1098 | template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> |
1099 | struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> |
1100 | { |
1101 | template<typename Dest> |
1102 | static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) |
1103 | { |
1104 | transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs); |
1105 | } |
1106 | }; |
1107 | |
1108 | } // end namespace internal |
1109 | |
1110 | } // end namespace Eigen |
1111 | |
#endif // EIGEN_PRODUCTEVALUATORS_H
1113 | |