// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H

namespace Eigen {

enum {
  Large = 2,
  Small = 3
};

namespace internal {

template<int Rows, int Cols, int Depth> struct product_type_selector;

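// Classifies one dimension of a product as 1, Small, or Large, depending on its compile-time
// (or maximal compile-time) size relative to EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD; outside of
// CUDA device code, a fully dynamic dimension is always considered Large.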
template<int Size, int MaxSize> struct product_size_category
{
  enum {
    #ifndef EIGEN_CUDA_ARCH
    is_large = MaxSize == Dynamic ||
               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
    #else
    is_large = 0,
    #endif
    value = is_large  ? Large
          : Size == 1 ? 1
                      : Small
  };
};

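// Determines the kind of product (inner, outer, GEMV, GEMM, or a coefficient-based mode) implied
// by the compile-time shapes of the two operands; the selected product kind is exposed through
// both 'value' and 'ret'.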
template<typename Lhs, typename Rhs> struct product_type
{
  typedef typename remove_all<Lhs>::type _Lhs;
  typedef typename remove_all<Rhs>::type _Rhs;
  enum {
    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
    Rows    = traits<_Lhs>::RowsAtCompileTime,
    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
    Cols    = traits<_Rhs>::ColsAtCompileTime,
    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
                                           traits<_Rhs>::MaxRowsAtCompileTime),
    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
                                        traits<_Rhs>::RowsAtCompileTime)
  };

  // The code is split across the _select enums and the typedef below in order to
  // work around an internal compiler error in gcc 4.1 and 4.2.
private:
  enum {
    rows_select = product_size_category<Rows,MaxRows>::value,
    cols_select = product_size_category<Cols,MaxCols>::value,
    depth_select = product_size_category<Depth,MaxDepth>::value
  };
  typedef product_type_selector<rows_select, cols_select, depth_select> selector;

public:
  enum {
    value = selector::ret,
    ret = selector::ret
  };
#ifdef EIGEN_DEBUG_PRODUCT
  static void debug()
  {
    EIGEN_DEBUG_VAR(Rows);
    EIGEN_DEBUG_VAR(Cols);
    EIGEN_DEBUG_VAR(Depth);
    EIGEN_DEBUG_VAR(rows_select);
    EIGEN_DEBUG_VAR(cols_select);
    EIGEN_DEBUG_VAR(depth_select);
    EIGEN_DEBUG_VAR(value);
  }
#endif
};

/* The following allows one to select the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile-time mapping from {1,Small,Large}^3 -> {product types}. */
// FIXME I'm not sure the current mapping is the ideal one.
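// For example (with the default cache-friendly threshold): a fixed-size 4x4 * 4x4 product has all
// three dimensions Small and maps to CoeffBasedProductMode, a large dynamic-size matrix-matrix
// product maps to (Large,Large,Large) and hence GemmProduct, and a large dynamic matrix times a
// vector maps to (Large,1,Large) and hence GemvProduct.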
template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
template<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
template<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
template<int Depth>     struct product_type_selector<1,    1, Depth>     { enum { ret = InnerProduct }; };
template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };

} // end namespace internal

/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/

// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Con: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up as a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
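// For the record, with the current convention the result of an inner product is a 1x1 expression;
// users who want a plain Scalar can already write, e.g.:
//   Eigen::Vector3d u, v;
//   double d1 = u.dot(v);                               // scalar result
//   Eigen::Matrix<double,1,1> d2 = u.transpose() * v;   // 1x1 matrix result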

/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/

/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/

/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
 *  1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
 *  2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
 *  3 - all other cases are handled using a simple loop along the outer-storage direction.
 * Therefore we need a lower level meta selector.
 * Furthermore, if the matrix is the rhs, then the product has to be transposed.
 */
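// Illustrative example (the actual dispatch is performed by the product evaluators, not here):
// with a large column-major MatrixXf A and a VectorXf x, y.noalias() = A * x falls into case 1
// and is handled by gemv_dense_selector<OnTheRight,ColMajor,true>, whereas x.transpose() * A puts
// the vector on the left and goes through the transposing OnTheLeft specialization below.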
namespace internal {

template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_dense_selector;

} // end namespace internal

namespace internal {

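// gemv_static_vector_if provides, when Cond==true and the maximal size is known at compile time,
// a small, suitably aligned buffer that can hold a temporary copy of the result or of the rhs
// vector; when the size is fully dynamic its data() returns 0 so that the caller falls back to a
// dynamically allocated buffer, and when Cond==false data() is never meant to be called.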
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;

template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
  EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};

template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};

template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
  enum {
    ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
    PacketSize     = internal::packet_traits<Scalar>::size
  };
  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
  #else
  // Some architectures cannot align on the stack,
  // => let's manually enforce alignment by allocating more data and returning the address of the first aligned element.
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    return ForceAlignment
            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
            : m_data.array;
  }
  #endif
};

// The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible>
struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    Transpose<Dest> destT(dest);
    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
  }
};

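// The matrix is column-major with direct (BLAS-compatible) access => call the cache-friendly
// column-major GEMV kernel. The result may first go through an aligned temporary when the
// destination cannot be written directly (possible non-unit inner stride) or when alpha cannot be
// represented in the rhs scalar type (complex-by-real product).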
template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar       LhsScalar;
    typedef typename Rhs::Scalar       RhsScalar;
    typedef typename Dest::Scalar      ResScalar;
    typedef typename Dest::RealScalar  RealScalar;

    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef internal::blas_traits<Rhs> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;

    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;

    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
                                  * RhsBlasTraits::extractScalarFactor(rhs);

    // make sure Dest is a compile-time vector type (bug 1166)
    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1;
      // on the other hand, it is good for the cache to pack the vector anyway...
      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
      MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
    };

    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

    if(!MightCannotUseDest)
    {
      // Shortcut if we are sure to be able to use dest directly;
      // this helps the compiler generate cleaner and more optimized code for the most common cases.
      general_matrix_vector_product
          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
          actualLhs.rows(), actualLhs.cols(),
          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
          dest.data(), 1,
          compatibleAlpha);
    }
    else
    {
      gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

      const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;

      ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
                                                    evalToDest ? dest.data() : static_dest.data());

      if(!evalToDest)
      {
        #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
        Index size = dest.size();
        EIGEN_DENSE_STORAGE_CTOR_PLUGIN
        #endif
        if(!alphaIsCompatible)
        {
          MappedDest(actualDestPtr, dest.size()).setZero();
          compatibleAlpha = RhsScalar(1);
        }
        else
          MappedDest(actualDestPtr, dest.size()) = dest;
      }

      general_matrix_vector_product
          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
          actualLhs.rows(), actualLhs.cols(),
          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
          actualDestPtr, 1,
          compatibleAlpha);

      if (!evalToDest)
      {
        if(!alphaIsCompatible)
          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
        else
          dest = MappedDest(actualDestPtr, dest.size());
      }
    }
  }
};

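// The matrix is row-major with direct (BLAS-compatible) access => call the row-major GEMV kernel.
// The rhs vector is first copied into a unit-stride, suitably aligned temporary when its inner
// stride is not 1.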
template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar   LhsScalar;
    typedef typename Rhs::Scalar   RhsScalar;
    typedef typename Dest::Scalar  ResScalar;

    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef internal::blas_traits<Rhs> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;

    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
                                  * RhsBlasTraits::extractScalarFactor(rhs);

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1;
      // on the other hand, it is good for the cache to pack the vector anyway...
      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
    };

    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());

    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      Index size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }

    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
    general_matrix_vector_product
        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
        RhsMapper(actualRhsPtr, 1),
        dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
        actualAlpha);
  }
};

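// No direct (BLAS-compatible) access to the column-major matrix => fall back to a simple loop
// accumulating one scaled column of the lhs per coefficient of the rhs.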
template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
    const Index size = rhs.rows();
    for(Index k=0; k<size; ++k)
      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
  }
};

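// No direct (BLAS-compatible) access to the row-major matrix => compute each destination
// coefficient as the sum of the coefficient-wise product of the corresponding lhs row with the rhs.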
template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
    const Index rows = dest.rows();
    for(Index i=0; i<rows; ++i)
      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
  }
};

} // end namespace internal

/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/

/** \returns the matrix product of \c *this and \a other.
  *
  * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
  *
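  * For example (a minimal illustration):
  * \code
  * Eigen::Matrix3f A, B;
  * A.setRandom();
  * B.setRandom();
  * Eigen::Matrix3f C = A * B;           // small fixed-size product, evaluated coefficient-wise
  *
  * Eigen::MatrixXf D(200,200), E(200,200), F(200,200);
  * D.setRandom();
  * E.setRandom();
  * F.noalias() = D * E;                 // large dynamic-size product, dispatched to the GEMM kernel
  * \endcode
  *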
  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
  */
template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
  // A note regarding the function declaration: in MSVC, this function will sometimes
  // not be inlined since DenseStorage is an unwindable object for dynamic
  // matrices and product types are holding a member to store the result.
  // Thus tagging this function with EIGEN_STRONG_INLINE does not help.
  enum {
    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
                   || OtherDerived::RowsAtCompileTime==Dynamic
                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };
  // note to the lost user:
  //    * for a dot product use: v1.dot(v2)
  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived,OtherDerived>::debug();
#endif

  return Product<Derived, OtherDerived>(derived(), other.derived());
}

/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product will behave like any other expression: the coefficients of the product will be
  * computed one at a time, as requested. This might be useful in some extremely rare cases when only
  * a small and scattered fraction of the result's coefficients has to be computed.
  *
  * \warning This version of the matrix product can be much, much slower. So use it only if you know
  * what you are doing and have measured a true speed improvement.
  *
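  * For example (a minimal illustration):
  * \code
  * Eigen::MatrixXd A(500,500), B(500,500);
  * A.setRandom();
  * B.setRandom();
  * Eigen::MatrixXd C = A.lazyProduct(B);  // coefficient-based evaluation, bypassing the GEMM kernel
  * \endcode
  *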
  * \sa operator*(const MatrixBase&)
  */
template<typename Derived>
template<typename OtherDerived>
const Product<Derived,OtherDerived,LazyProduct>
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
  enum {
    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
                   || OtherDerived::RowsAtCompileTime==Dynamic
                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };
  // note to the lost user:
  //    * for a dot product use: v1.dot(v2)
  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)

  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
}

} // end namespace Eigen

#endif // EIGEN_GENERAL_PRODUCT_H