1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
5// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
6// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
7//
8// This Source Code Form is subject to the terms of the Mozilla
9// Public License v. 2.0. If a copy of the MPL was not distributed
10// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11
12#ifndef EIGEN_ASSIGN_EVALUATOR_H
13#define EIGEN_ASSIGN_EVALUATOR_H
14
15namespace Eigen {
16
17// This implementation is based on Assign.h
18
19namespace internal {
20
21/***************************************************************************
22* Part 1 : the logic deciding a strategy for traversal and unrolling *
23***************************************************************************/
24
25// copy_using_evaluator_traits is based on assign_traits
26
// Compile-time traits deciding how a dense assignment performed through AssignFunc
// is carried out. From the flags, alignments and compile-time sizes exposed by the
// destination and source evaluators, and from functor_traits<AssignFunc>::PacketAccess,
// it derives:
//  - Traversal:  the traversal strategy (default, linear, or inner/linear/slice vectorized),
//  - Unrolling:  the unrolling strategy (none, inner loop only, or complete),
//  - PacketType: the packet type matching the selected traversal.
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;

  // Evaluator flags of both sides, tested against the *Bit masks below.
  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags
  };

public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
    // Combination of both alignments through EIGEN_PLAIN_ENUM_MIN (presumably the smaller one).
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    // Inner dimension of Dst: the whole size for vectors, otherwise the number of
    // columns when RowMajorBit is set in DstFlags and the number of rows when it is not.
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    // Same selection as InnerSize, applied to the Max*AtCompileTime values.
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
              : int(Dst::MaxRowsAtCompileTime),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    // NOTE(review): despite its name this is initialized from Dst::SizeAtCompileTime.
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };

  // TODO distinguish between linear traversal and inner-traversals
  // Packet types selected by find_best_packet for the whole size and for the inner size.
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;

  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };

public:
  enum {
    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
  };

private:
  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    // Vectorization requires matching storage orders, ActualPacketAccessBit on both
    // sides, and a functor that supports packet access.
    MightVectorize = bool(StorageOrdersAgree)
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && bool(functor_traits<AssignFunc>::PacketAccess),
    // Inner vectorization additionally requires a fixed inner size and a fixed outer
    // stride that are both multiples of the inner packet size, plus sufficient joint
    // alignment unless EIGEN_UNALIGNED_VECTORIZE is enabled.
    MayInnerVectorize = MightVectorize
                     && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
                     && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                     && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
    // Linear (single index) traversal requires matching storage orders and
    // LinearAccessBit on both sides.
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
                      && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
                     && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix
         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
  };

public:
  enum {
    // Priority order: linear vectorization when its packet is larger than the inner
    // packet, then inner vectorization, linear vectorization, slice vectorization,
    // scalar linear traversal, and finally the default (outer, inner) traversal.
    Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
              : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
              : int(MayLinearize) ? int(LinearTraversal)
              : int(DefaultTraversal),
    // True for any of the three packet-based traversals.
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

  // Linear packet type for linear vectorization, inner packet type for every other traversal.
  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;

private:
  enum {
    // Number of coefficients handled per assignment step for the selected traversal.
    ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
                     : Vectorized ? InnerPacketSize
                     : 1,
    // The unrolling budget scales with the packet size.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
    // Unrolling is allowed when the size is fixed and size * (dst cost + src cost)
    // fits within the budget, for the whole expression or for one inner slice.
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
    MayUnrollInner = int(InnerSize) != Dynamic
                  && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
  };

public:
  enum {
    // Unrolling strategy per traversal:
    //  - inner vectorized / default: complete, else inner, else none;
    //  - linear vectorized: complete only if the destination is sufficiently aligned
    //    (or EIGEN_UNALIGNED_VECTORIZE is enabled), else none;
    //  - linear: complete or none;
    //  - slice vectorized: inner unrolling only when EIGEN_UNALIGNED_VECTORIZE is enabled;
    //  - anything else: none.
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
                ? (
                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
                  : int(MayUnrollInner) ? int(InnerUnrolling)
                  : int(NoUnrolling)
                  )
              : int(Traversal) == int(LinearVectorizedTraversal)
                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
                          ? int(CompleteUnrolling)
                          : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                              : int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
              : int(Traversal) == int(SliceVectorizedTraversal)
                ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
                                         : int(NoUnrolling) )
#endif
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Prints the expression types, the flags (in hexadecimal) and the intermediate
  // decisions computed above to std::cerr. Only compiled when EIGEN_DEBUG_ASSIGN is defined.
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(LinearPacketSize)
    EIGEN_DEBUG_VAR(InnerPacketSize)
    EIGEN_DEBUG_VAR(ActualPacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
183
184/***************************************************************************
185* Part 2 : meta-unrollers
186***************************************************************************/
187
188/************************
189*** Default traversal ***
190************************/
191
192template<typename Kernel, int Index, int Stop>
193struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
194{
195 // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
196 typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
197 typedef typename DstEvaluatorType::XprType DstXprType;
198
199 enum {
200 outer = Index / DstXprType::InnerSizeAtCompileTime,
201 inner = Index % DstXprType::InnerSizeAtCompileTime
202 };
203
204 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
205 {
206 kernel.assignCoeffByOuterInner(outer, inner);
207 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
208 }
209};
210
211template<typename Kernel, int Stop>
212struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
213{
214 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
215};
216
217template<typename Kernel, int Index_, int Stop>
218struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
219{
220 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
221 {
222 kernel.assignCoeffByOuterInner(outer, Index_);
223 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
224 }
225};
226
227template<typename Kernel, int Stop>
228struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
229{
230 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
231};
232
233/***********************
234*** Linear traversal ***
235***********************/
236
237template<typename Kernel, int Index, int Stop>
238struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
239{
240 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
241 {
242 kernel.assignCoeff(Index);
243 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
244 }
245};
246
247template<typename Kernel, int Stop>
248struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
249{
250 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
251};
252
253/**************************
254*** Inner vectorization ***
255**************************/
256
257template<typename Kernel, int Index, int Stop>
258struct copy_using_evaluator_innervec_CompleteUnrolling
259{
260 // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
261 typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
262 typedef typename DstEvaluatorType::XprType DstXprType;
263 typedef typename Kernel::PacketType PacketType;
264
265 enum {
266 outer = Index / DstXprType::InnerSizeAtCompileTime,
267 inner = Index % DstXprType::InnerSizeAtCompileTime,
268 SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
269 DstAlignment = Kernel::AssignmentTraits::DstAlignment
270 };
271
272 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
273 {
274 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
275 enum { NextIndex = Index + unpacket_traits<PacketType>::size };
276 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
277 }
278};
279
280template<typename Kernel, int Stop>
281struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
282{
283 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
284};
285
286template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
287struct copy_using_evaluator_innervec_InnerUnrolling
288{
289 typedef typename Kernel::PacketType PacketType;
290 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
291 {
292 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
293 enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
294 copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
295 }
296};
297
298template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
299struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
300{
301 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
302};
303
304/***************************************************************************
305* Part 3 : implementation of all cases
306***************************************************************************/
307
308// dense_assignment_loop is based on assign_impl
309
// Primary template of the assignment loop, declared only. The Traversal and
// Unrolling parameters default to the values computed by Kernel::AssignmentTraits,
// and the specializations below provide a static run(Kernel&) for each combination.
template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;
314
315/************************
316*** Default traversal ***
317************************/
318
319template<typename Kernel>
320struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
321{
322 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
323 {
324 for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
325 for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
326 kernel.assignCoeffByOuterInner(outer, inner);
327 }
328 }
329 }
330};
331
332template<typename Kernel>
333struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
334{
335 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
336 {
337 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
338 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
339 }
340};
341
342template<typename Kernel>
343struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
344{
345 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
346 {
347 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
348
349 const Index outerSize = kernel.outerSize();
350 for(Index outer = 0; outer < outerSize; ++outer)
351 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
352 }
353};
354
355/***************************
356*** Linear vectorization ***
357***************************/
358
359
360// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
361// of the non vectorizable beginning and ending parts
362
363template <bool IsAligned = false>
364struct unaligned_dense_assignment_loop
365{
366 // if IsAligned = true, then do nothing
367 template <typename Kernel>
368 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
369};
370
371template <>
372struct unaligned_dense_assignment_loop<false>
373{
374 // MSVC must not inline this functions. If it does, it fails to optimize the
375 // packet access path.
376 // FIXME check which version exhibits this issue
377#if EIGEN_COMP_MSVC
378 template <typename Kernel>
379 static EIGEN_DONT_INLINE void run(Kernel &kernel,
380 Index start,
381 Index end)
382#else
383 template <typename Kernel>
384 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
385 Index start,
386 Index end)
387#endif
388 {
389 for (Index index = start; index < end; ++index)
390 kernel.assignCoeff(index);
391 }
392};
393
394template<typename Kernel>
395struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
396{
397 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
398 {
399 const Index size = kernel.size();
400 typedef typename Kernel::Scalar Scalar;
401 typedef typename Kernel::PacketType PacketType;
402 enum {
403 requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
404 packetSize = unpacket_traits<PacketType>::size,
405 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
406 dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
407 : int(Kernel::AssignmentTraits::DstAlignment),
408 srcAlignment = Kernel::AssignmentTraits::JointAlignment
409 };
410 const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
411 const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
412
413 unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
414
415 for(Index index = alignedStart; index < alignedEnd; index += packetSize)
416 kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
417
418 unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
419 }
420};
421
422template<typename Kernel>
423struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
424{
425 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
426 {
427 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
428 typedef typename Kernel::PacketType PacketType;
429
430 enum { size = DstXprType::SizeAtCompileTime,
431 packetSize =unpacket_traits<PacketType>::size,
432 alignedSize = (size/packetSize)*packetSize };
433
434 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
435 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
436 }
437};
438
439/**************************
440*** Inner vectorization ***
441**************************/
442
443template<typename Kernel>
444struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
445{
446 typedef typename Kernel::PacketType PacketType;
447 enum {
448 SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
449 DstAlignment = Kernel::AssignmentTraits::DstAlignment
450 };
451 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
452 {
453 const Index innerSize = kernel.innerSize();
454 const Index outerSize = kernel.outerSize();
455 const Index packetSize = unpacket_traits<PacketType>::size;
456 for(Index outer = 0; outer < outerSize; ++outer)
457 for(Index inner = 0; inner < innerSize; inner+=packetSize)
458 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
459 }
460};
461
462template<typename Kernel>
463struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
464{
465 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
466 {
467 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
468 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
469 }
470};
471
472template<typename Kernel>
473struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
474{
475 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
476 {
477 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
478 typedef typename Kernel::AssignmentTraits Traits;
479 const Index outerSize = kernel.outerSize();
480 for(Index outer = 0; outer < outerSize; ++outer)
481 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
482 Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
483 }
484};
485
486/***********************
487*** Linear traversal ***
488***********************/
489
490template<typename Kernel>
491struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
492{
493 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
494 {
495 const Index size = kernel.size();
496 for(Index i = 0; i < size; ++i)
497 kernel.assignCoeff(i);
498 }
499};
500
501template<typename Kernel>
502struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
503{
504 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
505 {
506 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
507 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
508 }
509};
510
511/**************************
512*** Slice vectorization ***
513***************************/
514
// Slice vectorized traversal without unrolling. Each inner slice (one per outer
// index) is split into a scalar head [0, alignedStart), a packet body
// [alignedStart, alignedEnd) and a scalar tail [alignedEnd, innerSize). The start of
// the packet body is updated from one slice to the next using the outer stride.
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
      // Whether packet stores can be aligned by skipping leading coefficients.
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      // Destination statically known to satisfy the requested alignment.
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      // Store mode used for the packet body.
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = kernel.dstDataPtr();
    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligned-on scalar, so alignment is not possible:
      // fall back to the plain coefficient-wise double loop
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    // The mask arithmetic below assumes packetSize is a power of two.
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    // Amount added (modulo packetSize) to alignedStart when moving to the next outer index.
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    // Start of the packet body in the first slice; presumably first_aligned returns the
    // index of the first coefficient meeting requestedAlignment (bounded by innerSize).
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      // Round the remaining length down to a whole number of packets.
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // Packet-body start for the next slice, clamped to innerSize.
      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};
561
562#if EIGEN_UNALIGNED_VECTORIZE
563template<typename Kernel>
564struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
565{
566 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
567 {
568 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
569 typedef typename Kernel::PacketType PacketType;
570
571 enum { size = DstXprType::InnerSizeAtCompileTime,
572 packetSize =unpacket_traits<PacketType>::size,
573 vectorizableSize = (size/packetSize)*packetSize };
574
575 for(Index outer = 0; outer < kernel.outerSize(); ++outer)
576 {
577 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
578 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
579 }
580 }
581};
582#endif
583
584
585/***************************************************************************
586* Part 4 : Generic dense assignment kernel
587***************************************************************************/
588
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators, and the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, bypassing the functor.
595template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
596class generic_dense_assignment_kernel
597{
598protected:
599 typedef typename DstEvaluatorTypeT::XprType DstXprType;
600 typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
601public:
602
603 typedef DstEvaluatorTypeT DstEvaluatorType;
604 typedef SrcEvaluatorTypeT SrcEvaluatorType;
605 typedef typename DstEvaluatorType::Scalar Scalar;
606 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
607 typedef typename AssignmentTraits::PacketType PacketType;
608
609
610 EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
611 : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
612 {
613 #ifdef EIGEN_DEBUG_ASSIGN
614 AssignmentTraits::debug();
615 #endif
616 }
617
618 EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
619 EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
620 EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
621 EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
622 EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
623 EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
624
625 EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
626 EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
627
628 /// Assign src(row,col) to dst(row,col) through the assignment functor.
629 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
630 {
631 m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
632 }
633
634 /// \sa assignCoeff(Index,Index)
635 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
636 {
637 m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
638 }
639
640 /// \sa assignCoeff(Index,Index)
641 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
642 {
643 Index row = rowIndexByOuterInner(outer, inner);
644 Index col = colIndexByOuterInner(outer, inner);
645 assignCoeff(row, col);
646 }
647
648
649 template<int StoreMode, int LoadMode, typename PacketType>
650 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
651 {
652 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
653 }
654
655 template<int StoreMode, int LoadMode, typename PacketType>
656 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
657 {
658 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
659 }
660
661 template<int StoreMode, int LoadMode, typename PacketType>
662 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
663 {
664 Index row = rowIndexByOuterInner(outer, inner);
665 Index col = colIndexByOuterInner(outer, inner);
666 assignPacket<StoreMode,LoadMode,PacketType>(row, col);
667 }
668
669 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
670 {
671 typedef typename DstEvaluatorType::ExpressionTraits Traits;
672 return int(Traits::RowsAtCompileTime) == 1 ? 0
673 : int(Traits::ColsAtCompileTime) == 1 ? inner
674 : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
675 : inner;
676 }
677
678 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
679 {
680 typedef typename DstEvaluatorType::ExpressionTraits Traits;
681 return int(Traits::ColsAtCompileTime) == 1 ? 0
682 : int(Traits::RowsAtCompileTime) == 1 ? inner
683 : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
684 : outer;
685 }
686
687 EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
688 {
689 return m_dstExpr.data();
690 }
691
692protected:
693 DstEvaluatorType& m_dst;
694 const SrcEvaluatorType& m_src;
695 const Functor &m_functor;
696 // TODO find a way to avoid the needs of the original expression
697 DstXprType& m_dstExpr;
698};
699
700/***************************************************************************
701* Part 5 : Entry point for dense rectangular assignment
702***************************************************************************/
703
704template<typename DstXprType,typename SrcXprType, typename Functor>
705EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
706void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
707{
708 EIGEN_ONLY_USED_FOR_DEBUG(dst);
709 EIGEN_ONLY_USED_FOR_DEBUG(src);
710 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
711}
712
713template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
714EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
715void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
716{
717 Index dstRows = src.rows();
718 Index dstCols = src.cols();
719 if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
720 dst.resize(dstRows, dstCols);
721 eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
722}
723
724template<typename DstXprType, typename SrcXprType, typename Functor>
725EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
726{
727 typedef evaluator<DstXprType> DstEvaluatorType;
728 typedef evaluator<SrcXprType> SrcEvaluatorType;
729
730 SrcEvaluatorType srcEvaluator(src);
731
732 // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
733 // we need to resize the destination after the source evaluator has been created.
734 resize_if_allowed(dst, src, func);
735
736 DstEvaluatorType dstEvaluator(dst);
737
738 typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
739 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
740
741 dense_assignment_loop<Kernel>::run(kernel);
742}
743
744template<typename DstXprType, typename SrcXprType>
745EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
746{
747 call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
748}
749
750/***************************************************************************
751* Part 6 : Generic assignment
752***************************************************************************/
753
// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
// Maps a (destination shape, source shape) pair to a tag type exposed as Kind.
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kinds defined in this file (empty tag types):
struct Dense2Dense {};
struct EigenBase2EigenBase {};

// Default: any pair of shapes maps to EigenBase2EigenBase.
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
// Dense to dense assignments map to Dense2Dense.
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
765
766// This is the main assignment class
// Declaration only. Kind defaults to the AssignmentKind selected from the
// evaluator_traits shapes of the destination and source expressions; EnableIf is an
// extra parameter defaulting to void (presumably a hook for SFINAE-based specializations).
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename EnableIf = void>
struct Assignment;
771
772
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
777
778template<typename Dst, typename Src>
779EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
780void call_assignment(Dst& dst, const Src& src)
781{
782 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
783}
784template<typename Dst, typename Src>
785EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
786void call_assignment(const Dst& dst, const Src& src)
787{
788 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
789}
790
791// Deal with "assume-aliasing"
792template<typename Dst, typename Src, typename Func>
793EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
794void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
795{
796 typename plain_matrix_type<Src>::type tmp(src);
797 call_assignment_no_alias(dst, tmp, func);
798}
799
800template<typename Dst, typename Src, typename Func>
801EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
802void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
803{
804 call_assignment_no_alias(dst, src, func);
805}
806
807// by-pass "assume-aliasing"
808// When there is no aliasing, we require that 'dst' has been properly resized
809template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
810EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
811void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
812{
813 call_assignment_no_alias(dst.expression(), src, func);
814}
815
816
817template<typename Dst, typename Src, typename Func>
818EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
819void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
820{
821 enum {
822 NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
823 || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
824 ) && int(Dst::SizeAtCompileTime) != 1
825 };
826
827 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
828 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
829 ActualDstType actualDst(dst);
830
831 // TODO check whether this is the right place to perform these checks:
832 EIGEN_STATIC_ASSERT_LVALUE(Dst)
833 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
834 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
835
836 Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
837}
838template<typename Dst, typename Src>
839EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
840void call_assignment_no_alias(Dst& dst, const Src& src)
841{
842 call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
843}
844
845template<typename Dst, typename Src, typename Func>
846EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
847void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
848{
849 // TODO check whether this is the right place to perform these checks:
850 EIGEN_STATIC_ASSERT_LVALUE(Dst)
851 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
852 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
853
854 Assignment<Dst,Src,Func>::run(dst, src, func);
855}
856template<typename Dst, typename Src>
857EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
858void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
859{
860 call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
861}
862
// Forward declaration: called by the dense-to-dense assignment below
// unless EIGEN_NO_DEBUG is defined.
template<typename Dst, typename Src>
void check_for_aliasing(const Dst &dst, const Src &src);
865
866// Generic Dense to Dense assignment
867// Note that the last template argument "Weak" is needed to make it possible to perform
868// both partial specialization+SFINAE without ambiguous specialization
869template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
870struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
871{
872 EIGEN_DEVICE_FUNC
873 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
874 {
875#ifndef EIGEN_NO_DEBUG
876 internal::check_for_aliasing(dst, src);
877#endif
878
879 call_dense_assignment_loop(dst, src, func);
880 }
881};
882
883// Generic assignment through evalTo.
884// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
885// Note that the last template argument "Weak" is needed to make it possible to perform
886// both partial specialization+SFINAE without ambiguous specialization
887template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
888struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
889{
890 EIGEN_DEVICE_FUNC
891 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
892 {
893 Index dstRows = src.rows();
894 Index dstCols = src.cols();
895 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
896 dst.resize(dstRows, dstCols);
897
898 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
899 src.evalTo(dst);
900 }
901
902 // NOTE The following two functions are templated to avoid their instanciation if not needed
903 // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
904 template<typename SrcScalarType>
905 EIGEN_DEVICE_FUNC
906 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
907 {
908 Index dstRows = src.rows();
909 Index dstCols = src.cols();
910 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
911 dst.resize(dstRows, dstCols);
912
913 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
914 src.addTo(dst);
915 }
916
917 template<typename SrcScalarType>
918 EIGEN_DEVICE_FUNC
919 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
920 {
921 Index dstRows = src.rows();
922 Index dstCols = src.cols();
923 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
924 dst.resize(dstRows, dstCols);
925
926 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
927 src.subTo(dst);
928 }
929};
930
931} // namespace internal
932
933} // end namespace Eigen
934
935#endif // EIGEN_ASSIGN_EVALUATOR_H
936