1 | // This file is part of Eigen, a lightweight C++ template library |
2 | // for linear algebra. |
3 | // |
4 | // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> |
5 | // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> |
6 | // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> |
7 | // |
8 | // This Source Code Form is subject to the terms of the Mozilla |
9 | // Public License v. 2.0. If a copy of the MPL was not distributed |
10 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11 | |
12 | #ifndef EIGEN_ASSIGN_EVALUATOR_H |
13 | #define EIGEN_ASSIGN_EVALUATOR_H |
14 | |
15 | namespace Eigen { |
16 | |
17 | // This implementation is based on Assign.h |
18 | |
19 | namespace internal { |
20 | |
21 | /*************************************************************************** |
22 | * Part 1 : the logic deciding a strategy for traversal and unrolling * |
23 | ***************************************************************************/ |
24 | |
25 | // copy_using_evaluator_traits is based on assign_traits |
26 | |
27 | template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc> |
28 | struct copy_using_evaluator_traits |
29 | { |
30 | typedef typename DstEvaluator::XprType Dst; |
31 | typedef typename Dst::Scalar DstScalar; |
32 | |
33 | enum { |
34 | DstFlags = DstEvaluator::Flags, |
35 | SrcFlags = SrcEvaluator::Flags |
36 | }; |
37 | |
38 | public: |
39 | enum { |
40 | DstAlignment = DstEvaluator::Alignment, |
41 | SrcAlignment = SrcEvaluator::Alignment, |
42 | DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, |
43 | JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) |
44 | }; |
45 | |
46 | private: |
47 | enum { |
48 | InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) |
49 | : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) |
50 | : int(Dst::RowsAtCompileTime), |
51 | InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) |
52 | : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) |
53 | : int(Dst::MaxRowsAtCompileTime), |
54 | OuterStride = int(outer_stride_at_compile_time<Dst>::ret), |
55 | MaxSizeAtCompileTime = Dst::SizeAtCompileTime |
56 | }; |
57 | |
58 | // TODO distinguish between linear traversal and inner-traversals |
59 | typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType; |
60 | typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType; |
61 | |
62 | enum { |
63 | LinearPacketSize = unpacket_traits<LinearPacketType>::size, |
64 | InnerPacketSize = unpacket_traits<InnerPacketType>::size |
65 | }; |
66 | |
67 | public: |
68 | enum { |
69 | LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment, |
70 | InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment |
71 | }; |
72 | |
73 | private: |
74 | enum { |
75 | DstIsRowMajor = DstFlags&RowMajorBit, |
76 | SrcIsRowMajor = SrcFlags&RowMajorBit, |
77 | StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), |
78 | MightVectorize = bool(StorageOrdersAgree) |
79 | && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) |
80 | && bool(functor_traits<AssignFunc>::PacketAccess), |
81 | MayInnerVectorize = MightVectorize |
82 | && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 |
83 | && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 |
84 | && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), |
85 | MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), |
86 | MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) |
87 | && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), |
88 | /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, |
89 | so it's only good for large enough sizes. */ |
90 | MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) |
91 | && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize))) |
                      /* slice vectorization can be slow, so we only want it if the slices are big enough,
                         which is indicated by InnerMaxSize rather than InnerSize: think of a dynamic block
                         within a fixed-size matrix.
                         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization remains worthwhile. */
96 | }; |
97 | |
98 | public: |
99 | enum { |
100 | Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal) |
101 | : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) |
102 | : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) |
103 | : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) |
104 | : int(MayLinearize) ? int(LinearTraversal) |
105 | : int(DefaultTraversal), |
106 | Vectorized = int(Traversal) == InnerVectorizedTraversal |
107 | || int(Traversal) == LinearVectorizedTraversal |
108 | || int(Traversal) == SliceVectorizedTraversal |
109 | }; |
110 | |
111 | typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType; |
112 | |
113 | private: |
114 | enum { |
115 | ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize |
116 | : Vectorized ? InnerPacketSize |
117 | : 1, |
118 | UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, |
119 | MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic |
120 | && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), |
121 | MayUnrollInner = int(InnerSize) != Dynamic |
122 | && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) |
123 | }; |
124 | |
125 | public: |
126 | enum { |
127 | Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) |
128 | ? ( |
129 | int(MayUnrollCompletely) ? int(CompleteUnrolling) |
130 | : int(MayUnrollInner) ? int(InnerUnrolling) |
131 | : int(NoUnrolling) |
132 | ) |
133 | : int(Traversal) == int(LinearVectorizedTraversal) |
134 | ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment))) |
135 | ? int(CompleteUnrolling) |
136 | : int(NoUnrolling) ) |
137 | : int(Traversal) == int(LinearTraversal) |
138 | ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) |
139 | : int(NoUnrolling) ) |
140 | #if EIGEN_UNALIGNED_VECTORIZE |
141 | : int(Traversal) == int(SliceVectorizedTraversal) |
142 | ? ( bool(MayUnrollInner) ? int(InnerUnrolling) |
143 | : int(NoUnrolling) ) |
144 | #endif |
145 | : int(NoUnrolling) |
146 | }; |
147 | |
148 | #ifdef EIGEN_DEBUG_ASSIGN |
149 | static void debug() |
150 | { |
151 | std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; |
152 | std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; |
153 | std::cerr.setf(std::ios::hex, std::ios::basefield); |
154 | std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; |
155 | std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; |
156 | std::cerr.unsetf(std::ios::hex); |
157 | EIGEN_DEBUG_VAR(DstAlignment) |
158 | EIGEN_DEBUG_VAR(SrcAlignment) |
159 | EIGEN_DEBUG_VAR(LinearRequiredAlignment) |
160 | EIGEN_DEBUG_VAR(InnerRequiredAlignment) |
161 | EIGEN_DEBUG_VAR(JointAlignment) |
162 | EIGEN_DEBUG_VAR(InnerSize) |
163 | EIGEN_DEBUG_VAR(InnerMaxSize) |
164 | EIGEN_DEBUG_VAR(LinearPacketSize) |
165 | EIGEN_DEBUG_VAR(InnerPacketSize) |
166 | EIGEN_DEBUG_VAR(ActualPacketSize) |
167 | EIGEN_DEBUG_VAR(StorageOrdersAgree) |
168 | EIGEN_DEBUG_VAR(MightVectorize) |
169 | EIGEN_DEBUG_VAR(MayLinearize) |
170 | EIGEN_DEBUG_VAR(MayInnerVectorize) |
171 | EIGEN_DEBUG_VAR(MayLinearVectorize) |
172 | EIGEN_DEBUG_VAR(MaySliceVectorize) |
173 | std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; |
174 | EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) |
175 | EIGEN_DEBUG_VAR(UnrollingLimit) |
176 | EIGEN_DEBUG_VAR(MayUnrollCompletely) |
177 | EIGEN_DEBUG_VAR(MayUnrollInner) |
178 | std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; |
179 | std::cerr << std::endl; |
180 | } |
181 | #endif |
182 | }; |
183 | |
184 | /*************************************************************************** |
185 | * Part 2 : meta-unrollers |
186 | ***************************************************************************/ |
187 | |
188 | /************************ |
189 | *** Default traversal *** |
190 | ************************/ |
191 | |
192 | template<typename Kernel, int Index, int Stop> |
193 | struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling |
194 | { |
195 | // FIXME: this is not very clean, perhaps this information should be provided by the kernel? |
196 | typedef typename Kernel::DstEvaluatorType DstEvaluatorType; |
197 | typedef typename DstEvaluatorType::XprType DstXprType; |
198 | |
199 | enum { |
200 | outer = Index / DstXprType::InnerSizeAtCompileTime, |
201 | inner = Index % DstXprType::InnerSizeAtCompileTime |
202 | }; |
203 | |
204 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
205 | { |
206 | kernel.assignCoeffByOuterInner(outer, inner); |
207 | copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); |
208 | } |
209 | }; |
210 | |
211 | template<typename Kernel, int Stop> |
212 | struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> |
213 | { |
214 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } |
215 | }; |
216 | |
217 | template<typename Kernel, int Index_, int Stop> |
218 | struct copy_using_evaluator_DefaultTraversal_InnerUnrolling |
219 | { |
220 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) |
221 | { |
222 | kernel.assignCoeffByOuterInner(outer, Index_); |
223 | copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer); |
224 | } |
225 | }; |
226 | |
227 | template<typename Kernel, int Stop> |
228 | struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> |
229 | { |
230 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } |
231 | }; |
232 | |
233 | /*********************** |
234 | *** Linear traversal *** |
235 | ***********************/ |
236 | |
237 | template<typename Kernel, int Index, int Stop> |
238 | struct copy_using_evaluator_LinearTraversal_CompleteUnrolling |
239 | { |
240 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) |
241 | { |
242 | kernel.assignCoeff(Index); |
243 | copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); |
244 | } |
245 | }; |
246 | |
247 | template<typename Kernel, int Stop> |
248 | struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> |
249 | { |
250 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } |
251 | }; |
252 | |
253 | /************************** |
254 | *** Inner vectorization *** |
255 | **************************/ |
256 | |
257 | template<typename Kernel, int Index, int Stop> |
258 | struct copy_using_evaluator_innervec_CompleteUnrolling |
259 | { |
260 | // FIXME: this is not very clean, perhaps this information should be provided by the kernel? |
261 | typedef typename Kernel::DstEvaluatorType DstEvaluatorType; |
262 | typedef typename DstEvaluatorType::XprType DstXprType; |
263 | typedef typename Kernel::PacketType PacketType; |
264 | |
265 | enum { |
266 | outer = Index / DstXprType::InnerSizeAtCompileTime, |
267 | inner = Index % DstXprType::InnerSizeAtCompileTime, |
268 | SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, |
269 | DstAlignment = Kernel::AssignmentTraits::DstAlignment |
270 | }; |
271 | |
272 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
273 | { |
274 | kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner); |
275 | enum { NextIndex = Index + unpacket_traits<PacketType>::size }; |
276 | copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel); |
277 | } |
278 | }; |
279 | |
280 | template<typename Kernel, int Stop> |
281 | struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> |
282 | { |
283 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } |
284 | }; |
285 | |
286 | template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment> |
287 | struct copy_using_evaluator_innervec_InnerUnrolling |
288 | { |
289 | typedef typename Kernel::PacketType PacketType; |
290 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) |
291 | { |
292 | kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_); |
293 | enum { NextIndex = Index_ + unpacket_traits<PacketType>::size }; |
294 | copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer); |
295 | } |
296 | }; |
297 | |
298 | template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment> |
299 | struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> |
300 | { |
301 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } |
302 | }; |
303 | |
304 | /*************************************************************************** |
305 | * Part 3 : implementation of all cases |
306 | ***************************************************************************/ |
307 | |
308 | // dense_assignment_loop is based on assign_impl |
309 | |
310 | template<typename Kernel, |
311 | int Traversal = Kernel::AssignmentTraits::Traversal, |
312 | int Unrolling = Kernel::AssignmentTraits::Unrolling> |
313 | struct dense_assignment_loop; |
314 | |
315 | /************************ |
316 | *** Default traversal *** |
317 | ************************/ |
318 | |
319 | template<typename Kernel> |
320 | struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> |
321 | { |
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
323 | { |
324 | for(Index outer = 0; outer < kernel.outerSize(); ++outer) { |
325 | for(Index inner = 0; inner < kernel.innerSize(); ++inner) { |
326 | kernel.assignCoeffByOuterInner(outer, inner); |
327 | } |
328 | } |
329 | } |
330 | }; |
331 | |
332 | template<typename Kernel> |
333 | struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> |
334 | { |
335 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
336 | { |
337 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
338 | copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); |
339 | } |
340 | }; |
341 | |
342 | template<typename Kernel> |
343 | struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> |
344 | { |
345 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
346 | { |
347 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
348 | |
349 | const Index outerSize = kernel.outerSize(); |
350 | for(Index outer = 0; outer < outerSize; ++outer) |
351 | copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer); |
352 | } |
353 | }; |
354 | |
355 | /*************************** |
356 | *** Linear vectorization *** |
357 | ***************************/ |
358 | |
359 | |
// The goal of unaligned_dense_assignment_loop is to factor out the handling
// of the non-vectorizable beginning and ending parts
362 | |
363 | template <bool IsAligned = false> |
364 | struct unaligned_dense_assignment_loop |
365 | { |
  // if IsAligned is true, there are no unaligned head/tail parts, so do nothing
367 | template <typename Kernel> |
368 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} |
369 | }; |
370 | |
371 | template <> |
372 | struct unaligned_dense_assignment_loop<false> |
373 | { |
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
376 | // FIXME check which version exhibits this issue |
377 | #if EIGEN_COMP_MSVC |
378 | template <typename Kernel> |
379 | static EIGEN_DONT_INLINE void run(Kernel &kernel, |
380 | Index start, |
381 | Index end) |
382 | #else |
383 | template <typename Kernel> |
384 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, |
385 | Index start, |
386 | Index end) |
387 | #endif |
388 | { |
389 | for (Index index = start; index < end; ++index) |
390 | kernel.assignCoeff(index); |
391 | } |
392 | }; |
393 | |
394 | template<typename Kernel> |
395 | struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> |
396 | { |
397 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
398 | { |
399 | const Index size = kernel.size(); |
400 | typedef typename Kernel::Scalar Scalar; |
401 | typedef typename Kernel::PacketType PacketType; |
402 | enum { |
403 | requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment, |
404 | packetSize = unpacket_traits<PacketType>::size, |
405 | dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), |
406 | dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment) |
407 | : int(Kernel::AssignmentTraits::DstAlignment), |
408 | srcAlignment = Kernel::AssignmentTraits::JointAlignment |
409 | }; |
410 | const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size); |
411 | const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; |
412 | |
413 | unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart); |
414 | |
415 | for(Index index = alignedStart; index < alignedEnd; index += packetSize) |
416 | kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index); |
417 | |
418 | unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); |
419 | } |
420 | }; |
421 | |
422 | template<typename Kernel> |
423 | struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> |
424 | { |
425 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
426 | { |
427 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
428 | typedef typename Kernel::PacketType PacketType; |
429 | |
430 | enum { size = DstXprType::SizeAtCompileTime, |
           packetSize = unpacket_traits<PacketType>::size,
432 | alignedSize = (size/packetSize)*packetSize }; |
433 | |
434 | copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel); |
435 | copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel); |
436 | } |
437 | }; |
438 | |
439 | /************************** |
440 | *** Inner vectorization *** |
441 | **************************/ |
442 | |
443 | template<typename Kernel> |
444 | struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> |
445 | { |
446 | typedef typename Kernel::PacketType PacketType; |
447 | enum { |
448 | SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, |
449 | DstAlignment = Kernel::AssignmentTraits::DstAlignment |
450 | }; |
451 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
452 | { |
453 | const Index innerSize = kernel.innerSize(); |
454 | const Index outerSize = kernel.outerSize(); |
455 | const Index packetSize = unpacket_traits<PacketType>::size; |
456 | for(Index outer = 0; outer < outerSize; ++outer) |
457 | for(Index inner = 0; inner < innerSize; inner+=packetSize) |
458 | kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner); |
459 | } |
460 | }; |
461 | |
462 | template<typename Kernel> |
463 | struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> |
464 | { |
465 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
466 | { |
467 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
468 | copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); |
469 | } |
470 | }; |
471 | |
472 | template<typename Kernel> |
473 | struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> |
474 | { |
475 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
476 | { |
477 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
478 | typedef typename Kernel::AssignmentTraits Traits; |
479 | const Index outerSize = kernel.outerSize(); |
480 | for(Index outer = 0; outer < outerSize; ++outer) |
481 | copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime, |
482 | Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer); |
483 | } |
484 | }; |
485 | |
486 | /*********************** |
487 | *** Linear traversal *** |
488 | ***********************/ |
489 | |
490 | template<typename Kernel> |
491 | struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> |
492 | { |
493 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
494 | { |
495 | const Index size = kernel.size(); |
496 | for(Index i = 0; i < size; ++i) |
497 | kernel.assignCoeff(i); |
498 | } |
499 | }; |
500 | |
501 | template<typename Kernel> |
502 | struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> |
503 | { |
504 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
505 | { |
506 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
507 | copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); |
508 | } |
509 | }; |
510 | |
511 | /************************** |
512 | *** Slice vectorization *** |
513 | ***************************/ |
514 | |
515 | template<typename Kernel> |
516 | struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> |
517 | { |
518 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
519 | { |
520 | typedef typename Kernel::Scalar Scalar; |
521 | typedef typename Kernel::PacketType PacketType; |
522 | enum { |
523 | packetSize = unpacket_traits<PacketType>::size, |
524 | requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), |
525 | alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), |
526 | dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), |
527 | dstAlignment = alignable ? int(requestedAlignment) |
528 | : int(Kernel::AssignmentTraits::DstAlignment) |
529 | }; |
530 | const Scalar *dst_ptr = kernel.dstDataPtr(); |
531 | if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0) |
532 | { |
      // the pointer is not aligned on a scalar boundary, so alignment is not achievable
534 | return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel); |
535 | } |
536 | const Index packetAlignedMask = packetSize - 1; |
537 | const Index innerSize = kernel.innerSize(); |
538 | const Index outerSize = kernel.outerSize(); |
539 | const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; |
540 | Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize); |
541 | |
542 | for(Index outer = 0; outer < outerSize; ++outer) |
543 | { |
544 | const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); |
545 | // do the non-vectorizable part of the assignment |
546 | for(Index inner = 0; inner<alignedStart ; ++inner) |
547 | kernel.assignCoeffByOuterInner(outer, inner); |
548 | |
549 | // do the vectorizable part of the assignment |
550 | for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) |
551 | kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner); |
552 | |
553 | // do the non-vectorizable part of the assignment |
554 | for(Index inner = alignedEnd; inner<innerSize ; ++inner) |
555 | kernel.assignCoeffByOuterInner(outer, inner); |
556 | |
557 | alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize); |
558 | } |
559 | } |
560 | }; |
561 | |
562 | #if EIGEN_UNALIGNED_VECTORIZE |
563 | template<typename Kernel> |
564 | struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling> |
565 | { |
566 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
567 | { |
568 | typedef typename Kernel::DstEvaluatorType::XprType DstXprType; |
569 | typedef typename Kernel::PacketType PacketType; |
570 | |
571 | enum { size = DstXprType::InnerSizeAtCompileTime, |
           packetSize = unpacket_traits<PacketType>::size,
573 | vectorizableSize = (size/packetSize)*packetSize }; |
574 | |
575 | for(Index outer = 0; outer < kernel.outerSize(); ++outer) |
576 | { |
577 | copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer); |
578 | copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer); |
579 | } |
580 | } |
581 | }; |
582 | #endif |
583 | |
584 | |
585 | /*************************************************************************** |
586 | * Part 4 : Generic dense assignment kernel |
587 | ***************************************************************************/ |
588 | |
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators and the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, bypassing functors altogether.
595 | template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized> |
596 | class generic_dense_assignment_kernel |
597 | { |
598 | protected: |
599 | typedef typename DstEvaluatorTypeT::XprType DstXprType; |
600 | typedef typename SrcEvaluatorTypeT::XprType SrcXprType; |
601 | public: |
602 | |
603 | typedef DstEvaluatorTypeT DstEvaluatorType; |
604 | typedef SrcEvaluatorTypeT SrcEvaluatorType; |
605 | typedef typename DstEvaluatorType::Scalar Scalar; |
606 | typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits; |
607 | typedef typename AssignmentTraits::PacketType PacketType; |
608 | |
609 | |
610 | EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) |
611 | : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) |
612 | { |
613 | #ifdef EIGEN_DEBUG_ASSIGN |
614 | AssignmentTraits::debug(); |
615 | #endif |
616 | } |
617 | |
618 | EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } |
619 | EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } |
620 | EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } |
621 | EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } |
622 | EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } |
623 | EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } |
624 | |
625 | EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } |
626 | EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } |
627 | |
628 | /// Assign src(row,col) to dst(row,col) through the assignment functor. |
629 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) |
630 | { |
631 | m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); |
632 | } |
633 | |
634 | /// \sa assignCoeff(Index,Index) |
635 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) |
636 | { |
637 | m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); |
638 | } |
639 | |
640 | /// \sa assignCoeff(Index,Index) |
641 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) |
642 | { |
643 | Index row = rowIndexByOuterInner(outer, inner); |
644 | Index col = colIndexByOuterInner(outer, inner); |
645 | assignCoeff(row, col); |
646 | } |
647 | |
648 | |
649 | template<int StoreMode, int LoadMode, typename PacketType> |
650 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) |
651 | { |
652 | m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col)); |
653 | } |
654 | |
655 | template<int StoreMode, int LoadMode, typename PacketType> |
656 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) |
657 | { |
658 | m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index)); |
659 | } |
660 | |
661 | template<int StoreMode, int LoadMode, typename PacketType> |
662 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) |
663 | { |
664 | Index row = rowIndexByOuterInner(outer, inner); |
665 | Index col = colIndexByOuterInner(outer, inner); |
666 | assignPacket<StoreMode,LoadMode,PacketType>(row, col); |
667 | } |
668 | |
669 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) |
670 | { |
671 | typedef typename DstEvaluatorType::ExpressionTraits Traits; |
672 | return int(Traits::RowsAtCompileTime) == 1 ? 0 |
673 | : int(Traits::ColsAtCompileTime) == 1 ? inner |
674 | : int(DstEvaluatorType::Flags)&RowMajorBit ? outer |
675 | : inner; |
676 | } |
677 | |
678 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) |
679 | { |
680 | typedef typename DstEvaluatorType::ExpressionTraits Traits; |
681 | return int(Traits::ColsAtCompileTime) == 1 ? 0 |
682 | : int(Traits::RowsAtCompileTime) == 1 ? inner |
683 | : int(DstEvaluatorType::Flags)&RowMajorBit ? inner |
684 | : outer; |
685 | } |
686 | |
687 | EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const |
688 | { |
689 | return m_dstExpr.data(); |
690 | } |
691 | |
692 | protected: |
693 | DstEvaluatorType& m_dst; |
694 | const SrcEvaluatorType& m_src; |
695 | const Functor &m_functor; |
  // TODO find a way to avoid the need for the original expression
697 | DstXprType& m_dstExpr; |
698 | }; |
699 | |
700 | /*************************************************************************** |
701 | * Part 5 : Entry point for dense rectangular assignment |
702 | ***************************************************************************/ |
703 | |
704 | template<typename DstXprType,typename SrcXprType, typename Functor> |
705 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
706 | void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/) |
707 | { |
708 | EIGEN_ONLY_USED_FOR_DEBUG(dst); |
709 | EIGEN_ONLY_USED_FOR_DEBUG(src); |
710 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
711 | } |
712 | |
713 | template<typename DstXprType,typename SrcXprType, typename T1, typename T2> |
714 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
715 | void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/) |
716 | { |
717 | Index dstRows = src.rows(); |
718 | Index dstCols = src.cols(); |
719 | if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols))) |
720 | dst.resize(dstRows, dstCols); |
721 | eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); |
722 | } |
723 | |
724 | template<typename DstXprType, typename SrcXprType, typename Functor> |
725 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) |
726 | { |
727 | typedef evaluator<DstXprType> DstEvaluatorType; |
728 | typedef evaluator<SrcXprType> SrcEvaluatorType; |
729 | |
730 | SrcEvaluatorType srcEvaluator(src); |
731 | |
732 | // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, |
733 | // we need to resize the destination after the source evaluator has been created. |
734 | resize_if_allowed(dst, src, func); |
735 | |
736 | DstEvaluatorType dstEvaluator(dst); |
737 | |
738 | typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; |
739 | Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); |
740 | |
741 | dense_assignment_loop<Kernel>::run(kernel); |
742 | } |
743 | |
744 | template<typename DstXprType, typename SrcXprType> |
745 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) |
746 | { |
747 | call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>()); |
748 | } |
749 | |
750 | /*************************************************************************** |
751 | * Part 6 : Generic assignment |
752 | ***************************************************************************/ |
753 | |
754 | // Based on the respective shapes of the destination and source, |
// the class AssignmentKind determines the kind of assignment mechanism.
756 | // AssignmentKind must define a Kind typedef. |
757 | template<typename DstShape, typename SrcShape> struct AssignmentKind; |
758 | |
// Assignment kinds defined in this file:
760 | struct Dense2Dense {}; |
761 | struct EigenBase2EigenBase {}; |
762 | |
763 | template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; |
764 | template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; }; |
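
// Other modules extend this dispatch by specializing AssignmentKind for their
// own shapes; for instance, the Sparse module adds specializations along the
// lines of (sketch):
//
//   struct Sparse2Dense {};
//   template<> struct AssignmentKind<DenseShape,SparseShape> { typedef Sparse2Dense Kind; };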
765 | |
766 | // This is the main assignment class |
767 | template< typename DstXprType, typename SrcXprType, typename Functor, |
768 | typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind, |
769 | typename EnableIf = void> |
770 | struct Assignment; |
771 | |
772 | |
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
777 | |
778 | template<typename Dst, typename Src> |
779 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
780 | void call_assignment(Dst& dst, const Src& src) |
781 | { |
782 | call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
783 | } |
784 | template<typename Dst, typename Src> |
785 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
786 | void call_assignment(const Dst& dst, const Src& src) |
787 | { |
788 | call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
789 | } |
790 | |
791 | // Deal with "assume-aliasing" |
792 | template<typename Dst, typename Src, typename Func> |
793 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
794 | void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0) |
795 | { |
796 | typename plain_matrix_type<Src>::type tmp(src); |
797 | call_assignment_no_alias(dst, tmp, func); |
798 | } |
799 | |
800 | template<typename Dst, typename Src, typename Func> |
801 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
802 | void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0) |
803 | { |
804 | call_assignment_no_alias(dst, src, func); |
805 | } |
806 | |
807 | // by-pass "assume-aliasing" |
808 | // When there is no aliasing, we require that 'dst' has been properly resized |
809 | template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> |
810 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
811 | void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) |
812 | { |
813 | call_assignment_no_alias(dst.expression(), src, func); |
814 | } |
815 | |
816 | |
817 | template<typename Dst, typename Src, typename Func> |
818 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
819 | void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) |
820 | { |
821 | enum { |
822 | NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) |
823 | || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) |
824 | ) && int(Dst::SizeAtCompileTime) != 1 |
825 | }; |
826 | |
827 | typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned; |
828 | typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType; |
829 | ActualDstType actualDst(dst); |
830 | |
831 | // TODO check whether this is the right place to perform these checks: |
832 | EIGEN_STATIC_ASSERT_LVALUE(Dst) |
833 | EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) |
834 | EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); |
835 | |
836 | Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); |
837 | } |
838 | template<typename Dst, typename Src> |
839 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
840 | void call_assignment_no_alias(Dst& dst, const Src& src) |
841 | { |
842 | call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
843 | } |
844 | |
845 | template<typename Dst, typename Src, typename Func> |
846 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
847 | void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) |
848 | { |
849 | // TODO check whether this is the right place to perform these checks: |
850 | EIGEN_STATIC_ASSERT_LVALUE(Dst) |
851 | EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) |
852 | EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); |
853 | |
854 | Assignment<Dst,Src,Func>::run(dst, src, func); |
855 | } |
856 | template<typename Dst, typename Src> |
857 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
858 | void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) |
859 | { |
860 | call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
861 | } |
862 | |
863 | // forward declaration |
864 | template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src); |
865 | |
866 | // Generic Dense to Dense assignment |
// Note that the last template argument "Weak" is needed to make it possible to combine
// partial specialization and SFINAE without ambiguous specializations
869 | template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> |
870 | struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> |
871 | { |
872 | EIGEN_DEVICE_FUNC |
873 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) |
874 | { |
875 | #ifndef EIGEN_NO_DEBUG |
876 | internal::check_for_aliasing(dst, src); |
877 | #endif |
878 | |
879 | call_dense_assignment_loop(dst, src, func); |
880 | } |
881 | }; |
882 | |
// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to the new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to combine
// partial specialization and SFINAE without ambiguous specializations
887 | template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> |
888 | struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> |
889 | { |
890 | EIGEN_DEVICE_FUNC |
891 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/) |
892 | { |
893 | Index dstRows = src.rows(); |
894 | Index dstCols = src.cols(); |
895 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
896 | dst.resize(dstRows, dstCols); |
897 | |
898 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
899 | src.evalTo(dst); |
900 | } |
901 | |
  // NOTE The following two functions are templated to avoid their instantiation if not needed.
  // This is needed because some expressions support only evalTo and/or have 'void' as scalar type.
904 | template<typename SrcScalarType> |
905 | EIGEN_DEVICE_FUNC |
906 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/) |
907 | { |
908 | Index dstRows = src.rows(); |
909 | Index dstCols = src.cols(); |
910 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
911 | dst.resize(dstRows, dstCols); |
912 | |
913 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
914 | src.addTo(dst); |
915 | } |
916 | |
917 | template<typename SrcScalarType> |
918 | EIGEN_DEVICE_FUNC |
919 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/) |
920 | { |
921 | Index dstRows = src.rows(); |
922 | Index dstCols = src.cols(); |
923 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
924 | dst.resize(dstRows, dstCols); |
925 | |
926 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
927 | src.subTo(dst); |
928 | } |
929 | }; |
930 | |
931 | } // namespace internal |
932 | |
933 | } // end namespace Eigen |
934 | |
935 | #endif // EIGEN_ASSIGN_EVALUATOR_H |
936 | |