| 1 | // This file is part of Eigen, a lightweight C++ template library |
| 2 | // for linear algebra. |
| 3 | // |
| 4 | // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> |
| 5 | // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> |
| 6 | // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> |
| 7 | // |
| 8 | // This Source Code Form is subject to the terms of the Mozilla |
| 9 | // Public License v. 2.0. If a copy of the MPL was not distributed |
| 10 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 11 | |
| 12 | #ifndef EIGEN_ASSIGN_EVALUATOR_H |
| 13 | #define EIGEN_ASSIGN_EVALUATOR_H |
| 14 | |
| 15 | namespace Eigen { |
| 16 | |
| 17 | // This implementation is based on Assign.h |
| 18 | |
| 19 | namespace internal { |
| 20 | |
| 21 | /*************************************************************************** |
| 22 | * Part 1 : the logic deciding a strategy for traversal and unrolling * |
| 23 | ***************************************************************************/ |
| 24 | |
| 25 | // copy_using_evaluator_traits is based on assign_traits |
| 26 | |
// Compile-time analysis of an assignment Dst = AssignFunc(Src): given the two
// evaluators and the assignment functor, this trait class selects how the
// assignment should be traversed (Traversal) and whether the resulting loops
// should be unrolled (Unrolling), and picks the packet type to use.
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;

  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags
  };

public:
  enum {
    // Static byte alignment guaranteed by each evaluator, and the alignment
    // guaranteed by both of them simultaneously.
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    // Compile-time inner dimension (depends on storage order) and its upper bound.
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
              : int(Dst::MaxRowsAtCompileTime),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };

  // TODO distinguish between linear traversal and inner-traversals
  // Best packet type for a flat (linear) pass over the whole expression, and
  // for a pass along one inner vector, respectively.
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;

  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };

public:
  enum {
    // Byte alignment each of the two packet types requires for aligned loads/stores.
    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
  };

private:
  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    // Vectorization is only considered when both sides expose packet access,
    // their storage orders agree, and the functor supports packets.
    MightVectorize = bool(StorageOrdersAgree)
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && bool(functor_traits<AssignFunc>::PacketAccess),
    // Whole inner vectors can be processed with packets when both the inner
    // size and the outer stride are compile-time multiples of the packet size.
    MayInnerVectorize = MightVectorize
                  && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
                  && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                  && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
                  && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
                  && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix
         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
  };

public:
  enum {
    // Pick the best traversal: prefer the strategy that uses the widest
    // packets, then fall back from vectorized to linear to the default 2D loop.
    Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
              : int(MayLinearize)        ? int(LinearTraversal)
                                         : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

  // Packet type actually used by the chosen traversal.
  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;

private:
  enum {
    ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
                        : Vectorized ? InnerPacketSize
                        : 1,
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
    // Unrolling is only worthwhile when the estimated total (resp. per inner
    // vector) cost stays below the unrolling limit.
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) != Dynamic
                       && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
  };

public:
  enum {
    // Final unrolling decision, refined per chosen traversal.
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
              ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
                          ? int(CompleteUnrolling)
                          : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
              ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                            : int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
              : int(Traversal) == int(SliceVectorizedTraversal)
              ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
                                       : int(NoUnrolling) )
#endif
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps every intermediate decision to stderr; only compiled when
  // EIGEN_DEBUG_ASSIGN is defined.
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(LinearPacketSize)
    EIGEN_DEBUG_VAR(InnerPacketSize)
    EIGEN_DEBUG_VAR(ActualPacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
| 183 | |
| 184 | /*************************************************************************** |
| 185 | * Part 2 : meta-unrollers |
| 186 | ***************************************************************************/ |
| 187 | |
| 188 | /************************ |
| 189 | *** Default traversal *** |
| 190 | ************************/ |
| 191 | |
// Fully unrolled coefficient-wise copy: assigns the coefficient at linear
// index Index (mapped to an (outer,inner) pair at compile time), then recurses
// on Index+1 until the Index==Stop terminator below is reached.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    // Compile-time conversion of the linear index into outer/inner coordinates.
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

// Recursion terminator: nothing left to assign.
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
| 216 | |
// Unrolls one inner vector of a coefficient-wise copy: assigns coefficient
// (outer, Index_), then recurses on Index_+1 until Index_==Stop. The outer
// index is a runtime argument, only the inner loop is unrolled.
template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
};

// Recursion terminator: end of the inner vector.
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};
| 232 | |
| 233 | /*********************** |
| 234 | *** Linear traversal *** |
| 235 | ***********************/ |
| 236 | |
// Fully unrolled copy by linear index: assigns coefficient Index, then
// recurses on Index+1 until the Index==Stop terminator below is reached.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

// Recursion terminator: nothing left to assign.
template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
| 252 | |
| 253 | /************************** |
| 254 | *** Inner vectorization *** |
| 255 | **************************/ |
| 256 | |
// Fully unrolled packet-wise copy: stores one packet at linear index Index
// (mapped to (outer,inner) at compile time), then recurses on
// Index + packet-size until the Index==Stop terminator below is reached.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    // Compile-time conversion of the linear index into outer/inner coordinates,
    // plus the static alignments to use for the packet store/load.
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};

// Recursion terminator: nothing left to assign.
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
| 285 | |
// Unrolls one inner vector of a packet-wise copy: stores the packet at
// (outer, Index_) with the given static alignments, then recurses on
// Index_ + packet-size until Index_==Stop. The outer index is a runtime
// argument; only the inner loop is unrolled.
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
  }
};

// Recursion terminator: end of the inner vector.
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};
| 303 | |
| 304 | /*************************************************************************** |
| 305 | * Part 3 : implementation of all cases |
| 306 | ***************************************************************************/ |
| 307 | |
| 308 | // dense_assignment_loop is based on assign_impl |
| 309 | |
// Primary template; specialized below for each (Traversal, Unrolling) pair
// selected at compile time by copy_using_evaluator_traits.
template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;
| 314 | |
| 315 | /************************ |
| 316 | *** Default traversal *** |
| 317 | ************************/ |
| 318 | |
| 319 | template<typename Kernel> |
| 320 | struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> |
| 321 | { |
| 322 | EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) |
| 323 | { |
| 324 | for(Index outer = 0; outer < kernel.outerSize(); ++outer) { |
| 325 | for(Index inner = 0; inner < kernel.innerSize(); ++inner) { |
| 326 | kernel.assignCoeffByOuterInner(outer, inner); |
| 327 | } |
| 328 | } |
| 329 | } |
| 330 | }; |
| 331 | |
// Default traversal with the whole copy unrolled at compile time; the
// destination size is known statically.
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};
| 341 | |
// Default traversal with a runtime outer loop and a fully unrolled inner
// loop; the inner size is known statically.
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};
| 354 | |
| 355 | /*************************** |
| 356 | *** Linear vectorization *** |
| 357 | ***************************/ |
| 358 | |
| 359 | |
| 360 | // The goal of unaligned_dense_assignment_loop is simply to factorize the handling |
| 361 | // of the non vectorizable beginning and ending parts |
| 362 | |
template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
  // if IsAligned = true, then do nothing: there is no unaligned head or tail
  // to process before/after the packet-aligned middle section.
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};

template <>
struct unaligned_dense_assignment_loop<false>
{
  // Scalar (coefficient-by-coefficient) loop over [start, end); used for the
  // unaligned head and tail surrounding the vectorized middle section.
  //
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                                        Index start,
                                                        Index end)
#endif
  {
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
};
| 393 | |
// Linear vectorized traversal: scalar head up to the first aligned position,
// packet-wise middle section, scalar tail for the remainder.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      // Whether the destination is statically known to be suitably aligned;
      // if not, first_aligned() locates the alignment boundary at runtime.
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    // [0, alignedStart) and [alignedEnd, size) are handled with scalar
    // assignments; [alignedStart, alignedEnd) with whole packets.
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);

    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
};
| 421 | |
// Linear vectorized traversal, fully unrolled: packet stores for the largest
// packet-size multiple of the static size, then scalar assignments for the
// remaining coefficients.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           // Largest multiple of the packet size not exceeding the total size.
           alignedSize = (size/packetSize)*packetSize };

    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
};
| 438 | |
| 439 | /************************** |
| 440 | *** Inner vectorization *** |
| 441 | **************************/ |
| 442 | |
| 443 | template<typename Kernel> |
| 444 | struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> |
| 445 | { |
| 446 | typedef typename Kernel::PacketType PacketType; |
| 447 | enum { |
| 448 | SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, |
| 449 | DstAlignment = Kernel::AssignmentTraits::DstAlignment |
| 450 | }; |
| 451 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
| 452 | { |
| 453 | const Index innerSize = kernel.innerSize(); |
| 454 | const Index outerSize = kernel.outerSize(); |
| 455 | const Index packetSize = unpacket_traits<PacketType>::size; |
| 456 | for(Index outer = 0; outer < outerSize; ++outer) |
| 457 | for(Index inner = 0; inner < innerSize; inner+=packetSize) |
| 458 | kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner); |
| 459 | } |
| 460 | }; |
| 461 | |
// Inner vectorized traversal with the whole copy unrolled at compile time.
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};
| 471 | |
// Inner vectorized traversal with a runtime outer loop and a fully unrolled,
// packet-wise inner loop.
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::AssignmentTraits Traits;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
  }
};
| 485 | |
| 486 | /*********************** |
| 487 | *** Linear traversal *** |
| 488 | ***********************/ |
| 489 | |
| 490 | template<typename Kernel> |
| 491 | struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> |
| 492 | { |
| 493 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) |
| 494 | { |
| 495 | const Index size = kernel.size(); |
| 496 | for(Index i = 0; i < size; ++i) |
| 497 | kernel.assignCoeff(i); |
| 498 | } |
| 499 | }; |
| 500 | |
// Linear traversal with the whole copy unrolled at compile time.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};
| 510 | |
| 511 | /************************** |
| 512 | *** Slice vectorization *** |
| 513 | ***************************/ |
| 514 | |
// Slice vectorization: each inner vector is split into a scalar head, a
// packet-wise middle, and a scalar tail, with the aligned start position
// tracked across outer iterations.
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
      // Whether aligned stores can be reached at all for this destination.
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = kernel.dstDataPtr();
    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligned on scalar, so alignment is not possible:
      // fall back to the plain coefficient-wise loop
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    // Offset by which the aligned start moves from one inner vector to the next.
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // advance the aligned start for the next inner vector
      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};
| 561 | |
#if EIGEN_UNALIGNED_VECTORIZE
// Slice vectorization with an unrolled inner loop: only available with
// unaligned vectorization, since packet accesses use alignment 0 here. Each
// inner vector gets an unrolled packet part followed by an unrolled scalar
// remainder.
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { size = DstXprType::InnerSizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           // Largest multiple of the packet size not exceeding the inner size.
           vectorizableSize = (size/packetSize)*packetSize };

    for(Index outer = 0; outer < kernel.outerSize(); ++outer)
    {
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
    }
  }
};
#endif
| 583 | |
| 584 | |
| 585 | /*************************************************************************** |
| 586 | * Part 4 : Generic dense assignment kernel |
| 587 | ***************************************************************************/ |
| 588 | |
| 589 | // This class generalize the assignment of a coefficient (or packet) from one dense evaluator |
| 590 | // to another dense writable evaluator. |
| 591 | // It is parametrized by the two evaluators, and the actual assignment functor. |
| 592 | // This abstraction level permits to keep the evaluation loops as simple and as generic as possible. |
| 593 | // One can customize the assignment using this generic dense_assignment_kernel with different |
| 594 | // functors, or by completely overloading it, by-passing a functor. |
| 595 | template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized> |
| 596 | class generic_dense_assignment_kernel |
| 597 | { |
| 598 | protected: |
| 599 | typedef typename DstEvaluatorTypeT::XprType DstXprType; |
| 600 | typedef typename SrcEvaluatorTypeT::XprType SrcXprType; |
| 601 | public: |
| 602 | |
| 603 | typedef DstEvaluatorTypeT DstEvaluatorType; |
| 604 | typedef SrcEvaluatorTypeT SrcEvaluatorType; |
| 605 | typedef typename DstEvaluatorType::Scalar Scalar; |
| 606 | typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits; |
| 607 | typedef typename AssignmentTraits::PacketType PacketType; |
| 608 | |
| 609 | |
| 610 | EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) |
| 611 | : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) |
| 612 | { |
| 613 | #ifdef EIGEN_DEBUG_ASSIGN |
| 614 | AssignmentTraits::debug(); |
| 615 | #endif |
| 616 | } |
| 617 | |
| 618 | EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } |
| 619 | EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } |
| 620 | EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } |
| 621 | EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } |
| 622 | EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } |
| 623 | EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } |
| 624 | |
| 625 | EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } |
| 626 | EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } |
| 627 | |
| 628 | /// Assign src(row,col) to dst(row,col) through the assignment functor. |
| 629 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) |
| 630 | { |
| 631 | m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); |
| 632 | } |
| 633 | |
| 634 | /// \sa assignCoeff(Index,Index) |
| 635 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) |
| 636 | { |
| 637 | m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); |
| 638 | } |
| 639 | |
| 640 | /// \sa assignCoeff(Index,Index) |
| 641 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) |
| 642 | { |
| 643 | Index row = rowIndexByOuterInner(outer, inner); |
| 644 | Index col = colIndexByOuterInner(outer, inner); |
| 645 | assignCoeff(row, col); |
| 646 | } |
| 647 | |
| 648 | |
| 649 | template<int StoreMode, int LoadMode, typename PacketType> |
| 650 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) |
| 651 | { |
| 652 | m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col)); |
| 653 | } |
| 654 | |
| 655 | template<int StoreMode, int LoadMode, typename PacketType> |
| 656 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) |
| 657 | { |
| 658 | m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index)); |
| 659 | } |
| 660 | |
| 661 | template<int StoreMode, int LoadMode, typename PacketType> |
| 662 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) |
| 663 | { |
| 664 | Index row = rowIndexByOuterInner(outer, inner); |
| 665 | Index col = colIndexByOuterInner(outer, inner); |
| 666 | assignPacket<StoreMode,LoadMode,PacketType>(row, col); |
| 667 | } |
| 668 | |
| 669 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) |
| 670 | { |
| 671 | typedef typename DstEvaluatorType::ExpressionTraits Traits; |
| 672 | return int(Traits::RowsAtCompileTime) == 1 ? 0 |
| 673 | : int(Traits::ColsAtCompileTime) == 1 ? inner |
| 674 | : int(DstEvaluatorType::Flags)&RowMajorBit ? outer |
| 675 | : inner; |
| 676 | } |
| 677 | |
| 678 | EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) |
| 679 | { |
| 680 | typedef typename DstEvaluatorType::ExpressionTraits Traits; |
| 681 | return int(Traits::ColsAtCompileTime) == 1 ? 0 |
| 682 | : int(Traits::RowsAtCompileTime) == 1 ? inner |
| 683 | : int(DstEvaluatorType::Flags)&RowMajorBit ? inner |
| 684 | : outer; |
| 685 | } |
| 686 | |
  // Returns a raw pointer to the destination expression's data buffer
  // (presumably used by callers for runtime alignment decisions — confirm).
  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
  {
    return m_dstExpr.data();
  }
| 691 | |
protected:
  // The kernel only stores references: it owns neither the evaluators nor the
  // functor; their lifetime is managed by the caller (see call_dense_assignment_loop).
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the needs of the original expression
  // NOTE(review): within this file m_dstExpr is only read by dstDataPtr() —
  // confirm no other uses before removing it.
  DstXprType& m_dstExpr;
};
| 699 | |
| 700 | /*************************************************************************** |
| 701 | * Part 5 : Entry point for dense rectangular assignment |
| 702 | ***************************************************************************/ |
| 703 | |
// Fallback for arbitrary functors (e.g. compound assignment such as +=):
// resizing the destination is NOT allowed; in debug builds we only assert
// that the sizes already match.
template<typename DstXprType,typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
{
  // Silence unused-parameter warnings in release builds where eigen_assert
  // compiles to nothing.
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
  EIGEN_ONLY_USED_FOR_DEBUG(src);
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}
| 712 | |
| 713 | template<typename DstXprType,typename SrcXprType, typename T1, typename T2> |
| 714 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 715 | void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/) |
| 716 | { |
| 717 | Index dstRows = src.rows(); |
| 718 | Index dstCols = src.cols(); |
| 719 | if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols))) |
| 720 | dst.resize(dstRows, dstCols); |
| 721 | eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); |
| 722 | } |
| 723 | |
| 724 | template<typename DstXprType, typename SrcXprType, typename Functor> |
| 725 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) |
| 726 | { |
| 727 | typedef evaluator<DstXprType> DstEvaluatorType; |
| 728 | typedef evaluator<SrcXprType> SrcEvaluatorType; |
| 729 | |
| 730 | SrcEvaluatorType srcEvaluator(src); |
| 731 | |
| 732 | // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, |
| 733 | // we need to resize the destination after the source evaluator has been created. |
| 734 | resize_if_allowed(dst, src, func); |
| 735 | |
| 736 | DstEvaluatorType dstEvaluator(dst); |
| 737 | |
| 738 | typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; |
| 739 | Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); |
| 740 | |
| 741 | dense_assignment_loop<Kernel>::run(kernel); |
| 742 | } |
| 743 | |
| 744 | template<typename DstXprType, typename SrcXprType> |
| 745 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) |
| 746 | { |
| 747 | call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>()); |
| 748 | } |
| 749 | |
| 750 | /*************************************************************************** |
| 751 | * Part 6 : Generic assignment |
| 752 | ***************************************************************************/ |
| 753 | |
// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kinds defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};

// Default: fall back to the generic evalTo-based mechanism.
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
// Dense-to-dense assignments go through the dense assignment loops.
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };

// This is the main assignment class; specializations are selected by the
// destination/source shapes (Kind) and, optionally, SFINAE (EnableIf).
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename EnableIf = void>
struct Assignment;
| 771 | |
| 772 | |
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
| 777 | |
| 778 | template<typename Dst, typename Src> |
| 779 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 780 | void call_assignment(Dst& dst, const Src& src) |
| 781 | { |
| 782 | call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
| 783 | } |
| 784 | template<typename Dst, typename Src> |
| 785 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 786 | void call_assignment(const Dst& dst, const Src& src) |
| 787 | { |
| 788 | call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
| 789 | } |
| 790 | |
| 791 | // Deal with "assume-aliasing" |
| 792 | template<typename Dst, typename Src, typename Func> |
| 793 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 794 | void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0) |
| 795 | { |
| 796 | typename plain_matrix_type<Src>::type tmp(src); |
| 797 | call_assignment_no_alias(dst, tmp, func); |
| 798 | } |
| 799 | |
// Source does not "assume aliasing": forward directly to the aliasing-free path.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  call_assignment_no_alias(dst, src, func);
}
| 806 | |
| 807 | // by-pass "assume-aliasing" |
| 808 | // When there is no aliasing, we require that 'dst' has been properly resized |
| 809 | template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> |
| 810 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 811 | void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) |
| 812 | { |
| 813 | call_assignment_no_alias(dst.expression(), src, func); |
| 814 | } |
| 815 | |
| 816 | |
| 817 | template<typename Dst, typename Src, typename Func> |
| 818 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 819 | void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) |
| 820 | { |
| 821 | enum { |
| 822 | NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) |
| 823 | || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) |
| 824 | ) && int(Dst::SizeAtCompileTime) != 1 |
| 825 | }; |
| 826 | |
| 827 | typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned; |
| 828 | typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType; |
| 829 | ActualDstType actualDst(dst); |
| 830 | |
| 831 | // TODO check whether this is the right place to perform these checks: |
| 832 | EIGEN_STATIC_ASSERT_LVALUE(Dst) |
| 833 | EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) |
| 834 | EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); |
| 835 | |
| 836 | Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); |
| 837 | } |
| 838 | template<typename Dst, typename Src> |
| 839 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 840 | void call_assignment_no_alias(Dst& dst, const Src& src) |
| 841 | { |
| 842 | call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
| 843 | } |
| 844 | |
// Same as call_assignment_no_alias, but without the automatic transposition
// of compile-time vectors.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

  Assignment<Dst,Src,Func>::run(dst, src, func);
}
| 856 | template<typename Dst, typename Src> |
| 857 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 858 | void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) |
| 859 | { |
| 860 | call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>()); |
| 861 | } |
| 862 | |
// forward declaration (defined elsewhere; used by the Dense2Dense Assignment below)
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
| 865 | |
| 866 | // Generic Dense to Dense assignment |
| 867 | // Note that the last template argument "Weak" is needed to make it possible to perform |
| 868 | // both partial specialization+SFINAE without ambiguous specialization |
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
#ifndef EIGEN_NO_DEBUG
    // Debug-only runtime detection of aliasing between dst and src.
    internal::check_for_aliasing(dst, src);
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};
| 882 | |
| 883 | // Generic assignment through evalTo. |
| 884 | // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. |
| 885 | // Note that the last template argument "Weak" is needed to make it possible to perform |
| 886 | // both partial specialization+SFINAE without ambiguous specialization |
| 887 | template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> |
| 888 | struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> |
| 889 | { |
| 890 | EIGEN_DEVICE_FUNC |
| 891 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/) |
| 892 | { |
| 893 | Index dstRows = src.rows(); |
| 894 | Index dstCols = src.cols(); |
| 895 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
| 896 | dst.resize(dstRows, dstCols); |
| 897 | |
| 898 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
| 899 | src.evalTo(dst); |
| 900 | } |
| 901 | |
| 902 | // NOTE The following two functions are templated to avoid their instanciation if not needed |
| 903 | // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type. |
| 904 | template<typename SrcScalarType> |
| 905 | EIGEN_DEVICE_FUNC |
| 906 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/) |
| 907 | { |
| 908 | Index dstRows = src.rows(); |
| 909 | Index dstCols = src.cols(); |
| 910 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
| 911 | dst.resize(dstRows, dstCols); |
| 912 | |
| 913 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
| 914 | src.addTo(dst); |
| 915 | } |
| 916 | |
| 917 | template<typename SrcScalarType> |
| 918 | EIGEN_DEVICE_FUNC |
| 919 | static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/) |
| 920 | { |
| 921 | Index dstRows = src.rows(); |
| 922 | Index dstCols = src.cols(); |
| 923 | if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) |
| 924 | dst.resize(dstRows, dstCols); |
| 925 | |
| 926 | eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); |
| 927 | src.subTo(dst); |
| 928 | } |
| 929 | }; |
| 930 | |
| 931 | } // namespace internal |
| 932 | |
| 933 | } // end namespace Eigen |
| 934 | |
| 935 | #endif // EIGEN_ASSIGN_EVALUATOR_H |
| 936 | |