// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// We used the research report "A Divide-And-Conquer Algorithm for the Bidiagonal SVD"
// written by Ming Gu and Stanley C. Eisenstat.
// The variable names in the code correspond to the names used in their report.
//
// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_BDCSVD_H
#define EIGEN_BDCSVD_H
// #define EIGEN_BDCSVD_DEBUG_VERBOSE
// #define EIGEN_BDCSVD_SANITY_CHECKS

namespace Eigen {

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
IOFormat bdcsvdfmt(8, 0, ", ", "\n", "  [", "]");
#endif

template<typename _MatrixType> class BDCSVD;

namespace internal {

template<typename _MatrixType>
struct traits<BDCSVD<_MatrixType> >
{
  typedef _MatrixType MatrixType;
};

} // end namespace internal


/** \ingroup SVD_Module
 *
 *
 * \class BDCSVD
 *
 * \brief class Bidiagonal Divide and Conquer SVD
 *
 * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
 *
 * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization,
 * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD.
 * You can control the switching size with the setSwitchSize() method; the default is 16.
 * For small matrices (<16), it is thus preferable to use JacobiSVD directly. For larger ones, BDCSVD is highly
 * recommended and can be several orders of magnitude faster.
 *
 * \warning this algorithm is unlikely to provide accurate results when compiled with unsafe math optimizations.
 * For instance, this concerns Intel's compiler (ICC), which performs such optimizations by default unless
 * you compile with the \c -fp-model \c precise option. Likewise, the \c -ffast-math option of GCC or clang will
 * significantly degrade the accuracy.
 *
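 * For example, here is a minimal usage sketch (the matrix \c A below is illustrative and not part of the library):
 * \code
 * MatrixXd A = MatrixXd::Random(1000, 500);
 * BDCSVD<MatrixXd> svd(A, ComputeThinU | ComputeThinV);
 * std::cout << "largest singular value: " << svd.singularValues()(0) << "\n";
 * std::cout << "columns of thin U: " << svd.matrixU().cols() << "\n"; // 500 with ComputeThinU
 * \endcode
 *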
 * \sa class JacobiSVD
 */
template<typename _MatrixType>
class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
{
  typedef SVDBase<BDCSVD> Base;

public:
  using Base::rows;
  using Base::cols;
  using Base::computeU;
  using Base::computeV;

  typedef _MatrixType MatrixType;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
  typedef typename NumTraits<RealScalar>::Literal Literal;
  enum {
    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime),
    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
    MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime),
    MatrixOptions = MatrixType::Options
  };

  typedef typename Base::MatrixUType MatrixUType;
  typedef typename Base::MatrixVType MatrixVType;
  typedef typename Base::SingularValuesType SingularValuesType;

  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> MatrixX;
  typedef Matrix<RealScalar, Dynamic, Dynamic, ColMajor> MatrixXr;
  typedef Matrix<RealScalar, Dynamic, 1> VectorType;
  typedef Array<RealScalar, Dynamic, 1> ArrayXr;
  typedef Array<Index,1,Dynamic> ArrayXi;
  typedef Ref<ArrayXr> ArrayRef;
  typedef Ref<ArrayXi> IndicesRef;

  /** \brief Default Constructor.
   *
   * The default constructor is useful in cases in which the user intends to
   * perform decompositions via BDCSVD::compute(const MatrixType&).
   */
  BDCSVD() : m_algoswap(16), m_numIters(0)
  {}


  /** \brief Default Constructor with memory preallocation
   *
   * Like the default constructor but with preallocation of the internal data
   * according to the specified problem size.
   * \sa BDCSVD()
   */
  BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
    : m_algoswap(16), m_numIters(0)
  {
    allocate(rows, cols, computationOptions);
  }

  /** \brief Constructor performing the decomposition of given matrix.
   *
   * \param matrix the matrix to decompose
   * \param computationOptions optional parameter allowing one to specify whether full or thin U or V unitaries should be computed.
   *                           By default, none is computed. This is a bit-field; the possible bits are #ComputeFullU, #ComputeThinU,
   *                           #ComputeFullV, #ComputeThinV.
   *
   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They are also not
   * available with the (non-default) FullPivHouseholderQR preconditioner.
   */
  BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
    : m_algoswap(16), m_numIters(0)
  {
    compute(matrix, computationOptions);
  }

  ~BDCSVD()
  {
  }

  /** \brief Method performing the decomposition of given matrix using custom options.
   *
   * \param matrix the matrix to decompose
   * \param computationOptions optional parameter allowing one to specify whether full or thin U or V unitaries should be computed.
   *                           By default, none is computed. This is a bit-field; the possible bits are #ComputeFullU, #ComputeThinU,
   *                           #ComputeFullV, #ComputeThinV.
   *
   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They are also not
   * available with the (non-default) FullPivHouseholderQR preconditioner.
   */
  BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);

  /** \brief Method performing the decomposition of given matrix using current options.
   *
   * \param matrix the matrix to decompose
   *
   * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
   */
  BDCSVD& compute(const MatrixType& matrix)
  {
    return compute(matrix, this->m_computationOptions);
  }

  void setSwitchSize(int s)
  {
    eigen_assert(s>3 && "BDCSVD: the size of the algo switch has to be greater than 3");
    m_algoswap = s;
  }

private:
  void allocate(Index rows, Index cols, unsigned int computationOptions);
  void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
  void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
  void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
  void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
  void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
  void deflation43(Index firstCol, Index shift, Index i, Index size);
  void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
  void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
  template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
  void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
  void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
  static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);

protected:
  MatrixXr m_naiveU, m_naiveV;
  MatrixXr m_computed;
  Index m_nRec;
  ArrayXr m_workspace;
  ArrayXi m_workspaceI;
  int m_algoswap;
  bool m_isTranspose, m_compU, m_compV;

  using Base::m_singularValues;
  using Base::m_diagSize;
  using Base::m_computeFullU;
  using Base::m_computeFullV;
  using Base::m_computeThinU;
  using Base::m_computeThinV;
  using Base::m_matrixU;
  using Base::m_matrixV;
  using Base::m_isInitialized;
  using Base::m_nonzeroSingularValues;

public:
  int m_numIters;
}; //end class BDCSVD


// Method to allocate and initialize matrix and attributes
template<typename MatrixType>
void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
{
  m_isTranspose = (cols > rows);

  if (Base::allocate(rows, cols, computationOptions))
    return;

  m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );
  m_compU = computeV();
  m_compV = computeU();
  if (m_isTranspose)
    std::swap(m_compU, m_compV);

  if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );
  else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );

  if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);

  m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
  m_workspaceI.resize(3*m_diagSize);
}// end allocate

template<typename MatrixType>
BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions)
{
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "\n\n\n======================================================================================================================\n\n\n";
#endif
  allocate(matrix.rows(), matrix.cols(), computationOptions);
  using std::abs;

  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();

  //**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
  if(matrix.cols() < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
    if(computeU()) m_matrixU = jsvd.matrixU();
    if(computeV()) m_matrixV = jsvd.matrixV();
    m_singularValues = jsvd.singularValues();
    m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
    m_isInitialized = true;
    return *this;
  }

  //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
  RealScalar scale = matrix.cwiseAbs().maxCoeff();
  if(scale==Literal(0)) scale = Literal(1);
  MatrixX copy;
  if (m_isTranspose) copy = matrix.adjoint()/scale;
  else               copy = matrix/scale;

  //**** step 1 - Bidiagonalization
  // FIXME this line involves temporaries
  internal::UpperBidiagonalization<MatrixX> bid(copy);

  //**** step 2 - Divide & Conquer
  m_naiveU.setZero();
  m_naiveV.setZero();
  // FIXME this line involves a temporary matrix
  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
  m_computed.template bottomRows<1>().setZero();
  divide(0, m_diagSize - 1, 0, 0, 0);

  //**** step 3 - Copy singular values and vectors
  for (int i=0; i<m_diagSize; i++)
  {
    RealScalar a = abs(m_computed.coeff(i, i));
    m_singularValues.coeffRef(i) = a * scale;
    if (a<considerZero)
    {
      m_nonzeroSingularValues = i;
      m_singularValues.tail(m_diagSize - i - 1).setZero();
      break;
    }
    else if (i == m_diagSize - 1)
    {
      m_nonzeroSingularValues = i + 1;
      break;
    }
  }

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
//   std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
//   std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
#endif
  if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);
  else              copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);

  m_isInitialized = true;
  return *this;
}// end compute


template<typename MatrixType>
template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)
{
  // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
  if (computeU())
  {
    Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
    m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
    m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
    householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
  }
  if (computeV())
  {
    Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
    m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
    m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
    householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
  }
}

/** \internal
 * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:
 *  A = [A1]
 *      [A2]
 * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros.
 * We can thus pack them prior to the matrix product. However, this is only worth the effort if the matrix is large
 * enough.
 */
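// Illustrative cost note (not from the reference paper): if A is 200x200 with n1 = 100, and only
// k1 = 60 columns of the top half and k2 = 70 columns of the bottom half are non-zero, the two
// packed products below cost roughly 100*60*200 + 100*70*200 multiplications instead of the
// 200*200*200 of a plain A*B.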
template<typename MatrixType>
void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)
{
  Index n = A.rows();
  if(n>100)
  {
    // If the matrices are large enough, let's exploit the sparse structure of A by
    // splitting it in half (wrt n1), and packing the non-zero columns.
    Index n2 = n - n1;
    Map<MatrixXr> A1(m_workspace.data()      , n1, n);
    Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
    Map<MatrixXr> B1(m_workspace.data()+  n*n, n, n);
    Map<MatrixXr> B2(m_workspace.data()+2*n*n, n, n);
    Index k1=0, k2=0;
    for(Index j=0; j<n; ++j)
    {
      if( (A.col(j).head(n1).array()!=Literal(0)).any() )
      {
        A1.col(k1) = A.col(j).head(n1);
        B1.row(k1) = B.row(j);
        ++k1;
      }
      if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
      {
        A2.col(k2) = A.col(j).tail(n2);
        B2.row(k2) = B.row(j);
        ++k2;
      }
    }

    A.topRows(n1).noalias()    = A1.leftCols(k1) * B1.topRows(k1);
    A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
  }
  else
  {
    Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
    tmp.noalias() = A*B;
    A = tmp;
  }
}

379
380// The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods takes as argument the
381// place of the submatrix we are currently working on.
382
383//@param firstCol : The Index of the first column of the submatrix of m_computed and for m_naiveU;
384//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU;
385// lastCol + 1 - firstCol is the size of the submatrix.
386//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W)
387//@param firstRowW : Same as firstRowW with the column.
388//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix
389// to become the first column (*coeff) and to shift all the other columns to the right. There are more details on the reference paper.
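// In short (a summary added for readability, paraphrasing the code below): the two halves are reduced
// recursively, then the middle row (alphaK, betaK) is folded into a combined block whose first column is
// [r0; alphaK*l; betaK*f] with r0 = sqrt((alphaK*lambda)^2 + (betaK*phi)^2), and whose other non-zeros all
// sit on the diagonal; computeSVDofM then diagonalizes that combined block.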
template<typename MatrixType>
void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift)
{
  // requires rows = cols + 1;
  using std::pow;
  using std::sqrt;
  using std::abs;
  const Index n = lastCol - firstCol + 1;
  const Index k = n/2;
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  RealScalar alphaK;
  RealScalar betaK;
  RealScalar r0;
  RealScalar lambda, phi, c0, s0;
  VectorType l, f;
  // We use the other algorithm which is more efficient for small
  // matrices.
  if (n < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
    if (m_compU)
      m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
    else
    {
      m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
      m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
    }
    if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();
    m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
    m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);
    return;
  }
  // We use the divide and conquer algorithm
  alphaK = m_computed(firstCol + k, firstCol + k);
  betaK = m_computed(firstCol + k + 1, firstCol + k);
  // The divide must be done in that order to get good results. Divide changes the data inside the submatrices,
  // and the divide of the right submatrix reads one column of the left submatrix. That's why we need to treat the
  // right submatrix before the left one.
  divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
  divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);

  if (m_compU)
  {
    lambda = m_naiveU(firstCol + k, firstCol + k);
    phi = m_naiveU(firstCol + k + 1, lastCol + 1);
  }
  else
  {
    lambda = m_naiveU(1, firstCol + k);
    phi = m_naiveU(0, lastCol + 1);
  }
  r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));
  if (m_compU)
  {
    l = m_naiveU.row(firstCol + k).segment(firstCol, k);
    f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
  }
  else
  {
    l = m_naiveU.row(1).segment(firstCol, k);
    f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
  }
  if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
  if (r0<considerZero)
  {
    c0 = Literal(1);
    s0 = Literal(0);
  }
  else
  {
    c0 = alphaK * lambda / r0;
    s0 = betaK * phi / r0;
  }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  if (m_compU)
  {
    MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));
    // we shift Q1 to the right
    for (Index i = firstCol + k - 1; i >= firstCol; i--)
      m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);
    // we put q1 in the first column, scaled by c0
    m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0);
    // last column = q1 * - s0
    m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0));
    // first column = q2 * s0
    m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0;
    // q2 *= c0
    m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
  }
  else
  {
    RealScalar q1 = m_naiveU(0, firstCol + k);
    // we shift Q1 to the right
    for (Index i = firstCol + k - 1; i >= firstCol; i--)
      m_naiveU(0, i + 1) = m_naiveU(0, i);
    // we put q1 in the first column, scaled by c0
    m_naiveU(0, firstCol) = (q1 * c0);
    // last column = q1 * - s0
    m_naiveU(0, lastCol + 1) = (q1 * ( - s0));
    // first column = q2 * s0
    m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0;
    // q2 *= c0
    m_naiveU(1, lastCol + 1) *= c0;
    m_naiveU.row(1).segment(firstCol + 1, k).setZero();
    m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
  }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  m_computed(firstCol + shift, firstCol + shift) = r0;
  m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();
  m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
#endif
  // Second part: try to deflate singular values in combined matrix
  deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
  std::cout << "\n\nj1 = " << tmp1.transpose().format(bdcsvdfmt) << "\n";
  std::cout << "j2 = " << tmp2.transpose().format(bdcsvdfmt) << "\n\n";
  std::cout << "err: " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n";
  static int count = 0;
  std::cout << "# " << ++count << "\n\n";
  assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
//   assert(count<681);
//   assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
#endif

  // Third part: compute SVD of combined matrix
  MatrixXr UofSVD, VofSVD;
  VectorType singVals;
  computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(UofSVD.allFinite());
  assert(VofSVD.allFinite());
#endif

  if (m_compU)
    structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
  else
  {
    Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
    tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
    m_naiveU.middleCols(firstCol, n + 1) = tmp;
  }

  if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
  m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
}// end divide

// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
// the first column and on the diagonal and has undergone deflation, so the diagonal is in increasing
// order except for possibly the (0,0) entry. The computed SVD is stored in U, singVals and V, except
// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
//
// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
// handling of round-off errors, be consistent in ordering
// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
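// Schematically (an informal sketch of the statement above), the block being diagonalized here is
//   M = [ col0(0)    0       0     ... ]
//       [ col0(1)  diag(1)   0     ... ]
//       [ col0(2)    0     diag(2) ... ]
//       [   ...                        ]
// i.e. its only non-zeros are the first column col0 and the diagonal diag.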
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  using std::abs;
  ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
  m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
  ArrayRef diag = m_workspace.head(n);
  diag(0) = Literal(0);

  // Allocate space for singular values and vectors
  singVals.resize(n);
  U.resize(n+1, n+1);
  if (m_compV) V.resize(n, n);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  if (col0.hasNaN() || diag.hasNaN())
    std::cout << "\n\nHAS NAN\n\n";
#endif

  // Many singular values might have been deflated; the zero ones have been moved to the end,
  // but others are interleaved and we must ignore them at this stage.
  // To this end, let's compute a permutation skipping them:
  Index actual_n = n;
  while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
  Index m = 0; // size of the deflated problem
  for(Index k=0;k<actual_n;++k)
    if(abs(col0(k))>considerZero)
      m_workspaceI(m++) = k;
  Map<ArrayXi> perm(m_workspaceI.data(),m);

  Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
  Map<ArrayXr> mus(m_workspace.data()+2*n, n);
  Map<ArrayXr> zhat(m_workspace.data()+3*n, n);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "computeSVDofM using:\n";
  std::cout << "  z: " << col0.transpose() << "\n";
  std::cout << "  d: " << diag.transpose() << "\n";
#endif

  // Compute singVals, shifts, and mus
  computeSingVals(col0, diag, perm, singVals, shifts, mus);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "  j:        " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n";
  std::cout << "  sing-val: " << singVals.transpose() << "\n";
  std::cout << "  mu:       " << mus.transpose() << "\n";
  std::cout << "  shift:    " << shifts.transpose() << "\n";

  {
    Index actual_n = n;
    while(actual_n>1 && abs(col0(actual_n-1))<considerZero) --actual_n;
    std::cout << "\n\n    mus:    " << mus.head(actual_n).transpose() << "\n\n";
    std::cout << "    check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
    std::cout << "    check2 (>0)      : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
    std::cout << "    check3 (>0)      : " << ((diag.segment(1,actual_n-1)-singVals.head(actual_n-1).array()) / singVals.head(actual_n-1).array()).transpose() << "\n\n\n";
    std::cout << "    check4 (>0)      : " << ((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).transpose() << "\n\n\n";
  }
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(singVals.allFinite());
  assert(mus.allFinite());
  assert(shifts.allFinite());
#endif

  // Compute zhat
  perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat);
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "  zhat: " << zhat.transpose() << "\n";
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(zhat.allFinite());
#endif

  computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n";
  std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n";
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(U.allFinite());
  assert(V.allFinite());
  assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 1e-14 * n);
  assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 1e-14 * n);
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  // Because of deflation, the singular values might not be completely sorted.
  // Fortunately, reordering them is an O(n) problem.
  for(Index i=0; i<actual_n-1; ++i)
  {
    if(singVals(i)>singVals(i+1))
    {
      using std::swap;
      swap(singVals(i),singVals(i+1));
      U.col(i).swap(U.col(i+1));
      if(m_compV) V.col(i).swap(V.col(i+1));
    }
  }

  // Reverse order so that singular values are in decreasing order.
  // Because of deflation, the zero singular values are already at the end.
  singVals.head(actual_n).reverseInPlace();
  U.leftCols(actual_n).rowwise().reverseInPlace();
  if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
  std::cout << "  * j:        " << jsvd.singularValues().transpose() << "\n\n";
  std::cout << "  * sing-val: " << singVals.transpose() << "\n";
//   std::cout << "  * err:      " << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << "\n";
#endif
}

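// Note (added for clarity; it restates the expression evaluated below, not the reference paper verbatim):
// in the shift-and-offset form used throughout this file, the secular function evaluated here is
//   f(shift + mu) = 1 + sum_{j in perm} col0(j)^2 / ( (diagShifted(j) - mu) * (diag(j) + shift + mu) ),
// which is the classical 1 + sum_j z_j^2 / (d_j^2 - sigma^2) with sigma = shift + mu, written so that
// each factor d_j^2 - sigma^2 = (d_j - sigma)(d_j + sigma) is evaluated without cancellation.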
template <typename MatrixType>
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{
  Index m = perm.size();
  RealScalar res = Literal(1);
  for(Index i=0; i<m; ++i)
  {
    Index j = perm(i);
    // The following expression could be rewritten to involve only a single division,
    // but this would make the expression more sensitive to overflow.
    res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
  }
  return res;

}

template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
                                         VectorType& singVals, ArrayRef shifts, ArrayRef mus)
{
  using std::abs;
  using std::swap;
  using std::sqrt;

  Index n = col0.size();
  Index actual_n = n;
  // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
  // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
  while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;

  for (Index k = 0; k < n; ++k)
  {
    if (col0(k) == Literal(0) || actual_n==1)
    {
      // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
      // if actual_n==1, then the deflated problem is already diagonalized
      singVals(k) = k==0 ? col0(0) : diag(k);
      mus(k) = Literal(0);
      shifts(k) = k==0 ? col0(0) : diag(k);
      continue;
    }

    // otherwise, use secular equation to find singular value
    RealScalar left = diag(k);
    RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm());
    if(k==actual_n-1)
      right = (diag(actual_n-1) + col0.matrix().norm());
    else
    {
      // Skip deflated singular values,
      // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
      // This should be equivalent to using perm[]
      Index l = k+1;
      while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
      right = diag(l);
    }

    // first decide whether it's closer to the left end or the right end
    RealScalar mid = left + (right-left) / Literal(2);
    RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
    std::cout << right-left << "\n";
    std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n";
    std::cout << "     = " << secularEq(0.1*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.2*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.3*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.4*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.49*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.5*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.51*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.6*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.7*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
#endif
    RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;

    // measure everything relative to shift
    Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
    diagShifted = diag - shift;

    // initial guess
    RealScalar muPrev, muCur;
    if (shift == left)
    {
      muPrev = (right - left) * RealScalar(0.1);
      if (k == actual_n-1) muCur = right - left;
      else                 muCur = (right - left) * RealScalar(0.5);
    }
    else
    {
      muPrev = -(right - left) * RealScalar(0.1);
      muCur = -(right - left) * RealScalar(0.5);
    }

    RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);
    RealScalar fCur = secularEq(muCur, col0, diag, perm, diagShifted, shift);
    if (abs(fPrev) < abs(fCur))
    {
      swap(fPrev, fCur);
      swap(muPrev, muCur);
    }

    // rational interpolation: fit a function of the form a / mu + b through the two previous
    // iterates and use its zero to compute the next iterate
    bool useBisection = fPrev*fCur>Literal(0);
    while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
    {
      ++m_numIters;

      // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
      RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
      RealScalar b = fCur - a / muCur;
      // And find mu such that f(mu)==0:
      RealScalar muZero = -a/b;
      RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);

      muPrev = muCur;
      fPrev = fCur;
      muCur = muZero;
      fCur = fZero;


      if (shift == left  && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
      if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
      if (abs(fCur)>abs(fPrev)) useBisection = true;
    }

    // fall back on bisection method if rational interpolation did not work
    if (useBisection)
    {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n";
#endif
      RealScalar leftShifted, rightShifted;
      if (shift == left)
      {
        // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
        // the factor 2 is to be more conservative
        leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );

        // check that we did it right:
        eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
        // I don't understand why the case k==0 would be special there:
        // if (k == 0) rightShifted = right - left; else
        rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
      }
      else
      {
        leftShifted = -(right - left) * RealScalar(0.51);
        if(k+1<n)
          rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
        else
          rightShifted = -(std::numeric_limits<RealScalar>::min)();
      }

      RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);

#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
      RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
#endif

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      if(!(fLeft * fRight<0))
      {
        std::cout << "fLeft: " << leftShifted << " - " << diagShifted.head(10).transpose() << "\n ; " << bool(left==shift) << " " << (left-shift) << "\n";
        std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << "  ;  " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << "   shift=" << shift << "\n";
      }
#endif
      eigen_internal_assert(fLeft * fRight < Literal(0));

      while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
      {
        RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
        fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
        if (fLeft * fMid < Literal(0))
        {
          rightShifted = midShifted;
        }
        else
        {
          leftShifted = midShifted;
          fLeft = fMid;
        }
      }

      muCur = (leftShifted + rightShifted) / Literal(2);
    }

    singVals[k] = shift + muCur;
    shifts[k] = shift;
    mus[k] = muCur;

    // perturb singular value slightly if it equals diagonal entry to avoid division by zero later
    // (deflation is supposed to avoid this from happening)
    // - this does not seem to be necessary anymore -
//     if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();
//     if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();
  }
}


// zhat is a perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
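// Note (added for clarity; it paraphrases equation (3.6) of the reference report as it is used in the
// loop below, and is not a literal quote): for a non-deflated entry k,
//   |zhat(k)|^2 = (sigma_last^2 - d_k^2) * prod (sigma_j^2 - d_k^2) / (d_i^2 - d_k^2)
// taken over the non-deflated indices i != k (with the i/j pairing used in the loop), where each factor
// sigma^2 - d_k^2 is evaluated as (sigma + d_k) * (mu + (shift - d_k)) to limit cancellation; the sign
// of zhat(k) is then taken from col0(k).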
template <typename MatrixType>
void BDCSVD<MatrixType>::perturbCol0
   (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
    const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
{
  using std::sqrt;
  Index n = col0.size();
  Index m = perm.size();
  if(m==0)
  {
    zhat.setZero();
    return;
  }
  Index last = perm(m-1);
  // The permutation perm lets us skip deflated entries while computing zhat
  for (Index k = 0; k < n; ++k)
  {
    if (col0(k) == Literal(0)) // deflated
      zhat(k) = Literal(0);
    else
    {
      // see equation (3.6)
      RealScalar dk = diag(k);
      RealScalar prod = (singVals(last) + dk) * (mus(last) + (shifts(last) - dk));

      for(Index l = 0; l<m; ++l)
      {
        Index i = perm(l);
        if(i!=k)
        {
          Index j = i<k ? i : perm(l-1);
          prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
          if(i!=k && std::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
            std::cout << "     " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk))
                      << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n";
#endif
        }
      }
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "zhat(" << k << ") =  sqrt( " << prod << ")  ;  " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
#endif
      RealScalar tmp = sqrt(prod);
      zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
    }
  }
}

// compute singular vectors
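// Note (added for clarity; it restates the expressions used in the loops below, cf. Section 3 of the
// reference report): for each non-deflated singular value sigma_k, the unnormalized vectors are
//   U(i,k) = zhat(i) / (d_i^2 - sigma_k^2)         (with the extra entry U(n,k) = 0)
//   V(i,k) = d_i * zhat(i) / (d_i^2 - sigma_k^2)   (with V(0,k) = -1)
// where d_i^2 - sigma_k^2 is again evaluated as (d_i - shift_k - mu_k) * (d_i + sigma_k), and each
// column is normalized afterwards.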
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVecs
   (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
    const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
{
  Index n = zhat.size();
  Index m = perm.size();

  for (Index k = 0; k < n; ++k)
  {
    if (zhat(k) == Literal(0))
    {
      U.col(k) = VectorType::Unit(n+1, k);
      if (m_compV) V.col(k) = VectorType::Unit(n, k);
    }
    else
    {
      U.col(k).setZero();
      for(Index l=0;l<m;++l)
      {
        Index i = perm(l);
        U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
      }
      U(n,k) = Literal(0);
      U.col(k).normalize();

      if (m_compV)
      {
        V.col(k).setZero();
        for(Index l=1;l<m;++l)
        {
          Index i = perm(l);
          V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
        }
        V(0,k) = Literal(-1);
        V.col(k).normalize();
      }
    }
  }
  U.col(n) = VectorType::Unit(n+1, n);
}


// page 12-13
// i >= 1: d(i) is almost zero and z(i) is non-zero.
// We use a rotation, applied on the left of M, to zero out z(i).
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size)
{
  using std::abs;
  using std::sqrt;
  using std::pow;
  Index start = firstCol + shift;
  RealScalar c = m_computed(start, start);
  RealScalar s = m_computed(start+i, start);
  RealScalar r = numext::hypot(c,s);
  if (r == Literal(0))
  {
    m_computed(start+i, start+i) = Literal(0);
    return;
  }
  m_computed(start,start) = r;
  m_computed(start+i, start) = Literal(0);
  m_computed(start+i, start+i) = Literal(0);

  JacobiRotation<RealScalar> J(c/r,-s/r);
  if (m_compU)  m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
  else          m_naiveU.applyOnTheRight(firstCol, firstCol+i, J);
}// end deflation 43


// page 13
// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
// We apply two rotations to have zj = 0;
// TODO deflation44 is still broken and not properly tested
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
{
  using std::abs;
  using std::sqrt;
  using std::conj;
  using std::pow;
  RealScalar c = m_computed(firstColm+i, firstColm);
  RealScalar s = m_computed(firstColm+j, firstColm);
  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; "
    << m_computed(firstColm + i-1, firstColm) << " "
    << m_computed(firstColm + i, firstColm) << " "
    << m_computed(firstColm + i+1, firstColm) << " "
    << m_computed(firstColm + i+2, firstColm) << "\n";
  std::cout << m_computed(firstColm + i-1, firstColm + i-1) << " "
    << m_computed(firstColm + i, firstColm+i) << " "
    << m_computed(firstColm + i+1, firstColm+i+1) << " "
    << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
#endif
  if (r==Literal(0))
  {
    m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
    return;
  }
  c/=r;
  s/=r;
  m_computed(firstColm + i, firstColm) = r;
  m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
  m_computed(firstColm + j, firstColm) = Literal(0);

  JacobiRotation<RealScalar> J(c,-s);
  if (m_compU)  m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
  else          m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J);
  if (m_compV)  m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);
}// end deflation 44


// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift)
{
  using std::sqrt;
  using std::abs;
  const Index length = lastCol + 1 - firstCol;

  Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1);
  Diagonal<MatrixXr> fulldiag(m_computed);
  VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);

  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
  RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
  RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "\ndeflate:" << diag.head(k+1).transpose() << "  |  " << diag.segment(k+1,length-k-1).transpose() << "\n";
#endif

  //condition 4.1
  if (diag(0) < epsilon_coarse)
  {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
    std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n";
#endif
    diag(0) = epsilon_coarse;
  }

  //condition 4.2
  for (Index i=1;i<length;++i)
    if (abs(col0(i)) < epsilon_strict)
    {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << "  (diag(" << i << ")=" << diag(i) << ")\n";
#endif
      col0(i) = Literal(0);
    }

  //condition 4.3
  for (Index i=1;i<length; i++)
    if (diag(i) < epsilon_coarse)
    {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "deflation 4.3, cancel z(" << i << ")=" << col0(i) << " because diag(" << i << ")=" << diag(i) << " < " << epsilon_coarse << "\n";
#endif
      deflation43(firstCol, shift, i, length);
    }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "to be sorted: " << diag.transpose() << "\n\n";
#endif
  {
    // Check for total deflation:
    // if we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting.
    bool total_deflation = (col0.tail(length-1).array()<considerZero).all();

    // Sort the diagonal entries; since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
    // First, compute the respective permutation.
    Index *permutation = m_workspaceI.data();
    {
      permutation[0] = 0;
      Index p = 1;

      // Move deflated diagonal entries to the end.
      for(Index i=1; i<length; ++i)
        if(abs(diag(i))<considerZero)
          permutation[p++] = i;

      Index i=1, j=k+1;
      for( ; p < length; ++p)
      {
        if (i > k)                    permutation[p] = j++;
        else if (j >= length)         permutation[p] = i++;
        else if (diag(i) < diag(j))   permutation[p] = j++;
        else                          permutation[p] = i++;
      }
    }

    // If we have a total deflation, then we have to insert diag(0) at the right place
    if(total_deflation)
    {
      for(Index i=1; i<length; ++i)
      {
        Index pi = permutation[i];
        if(abs(diag(pi))<considerZero || diag(0)<diag(pi))
          permutation[i-1] = permutation[i];
        else
        {
          permutation[i-1] = 0;
          break;
        }
      }
    }

    // Current index of each col, and current column of each index
    Index *realInd = m_workspaceI.data()+length;
    Index *realCol = m_workspaceI.data()+2*length;

    for(int pos = 0; pos< length; pos++)
    {
      realCol[pos] = pos;
      realInd[pos] = pos;
    }

    for(Index i = total_deflation?0:1; i < length; i++)
    {
      const Index pi = permutation[length - (total_deflation ? i+1 : i)];
      const Index J = realCol[pi];

      using std::swap;
      // swap diagonal and first column entries:
      swap(diag(i), diag(J));
      if(i!=0 && J!=0) swap(col0(i), col0(J));

      // change columns
      if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1));
      else         m_naiveU.col(firstCol+i).segment(0, 2)                .swap(m_naiveU.col(firstCol+J).segment(0, 2));
      if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length));

      //update real pos
      const Index realI = realInd[i];
      realCol[realI] = J;
      realCol[pi] = i;
      realInd[J] = realI;
      realInd[i] = pi;
    }
  }
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
  std::cout << "      : " << col0.transpose() << "\n\n";
#endif

  //condition 4.4
  {
    Index i = length-1;
    while(i>0 && (abs(diag(i))<considerZero || abs(col0(i))<considerZero)) --i;
    for(; i>1;--i)
      if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )
      {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
        std::cout << "deflation 4.4 with i = " << i << " because " << (diag(i) - diag(i-1)) << " < " << NumTraits<RealScalar>::epsilon()*diag(i) << "\n";
#endif
        eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && " diagonal entries are not properly sorted");
        deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length);
      }
  }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  for(Index j=2;j<length;++j)
    assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif
}//end deflation

#ifndef __CUDACC__
/** \svd_module
  *
  * \return the singular value decomposition of \c *this computed by the Divide & Conquer algorithm
  *
  * \sa class BDCSVD
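  *
  * For example, a minimal least-squares sketch (the names \c A and \c b are illustrative only):
  * \code
  * MatrixXd A = MatrixXd::Random(100, 60);
  * VectorXd b = VectorXd::Random(100);
  * VectorXd x = A.bdcSvd(ComputeThinU | ComputeThinV).solve(b);
  * \endcode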
  */
template<typename Derived>
BDCSVD<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const
{
  return BDCSVD<PlainObject>(*this, computationOptions);
}
#endif

} // end namespace Eigen

#endif