1/* $Id: ClpCholeskyDense.cpp 1753 2011-06-19 16:27:26Z stefan $ */
2/*
3 Copyright (C) 2003, International Business Machines Corporation
4 and others. All Rights Reserved.
5
6 This code is licensed under the terms of the Eclipse Public License (EPL).
7*/
8#include "CoinPragma.hpp"
9#include "CoinHelperFunctions.hpp"
10#include "ClpHelperFunctions.hpp"
11
12#include "ClpInterior.hpp"
13#include "ClpCholeskyDense.hpp"
14#include "ClpMessage.hpp"
15#include "ClpQuadraticObjective.hpp"
16
17/*#############################################################################*/
18/* Constructors / Destructor / Assignment*/
19/*#############################################################################*/
20
21/*-------------------------------------------------------------------*/
22/* Default Constructor */
23/*-------------------------------------------------------------------*/
24ClpCholeskyDense::ClpCholeskyDense ()
25 : ClpCholeskyBase(),
26 borrowSpace_(false)
27{
28 type_ = 11;
29}
30
31/*-------------------------------------------------------------------*/
32/* Copy constructor */
33/*-------------------------------------------------------------------*/
34ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs)
35 : ClpCholeskyBase(rhs),
36 borrowSpace_(rhs.borrowSpace_)
37{
38 assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
39}
40
41
42/*-------------------------------------------------------------------*/
43/* Destructor */
44/*-------------------------------------------------------------------*/
45ClpCholeskyDense::~ClpCholeskyDense ()
46{
47 if (borrowSpace_) {
48 /* set NULL*/
49 sparseFactor_ = NULL;
50 workDouble_ = NULL;
51 diagonal_ = NULL;
52 }
53}
54
55/*----------------------------------------------------------------*/
56/* Assignment operator */
57/*-------------------------------------------------------------------*/
58ClpCholeskyDense &
59ClpCholeskyDense::operator=(const ClpCholeskyDense& rhs)
60{
61 if (this != &rhs) {
62 assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
63 ClpCholeskyBase::operator=(rhs);
64 borrowSpace_ = rhs.borrowSpace_;
65 }
66 return *this;
67}
68/*-------------------------------------------------------------------*/
69/* Clone*/
70/*-------------------------------------------------------------------*/
71ClpCholeskyBase * ClpCholeskyDense::clone() const
72{
73 return new ClpCholeskyDense(*this);
74}
75/* If not power of 2 then need to redo a bit*/
76#define BLOCK 16
77#define BLOCKSHIFT 4
78/* Block unroll if power of 2 and at least 8*/
79#define BLOCKUNROLL
80
81#define BLOCKSQ ( BLOCK*BLOCK )
82#define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT )
83#define number_blocks(x) (((x)+BLOCK-1)>>BLOCKSHIFT)
84#define number_rows(x) ((x)<<BLOCKSHIFT)
85#define number_entries(x) ((x)<<BLOCKSQSHIFT)
86/* Gets space */
87int
88ClpCholeskyDense::reserveSpace(const ClpCholeskyBase * factor, int numberRows)
89{
90 numberRows_ = numberRows;
91 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
92 /* allow one stripe extra*/
93 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
94 sizeFactor_ = numberBlocks * BLOCKSQ;
95 /*#define CHOL_COMPARE*/
96#ifdef CHOL_COMPARE
97 sizeFactor_ += 95000;
98#endif
99 if (!factor) {
100 sparseFactor_ = new longDouble [sizeFactor_];
101 rowsDropped_ = new char [numberRows_];
102 memset(rowsDropped_, 0, numberRows_);
103 workDouble_ = new longDouble[numberRows_];
104 diagonal_ = new longDouble[numberRows_];
105 } else {
106 borrowSpace_ = true;
107 int numberFull = factor->numberRows();
108 sparseFactor_ = factor->sparseFactor() + (factor->size() - sizeFactor_);
109 workDouble_ = factor->workDouble() + (numberFull - numberRows_);
110 diagonal_ = factor->diagonal() + (numberFull - numberRows_);
111 }
112 numberRowsDropped_ = 0;
113 return 0;
114}
115/* Returns space needed */
116CoinBigIndex
117ClpCholeskyDense::space( int numberRows) const
118{
119 int numberBlocks = (numberRows + BLOCK - 1) >> BLOCKSHIFT;
120 /* allow one stripe extra*/
121 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
122 CoinBigIndex sizeFactor = numberBlocks * BLOCKSQ;
123#ifdef CHOL_COMPARE
124 sizeFactor += 95000;
125#endif
126 return sizeFactor;
127}
128/* Orders rows and saves pointer to matrix.and model */
129int
130ClpCholeskyDense::order(ClpInterior * model)
131{
132 model_ = model;
133 int numberRows;
134 int numberRowsModel = model_->numberRows();
135 int numberColumns = model_->numberColumns();
136 if (!doKKT_) {
137 numberRows = numberRowsModel;
138 } else {
139 numberRows = 2 * numberRowsModel + numberColumns;
140 }
141 reserveSpace(NULL, numberRows);
142 rowCopy_ = model->clpMatrix()->reverseOrderedCopy();
143 return 0;
144}
145/* Does Symbolic factorization given permutation.
146 This is called immediately after order. If user provides this then
147 user must provide factorize and solve. Otherwise the default factorization is used
148 returns non-zero if not enough memory */
149int
150ClpCholeskyDense::symbolic()
151{
152 return 0;
153}
154/* Factorize - filling in rowsDropped and returning number dropped */
155int
156ClpCholeskyDense::factorize(const CoinWorkDouble * diagonal, int * rowsDropped)
157{
158 assert (!borrowSpace_);
159 const CoinBigIndex * columnStart = model_->clpMatrix()->getVectorStarts();
160 const int * columnLength = model_->clpMatrix()->getVectorLengths();
161 const int * row = model_->clpMatrix()->getIndices();
162 const double * element = model_->clpMatrix()->getElements();
163 const CoinBigIndex * rowStart = rowCopy_->getVectorStarts();
164 const int * rowLength = rowCopy_->getVectorLengths();
165 const int * column = rowCopy_->getIndices();
166 const double * elementByRow = rowCopy_->getElements();
167 int numberColumns = model_->clpMatrix()->getNumCols();
168 CoinZeroN(sparseFactor_, sizeFactor_);
169 /*perturbation*/
170 CoinWorkDouble perturbation = model_->diagonalPerturbation() * model_->diagonalNorm();
171 perturbation = perturbation * perturbation;
172 if (perturbation > 1.0) {
173#ifdef COIN_DEVELOP
174 /*if (model_->model()->logLevel()&4) */
175 std::cout << "large perturbation " << perturbation << std::endl;
176#endif
177 perturbation = CoinSqrt(perturbation);
178 perturbation = 1.0;
179 }
180 int iRow;
181 int newDropped = 0;
182 CoinWorkDouble largest = 1.0;
183 CoinWorkDouble smallest = COIN_DBL_MAX;
184 CoinWorkDouble delta2 = model_->delta(); /* add delta*delta to diagonal*/
185 delta2 *= delta2;
186 if (!doKKT_) {
187 longDouble * work = sparseFactor_;
188 work--; /* skip diagonal*/
189 int addOffset = numberRows_ - 1;
190 const CoinWorkDouble * diagonalSlack = diagonal + numberColumns;
191 /* largest in initial matrix*/
192 CoinWorkDouble largest2 = 1.0e-20;
193 for (iRow = 0; iRow < numberRows_; iRow++) {
194 if (!rowsDropped_[iRow]) {
195 CoinBigIndex startRow = rowStart[iRow];
196 CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow];
197 CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2;
198 for (CoinBigIndex k = startRow; k < endRow; k++) {
199 int iColumn = column[k];
200 CoinBigIndex start = columnStart[iColumn];
201 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
202 CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k];
203 for (CoinBigIndex j = start; j < end; j++) {
204 int jRow = row[j];
205 if (!rowsDropped_[jRow]) {
206 if (jRow > iRow) {
207 CoinWorkDouble value = element[j] * multiplier;
208 work[jRow] += value;
209 } else if (jRow == iRow) {
210 CoinWorkDouble value = element[j] * multiplier;
211 diagonalValue += value;
212 }
213 }
214 }
215 }
216 for (int j = iRow + 1; j < numberRows_; j++)
217 largest2 = CoinMax(largest2, CoinAbs(work[j]));
218 diagonal_[iRow] = diagonalValue;
219 largest2 = CoinMax(largest2, CoinAbs(diagonalValue));
220 } else {
221 /* dropped*/
222 diagonal_[iRow] = 1.0;
223 }
224 addOffset--;
225 work += addOffset;
226 }
227 /*check sizes*/
228 largest2 *= 1.0e-20;
229 largest = CoinMin(largest2, CHOL_SMALL_VALUE);
230 int numberDroppedBefore = 0;
231 for (iRow = 0; iRow < numberRows_; iRow++) {
232 int dropped = rowsDropped_[iRow];
233 /* Move to int array*/
234 rowsDropped[iRow] = dropped;
235 if (!dropped) {
236 CoinWorkDouble diagonal = diagonal_[iRow];
237 if (diagonal > largest2) {
238 diagonal_[iRow] = diagonal + perturbation;
239 } else {
240 diagonal_[iRow] = diagonal + perturbation;
241 rowsDropped[iRow] = 2;
242 numberDroppedBefore++;
243 }
244 }
245 }
246 doubleParameters_[10] = CoinMax(1.0e-20, largest);
247 integerParameters_[20] = 0;
248 doubleParameters_[3] = 0.0;
249 doubleParameters_[4] = COIN_DBL_MAX;
250 integerParameters_[34] = 0; /* say all must be positive*/
251#ifdef CHOL_COMPARE
252 if (numberRows_ < 200)
253 factorizePart3(rowsDropped);
254#endif
255 factorizePart2(rowsDropped);
256 newDropped = integerParameters_[20] + numberDroppedBefore;
257 largest = doubleParameters_[3];
258 smallest = doubleParameters_[4];
259 if (model_->messageHandler()->logLevel() > 1)
260 std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl;
261 choleskyCondition_ = largest / smallest;
262 /*drop fresh makes some formADAT easier*/
263 if (newDropped || numberRowsDropped_) {
264 newDropped = 0;
265 for (int i = 0; i < numberRows_; i++) {
266 char dropped = static_cast<char>(rowsDropped[i]);
267 rowsDropped_[i] = dropped;
268 if (dropped == 2) {
269 /*dropped this time*/
270 rowsDropped[newDropped++] = i;
271 rowsDropped_[i] = 0;
272 }
273 }
274 numberRowsDropped_ = newDropped;
275 newDropped = -(2 + newDropped);
276 }
277 } else {
278 /* KKT*/
279 CoinPackedMatrix * quadratic = NULL;
280 ClpQuadraticObjective * quadraticObj =
281 (dynamic_cast< ClpQuadraticObjective*>(model_->objectiveAsObject()));
282 if (quadraticObj)
283 quadratic = quadraticObj->quadraticObjective();
284 /* matrix*/
285 int numberRowsModel = model_->numberRows();
286 int numberColumns = model_->numberColumns();
287 int numberTotal = numberColumns + numberRowsModel;
288 longDouble * work = sparseFactor_;
289 work--; /* skip diagonal*/
290 int addOffset = numberRows_ - 1;
291 int iColumn;
292 if (!quadratic) {
293 for (iColumn = 0; iColumn < numberColumns; iColumn++) {
294 CoinWorkDouble value = diagonal[iColumn];
295 if (CoinAbs(value) > 1.0e-100) {
296 value = 1.0 / value;
297 largest = CoinMax(largest, CoinAbs(value));
298 diagonal_[iColumn] = -value;
299 CoinBigIndex start = columnStart[iColumn];
300 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
301 for (CoinBigIndex j = start; j < end; j++) {
302 /*choleskyRow_[numberElements]=row[j]+numberTotal;*/
303 /*sparseFactor_[numberElements++]=element[j];*/
304 work[row[j] + numberTotal] = element[j];
305 largest = CoinMax(largest, CoinAbs(element[j]));
306 }
307 } else {
308 diagonal_[iColumn] = -value;
309 }
310 addOffset--;
311 work += addOffset;
312 }
313 } else {
314 /* Quadratic*/
315 const int * columnQuadratic = quadratic->getIndices();
316 const CoinBigIndex * columnQuadraticStart = quadratic->getVectorStarts();
317 const int * columnQuadraticLength = quadratic->getVectorLengths();
318 const double * quadraticElement = quadratic->getElements();
319 for (iColumn = 0; iColumn < numberColumns; iColumn++) {
320 CoinWorkDouble value = diagonal[iColumn];
321 CoinBigIndex j;
322 if (CoinAbs(value) > 1.0e-100) {
323 value = 1.0 / value;
324 for (j = columnQuadraticStart[iColumn];
325 j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) {
326 int jColumn = columnQuadratic[j];
327 if (jColumn > iColumn) {
328 work[jColumn] = -quadraticElement[j];
329 } else if (iColumn == jColumn) {
330 value += quadraticElement[j];
331 }
332 }
333 largest = CoinMax(largest, CoinAbs(value));
334 diagonal_[iColumn] = -value;
335 CoinBigIndex start = columnStart[iColumn];
336 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
337 for (j = start; j < end; j++) {
338 work[row[j] + numberTotal] = element[j];
339 largest = CoinMax(largest, CoinAbs(element[j]));
340 }
341 } else {
342 value = 1.0e100;
343 diagonal_[iColumn] = -value;
344 }
345 addOffset--;
346 work += addOffset;
347 }
348 }
349 /* slacks*/
350 for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) {
351 CoinWorkDouble value = diagonal[iColumn];
352 if (CoinAbs(value) > 1.0e-100) {
353 value = 1.0 / value;
354 largest = CoinMax(largest, CoinAbs(value));
355 } else {
356 value = 1.0e100;
357 }
358 diagonal_[iColumn] = -value;
359 work[iColumn-numberColumns+numberTotal] = -1.0;
360 addOffset--;
361 work += addOffset;
362 }
363 /* Finish diagonal*/
364 for (iRow = 0; iRow < numberRowsModel; iRow++) {
365 diagonal_[iRow+numberTotal] = delta2;
366 }
367 /*check sizes*/
368 largest *= 1.0e-20;
369 largest = CoinMin(largest, CHOL_SMALL_VALUE);
370 doubleParameters_[10] = CoinMax(1.0e-20, largest);
371 integerParameters_[20] = 0;
372 doubleParameters_[3] = 0.0;
373 doubleParameters_[4] = COIN_DBL_MAX;
374 /* Set up LDL cutoff*/
375 integerParameters_[34] = numberTotal;
376 /* KKT*/
377 int * rowsDropped2 = new int[numberRows_];
378 CoinZeroN(rowsDropped2, numberRows_);
379#ifdef CHOL_COMPARE
380 if (numberRows_ < 200)
381 factorizePart3(rowsDropped2);
382#endif
383 factorizePart2(rowsDropped2);
384 newDropped = integerParameters_[20];
385 largest = doubleParameters_[3];
386 smallest = doubleParameters_[4];
387 if (model_->messageHandler()->logLevel() > 1)
388 COIN_DETAIL_PRINT(std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl);
389 choleskyCondition_ = largest / smallest;
390 /* Should save adjustments in ..R_*/
391 int n1 = 0, n2 = 0;
392 CoinWorkDouble * primalR = model_->primalR();
393 CoinWorkDouble * dualR = model_->dualR();
394 for (iRow = 0; iRow < numberTotal; iRow++) {
395 if (rowsDropped2[iRow]) {
396 n1++;
397 /*printf("row region1 %d dropped\n",iRow);*/
398 /*rowsDropped_[iRow]=1;*/
399 rowsDropped_[iRow] = 0;
400 primalR[iRow] = doubleParameters_[20];
401 } else {
402 rowsDropped_[iRow] = 0;
403 primalR[iRow] = 0.0;
404 }
405 }
406 for (; iRow < numberRows_; iRow++) {
407 if (rowsDropped2[iRow]) {
408 n2++;
409 /*printf("row region2 %d dropped\n",iRow);*/
410 /*rowsDropped_[iRow]=1;*/
411 rowsDropped_[iRow] = 0;
412 dualR[iRow-numberTotal] = doubleParameters_[34];
413 } else {
414 rowsDropped_[iRow] = 0;
415 dualR[iRow-numberTotal] = 0.0;
416 }
417 }
418 }
419 return 0;
420}
421/* Factorize - filling in rowsDropped and returning number dropped */
422void
423ClpCholeskyDense::factorizePart3( int * rowsDropped)
424{
425 int iColumn;
426 longDouble * xx = sparseFactor_;
427 longDouble * yy = diagonal_;
428 diagonal_ = sparseFactor_ + 40000;
429 sparseFactor_ = diagonal_ + numberRows_;
430 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
431 CoinMemcpyN(xx, 40000, sparseFactor_);
432 CoinMemcpyN(yy, numberRows_, diagonal_);
433 int numberDropped = 0;
434 CoinWorkDouble largest = 0.0;
435 CoinWorkDouble smallest = COIN_DBL_MAX;
436 double dropValue = doubleParameters_[10];
437 int firstPositive = integerParameters_[34];
438 longDouble * work = sparseFactor_;
439 /* Allow for triangular*/
440 int addOffset = numberRows_ - 1;
441 work--;
442 for (iColumn = 0; iColumn < numberRows_; iColumn++) {
443 int iRow;
444 int addOffsetNow = numberRows_ - 1;
445 longDouble * workNow = sparseFactor_ - 1 + iColumn;
446 CoinWorkDouble diagonalValue = diagonal_[iColumn];
447 for (iRow = 0; iRow < iColumn; iRow++) {
448 double aj = *workNow;
449 addOffsetNow--;
450 workNow += addOffsetNow;
451 diagonalValue -= aj * aj * workDouble_[iRow];
452 }
453 bool dropColumn = false;
454 if (iColumn < firstPositive) {
455 /* must be negative*/
456 if (diagonalValue <= -dropValue) {
457 smallest = CoinMin(smallest, -diagonalValue);
458 largest = CoinMax(largest, -diagonalValue);
459 workDouble_[iColumn] = diagonalValue;
460 diagonalValue = 1.0 / diagonalValue;
461 } else {
462 dropColumn = true;
463 workDouble_[iColumn] = -1.0e100;
464 diagonalValue = 0.0;
465 integerParameters_[20]++;
466 }
467 } else {
468 /* must be positive*/
469 if (diagonalValue >= dropValue) {
470 smallest = CoinMin(smallest, diagonalValue);
471 largest = CoinMax(largest, diagonalValue);
472 workDouble_[iColumn] = diagonalValue;
473 diagonalValue = 1.0 / diagonalValue;
474 } else {
475 dropColumn = true;
476 workDouble_[iColumn] = 1.0e100;
477 diagonalValue = 0.0;
478 integerParameters_[20]++;
479 }
480 }
481 if (!dropColumn) {
482 diagonal_[iColumn] = diagonalValue;
483 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
484 double value = work[iRow];
485 workNow = sparseFactor_ - 1;
486 int addOffsetNow = numberRows_ - 1;
487 for (int jColumn = 0; jColumn < iColumn; jColumn++) {
488 double aj = workNow[iColumn];
489 double multiplier = workDouble_[jColumn];
490 double ai = workNow[iRow];
491 addOffsetNow--;
492 workNow += addOffsetNow;
493 value -= aj * ai * multiplier;
494 }
495 work[iRow] = value * diagonalValue;
496 }
497 } else {
498 /* drop column*/
499 rowsDropped[iColumn] = 2;
500 numberDropped++;
501 diagonal_[iColumn] = 0.0;
502 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
503 work[iRow] = 0.0;
504 }
505 }
506 addOffset--;
507 work += addOffset;
508 }
509 doubleParameters_[3] = largest;
510 doubleParameters_[4] = smallest;
511 integerParameters_[20] = numberDropped;
512 sparseFactor_ = xx;
513 diagonal_ = yy;
514}
515/*#define POS_DEBUG*/
516#ifdef POS_DEBUG
517static int counter = 0;
518int ClpCholeskyDense::bNumber(const longDouble * array, int &iRow, int &iCol)
519{
520 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
521 longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks;
522 int k = array - a;
523 assert ((k % BLOCKSQ) == 0);
524 iCol = 0;
525 int kk = k >> BLOCKSQSHIFT;
526 /*printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);*/
527 k = kk;
528 while (k >= numberBlocks) {
529 iCol++;
530 k -= numberBlocks;
531 numberBlocks--;
532 }
533 iRow = iCol + k;
534 counter++;
535 if(counter > 100000)
536 exit(77);
537 return kk;
538}
539#endif
540/* Factorize - filling in rowsDropped and returning number dropped */
541void
542ClpCholeskyDense::factorizePart2( int * rowsDropped)
543{
544 int iColumn;
545 /*longDouble * xx = sparseFactor_;*/
546 /*longDouble * yy = diagonal_;*/
547 /*diagonal_ = sparseFactor_ + 40000;*/
548 /*sparseFactor_=diagonal_ + numberRows_;*/
549 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
550 /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/
551 /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/
552 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
553 /* later align on boundary*/
554 longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks;
555 int n = numberRows_;
556 int nRound = numberRows_ & (~(BLOCK - 1));
557 /* adjust if exact*/
558 if (nRound == n)
559 nRound -= BLOCK;
560 int sizeLastBlock = n - nRound;
561 int get = n * (n - 1) / 2; /* ? as no diagonal*/
562 int block = numberBlocks * (numberBlocks + 1) / 2;
563 int ifOdd;
564 int rowLast;
565 if (sizeLastBlock != BLOCK) {
566 longDouble * aa = &a[(block-1)*BLOCKSQ];
567 rowLast = nRound - 1;
568 ifOdd = 1;
569 int put = BLOCKSQ;
570 /* do last separately*/
571 put -= (BLOCK - sizeLastBlock) * (BLOCK + 1);
572 for (iColumn = numberRows_ - 1; iColumn >= nRound; iColumn--) {
573 int put2 = put;
574 put -= BLOCK;
575 for (int iRow = numberRows_ - 1; iRow > iColumn; iRow--) {
576 aa[--put2] = sparseFactor_[--get];
577 assert (aa + put2 >= sparseFactor_ + get);
578 }
579 /* save diagonal as well*/
580 aa[--put2] = diagonal_[iColumn];
581 }
582 n = nRound;
583 block--;
584 } else {
585 /* exact fit*/
586 rowLast = numberRows_ - 1;
587 ifOdd = 0;
588 }
589 /* Now main loop*/
590 int nBlock = 0;
591 for (; n > 0; n -= BLOCK) {
592 longDouble * aa = &a[(block-1)*BLOCKSQ];
593 longDouble * aaLast = NULL;
594 int put = BLOCKSQ;
595 int putLast = 0;
596 /* see if we have small block*/
597 if (ifOdd) {
598 aaLast = &a[(block-1)*BLOCKSQ];
599 aa = aaLast - BLOCKSQ;
600 putLast = BLOCKSQ - BLOCK + sizeLastBlock;
601 }
602 for (iColumn = n - 1; iColumn >= n - BLOCK; iColumn--) {
603 if (aaLast) {
604 /* last bit*/
605 for (int iRow = numberRows_ - 1; iRow > rowLast; iRow--) {
606 aaLast[--putLast] = sparseFactor_[--get];
607 assert (aaLast + putLast >= sparseFactor_ + get);
608 }
609 putLast -= BLOCK - sizeLastBlock;
610 }
611 longDouble * aPut = aa;
612 int j = rowLast;
613 for (int jBlock = 0; jBlock <= nBlock; jBlock++) {
614 int put2 = put;
615 int last = CoinMax(j - BLOCK, iColumn);
616 for (int iRow = j; iRow > last; iRow--) {
617 aPut[--put2] = sparseFactor_[--get];
618 assert (aPut + put2 >= sparseFactor_ + get);
619 }
620 if (j - BLOCK < iColumn) {
621 /* save diagonal as well*/
622 aPut[--put2] = diagonal_[iColumn];
623 }
624 j -= BLOCK;
625 aPut -= BLOCKSQ;
626 }
627 put -= BLOCK;
628 }
629 nBlock++;
630 block -= nBlock + ifOdd;
631 }
632 ClpCholeskyDenseC info;
633 info.diagonal_ = diagonal_;
634 info.doubleParameters_[0] = doubleParameters_[10];
635 info.integerParameters_[0] = integerParameters_[34];
636#ifndef CLP_CILK
637 ClpCholeskyCfactor(&info, a, numberRows_, numberBlocks,
638 diagonal_, workDouble_, rowsDropped);
639#else
640 info.a = a;
641 info.n = numberRows_;
642 info.numberBlocks = numberBlocks;
643 info.work = workDouble_;
644 info.rowsDropped = rowsDropped;
645 info.integerParameters_[1] = model_->numberThreads();
646 ClpCholeskySpawn(&info);
647#endif
648 double largest = 0.0;
649 double smallest = COIN_DBL_MAX;
650 int numberDropped = 0;
651 for (int i = 0; i < numberRows_; i++) {
652 if (diagonal_[i]) {
653 largest = CoinMax(largest, CoinAbs(diagonal_[i]));
654 smallest = CoinMin(smallest, CoinAbs(diagonal_[i]));
655 } else {
656 numberDropped++;
657 }
658 }
659 doubleParameters_[3] = CoinMax(doubleParameters_[3], 1.0 / smallest);
660 doubleParameters_[4] = CoinMin(doubleParameters_[4], 1.0 / largest);
661 integerParameters_[20] += numberDropped;
662}
663/* Non leaf recursive factor*/
664void
665ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct, longDouble * a, int n, int numberBlocks,
666 longDouble * diagonal, longDouble * work, int * rowsDropped)
667{
668 if (n <= BLOCK) {
669 ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped);
670 } else {
671 int nb = number_blocks((n + 1) >> 1);
672 int nThis = number_rows(nb);
673 longDouble * aother;
674 int nLeft = n - nThis;
675 int nintri = (nb * (nb + 1)) >> 1;
676 int nbelow = (numberBlocks - nb) * nb;
677 ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped);
678 ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks);
679 aother = a + number_entries(nintri + nbelow);
680 ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks);
681 ClpCholeskyCfactor(thisStruct, aother, nLeft,
682 numberBlocks - nb, diagonal + nThis, work + nThis, rowsDropped);
683 }
684}
685/* Non leaf recursive triangle rectangle update*/
686void
687ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct, longDouble * aTri, int nThis, longDouble * aUnder,
688 longDouble * diagonal, longDouble * work,
689 int nLeft, int iBlock, int jBlock,
690 int numberBlocks)
691{
692 if (nThis <= BLOCK && nLeft <= BLOCK) {
693 ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft);
694 } else if (nThis < nLeft) {
695 int nb = number_blocks((nLeft + 1) >> 1);
696 int nLeft2 = number_rows(nb);
697 ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks);
698 ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft - nLeft2,
699 iBlock + nb, jBlock, numberBlocks);
700 } else {
701 int nb = number_blocks((nThis + 1) >> 1);
702 int nThis2 = number_rows(nb);
703 longDouble * aother;
704 int kBlock = jBlock + nb;
705 int i;
706 int nintri = (nb * (nb + 1)) >> 1;
707 int nbelow = (numberBlocks - nb) * nb;
708 ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks);
709 /* and rectangular update */
710 i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) -
711 (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
712 aother = aUnder + number_entries(i);
713 ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis - nThis2, nLeft, nThis2, aUnder, aother,
714 work, kBlock, jBlock, numberBlocks);
715 ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis - nThis2, aother, diagonal + nThis2,
716 work + nThis2, nLeft,
717 iBlock - nb, kBlock - nb, numberBlocks - nb);
718 }
719}
720/* Non leaf recursive rectangle triangle update*/
721void
722ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct, longDouble * aUnder, int nTri, int nDo,
723 int iBlock, int jBlock, longDouble * aTri,
724 longDouble * diagonal, longDouble * work,
725 int numberBlocks)
726{
727 if (nTri <= BLOCK && nDo <= BLOCK) {
728 ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri,/*diagonal,*/work, nTri);
729 } else if (nTri < nDo) {
730 int nb = number_blocks((nDo + 1) >> 1);
731 int nDo2 = number_rows(nb);
732 longDouble * aother;
733 int i;
734 ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
735 i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) -
736 (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
737 aother = aUnder + number_entries(i);
738 ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo - nDo2, iBlock - nb, jBlock, aTri, diagonal + nDo2,
739 work + nDo2, numberBlocks - nb);
740 } else {
741 int nb = number_blocks((nTri + 1) >> 1);
742 int nTri2 = number_rows(nb);
743 longDouble * aother;
744 int i;
745 ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
746 /* and rectangular update */
747 i = ((numberBlocks - iBlock) * (numberBlocks - iBlock + 1) -
748 (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb + 1)) >> 1;
749 aother = aTri + number_entries(nb);
750 ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri - nTri2, nDo, aUnder + number_entries(nb), aother,
751 work, iBlock, jBlock, numberBlocks);
752 ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri - nTri2, nDo, iBlock + nb, jBlock,
753 aTri + number_entries(i), diagonal, work, numberBlocks);
754 }
755}
756/* Non leaf recursive rectangle rectangle update,
757 nUnder is number of rows in iBlock,
758 nUnderK is number of rows in kBlock
759*/
760void
761ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct, longDouble * above, int nUnder, int nUnderK,
762 int nDo, longDouble * aUnder, longDouble *aOther,
763 longDouble * work,
764 int iBlock, int jBlock,
765 int numberBlocks)
766{
767 if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) {
768 assert (nDo == BLOCK && nUnder == BLOCK);
769 ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above , aUnder , aOther, work, nUnderK);
770 } else if (nDo <= nUnderK && nUnder <= nUnderK) {
771 int nb = number_blocks((nUnderK + 1) >> 1);
772 int nUnder2 = number_rows(nb);
773 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work,
774 iBlock, jBlock, numberBlocks);
775 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK - nUnder2, nDo, aUnder + number_entries(nb),
776 aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks);
777 } else if (nUnderK <= nDo && nUnder <= nDo) {
778 int nb = number_blocks((nDo + 1) >> 1);
779 int nDo2 = number_rows(nb);
780 int i;
781 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work,
782 iBlock, jBlock, numberBlocks);
783 i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) -
784 (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
785 ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo - nDo2,
786 aUnder + number_entries(i),
787 aOther, work + nDo2,
788 iBlock - nb, jBlock, numberBlocks - nb);
789 } else {
790 int nb = number_blocks((nUnder + 1) >> 1);
791 int nUnder2 = number_rows(nb);
792 int i;
793 ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work,
794 iBlock, jBlock, numberBlocks);
795 i = ((numberBlocks - iBlock) * (numberBlocks - iBlock - 1) -
796 (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb - 1)) >> 1;
797 ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder - nUnder2, nUnderK, nDo, aUnder,
798 aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks);
799 }
800}
801/* Leaf recursive factor*/
802void
803ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct, longDouble * a, int n,
804 longDouble * diagonal, longDouble * work, int * rowsDropped)
805{
806 double dropValue = thisStruct->doubleParameters_[0];
807 int firstPositive = thisStruct->integerParameters_[0];
808 int rowOffset = static_cast<int>(diagonal - thisStruct->diagonal_);
809 int i, j, k;
810 CoinWorkDouble t00, temp1;
811 longDouble * aa;
812 aa = a - BLOCK;
813 for (j = 0; j < n; j ++) {
814 bool dropColumn;
815 CoinWorkDouble useT00;
816 aa += BLOCK;
817 t00 = aa[j];
818 for (k = 0; k < j; ++k) {
819 CoinWorkDouble multiplier = work[k];
820 t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier;
821 }
822 dropColumn = false;
823 useT00 = t00;
824 if (j + rowOffset < firstPositive) {
825 /* must be negative*/
826 if (t00 <= -dropValue) {
827 /*aa[j]=t00;*/
828 t00 = 1.0 / t00;
829 } else {
830 dropColumn = true;
831 /*aa[j]=-1.0e100;*/
832 useT00 = -1.0e-100;
833 t00 = 0.0;
834 }
835 } else {
836 /* must be positive*/
837 if (t00 >= dropValue) {
838 /*aa[j]=t00;*/
839 t00 = 1.0 / t00;
840 } else {
841 dropColumn = true;
842 /*aa[j]=1.0e100;*/
843 useT00 = 1.0e-100;
844 t00 = 0.0;
845 }
846 }
847 if (!dropColumn) {
848 diagonal[j] = t00;
849 work[j] = useT00;
850 temp1 = t00;
851 for (i = j + 1; i < n; i++) {
852 t00 = aa[i];
853 for (k = 0; k < j; ++k) {
854 CoinWorkDouble multiplier = work[k];
855 t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier;
856 }
857 aa[i] = t00 * temp1;
858 }
859 } else {
860 /* drop column*/
861 rowsDropped[j+rowOffset] = 2;
862 diagonal[j] = 0.0;
863 /*aa[j]=1.0e100;*/
864 work[j] = 1.0e100;
865 for (i = j + 1; i < n; i++) {
866 aa[i] = 0.0;
867 }
868 }
869 }
870}
871/* Leaf recursive triangle rectangle update*/
872void
873ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aTri, longDouble * aUnder, longDouble * diagonal, longDouble * work,
874 int nUnder)
875{
876#ifdef POS_DEBUG
877 int iru, icu;
878 int iu = bNumber(aUnder, iru, icu);
879 int irt, ict;
880 int it = bNumber(aTri, irt, ict);
881 /*printf("%d %d\n",iu,it);*/
882 printf("trirecleaf under (%d,%d), tri (%d,%d)\n",
883 iru, icu, irt, ict);
884 assert (diagonal == thisStruct->diagonal_ + ict * BLOCK);
885#endif
886 int j;
887 longDouble * aa;
888#ifdef BLOCKUNROLL
889 if (nUnder == BLOCK) {
890 aa = aTri - 2 * BLOCK;
891 for (j = 0; j < BLOCK; j += 2) {
892 int i;
893 CoinWorkDouble temp0 = diagonal[j];
894 CoinWorkDouble temp1 = diagonal[j+1];
895 aa += 2 * BLOCK;
896 for ( i = 0; i < BLOCK; i += 2) {
897 CoinWorkDouble at1;
898 CoinWorkDouble t00 = aUnder[i+j*BLOCK];
899 CoinWorkDouble t10 = aUnder[i+ BLOCK + j*BLOCK];
900 CoinWorkDouble t01 = aUnder[i+1+j*BLOCK];
901 CoinWorkDouble t11 = aUnder[i+1+ BLOCK + j*BLOCK];
902 int k;
903 for (k = 0; k < j; ++k) {
904 CoinWorkDouble multiplier = work[k];
905 CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier;
906 CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier;
907 CoinWorkDouble at0 = aTri[j + k * BLOCK];
908 CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK];
909 t00 -= au0 * at0;
910 t10 -= au0 * at1;
911 t01 -= au1 * at0;
912 t11 -= au1 * at1;
913 }
914 t00 *= temp0;
915 at1 = aTri[j + 1 + j*BLOCK] * work[j];
916 t10 -= t00 * at1;
917 t01 *= temp0;
918 t11 -= t01 * at1;
919 aUnder[i+j*BLOCK] = t00;
920 aUnder[i+1+j*BLOCK] = t01;
921 aUnder[i+ BLOCK + j*BLOCK] = t10 * temp1;
922 aUnder[i+1+ BLOCK + j*BLOCK] = t11 * temp1;
923 }
924 }
925 } else {
926#endif
927 aa = aTri - BLOCK;
928 for (j = 0; j < BLOCK; j ++) {
929 int i;
930 CoinWorkDouble temp1 = diagonal[j];
931 aa += BLOCK;
932 for (i = 0; i < nUnder; i++) {
933 int k;
934 CoinWorkDouble t00 = aUnder[i+j*BLOCK];
935 for ( k = 0; k < j; ++k) {
936 CoinWorkDouble multiplier = work[k];
937 t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier;
938 }
939 aUnder[i+j*BLOCK] = t00 * temp1;
940 }
941 }
942#ifdef BLOCKUNROLL
943 }
944#endif
945}
946/* Leaf recursive rectangle triangle update*/
947void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aUnder, longDouble * aTri,
948 /*longDouble * diagonal,*/ longDouble * work, int nUnder)
949{
950#ifdef POS_DEBUG
951 int iru, icu;
952 int iu = bNumber(aUnder, iru, icu);
953 int irt, ict;
954 int it = bNumber(aTri, irt, ict);
955 /*printf("%d %d\n",iu,it);*/
956 printf("rectrileaf under (%d,%d), tri (%d,%d)\n",
957 iru, icu, irt, ict);
958 assert (diagonal == thisStruct->diagonal_ + icu * BLOCK);
959#endif
960 int i, j, k;
961 CoinWorkDouble t00;
962 longDouble * aa;
963#ifdef BLOCKUNROLL
964 if (nUnder == BLOCK) {
965 longDouble * aUnder2 = aUnder - 2;
966 aa = aTri - 2 * BLOCK;
967 for (j = 0; j < BLOCK; j += 2) {
968 CoinWorkDouble t00, t01, t10, t11;
969 aa += 2 * BLOCK;
970 aUnder2 += 2;
971 t00 = aa[j];
972 t01 = aa[j+1];
973 t10 = aa[j+1+BLOCK];
974 for (k = 0; k < BLOCK; ++k) {
975 CoinWorkDouble multiplier = work[k];
976 CoinWorkDouble a0 = aUnder2[k * BLOCK];
977 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK];
978 CoinWorkDouble x0 = a0 * multiplier;
979 CoinWorkDouble x1 = a1 * multiplier;
980 t00 -= a0 * x0;
981 t01 -= a1 * x0;
982 t10 -= a1 * x1;
983 }
984 aa[j] = t00;
985 aa[j+1] = t01;
986 aa[j+1+BLOCK] = t10;
987 for (i = j + 2; i < BLOCK; i += 2) {
988 t00 = aa[i];
989 t01 = aa[i+BLOCK];
990 t10 = aa[i+1];
991 t11 = aa[i+1+BLOCK];
992 for (k = 0; k < BLOCK; ++k) {
993 CoinWorkDouble multiplier = work[k];
994 CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier;
995 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier;
996 t00 -= aUnder[i + k * BLOCK] * a0;
997 t01 -= aUnder[i + k * BLOCK] * a1;
998 t10 -= aUnder[i + 1 + k * BLOCK] * a0;
999 t11 -= aUnder[i + 1 + k * BLOCK] * a1;
1000 }
1001 aa[i] = t00;
1002 aa[i+BLOCK] = t01;
1003 aa[i+1] = t10;
1004 aa[i+1+BLOCK] = t11;
1005 }
1006 }
1007 } else {
1008#endif
1009 aa = aTri - BLOCK;
1010 for (j = 0; j < nUnder; j ++) {
1011 aa += BLOCK;
1012 for (i = j; i < nUnder; i++) {
1013 t00 = aa[i];
1014 for (k = 0; k < BLOCK; ++k) {
1015 CoinWorkDouble multiplier = work[k];
1016 t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier;
1017 }
1018 aa[i] = t00;
1019 }
1020 }
1021#ifdef BLOCKUNROLL
1022 }
1023#endif
1024}
1025/* Leaf recursive rectangle rectangle update,
1026 nUnder is number of rows in iBlock,
1027 nUnderK is number of rows in kBlock
1028*/
1029void
1030ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/
1031 const longDouble * COIN_RESTRICT above,
1032 const longDouble * COIN_RESTRICT aUnder,
1033 longDouble * COIN_RESTRICT aOther,
1034 const longDouble * COIN_RESTRICT work,
1035 int nUnder)
1036{
1037#ifdef POS_DEBUG
1038 int ira, ica;
1039 int ia = bNumber(above, ira, ica);
1040 int iru, icu;
1041 int iu = bNumber(aUnder, iru, icu);
1042 int iro, ico;
1043 int io = bNumber(aOther, iro, ico);
1044 /*printf("%d %d %d\n",ia,iu,io);*/
1045 printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n",
1046 ira, ica, iru, icu, iro, ico);
1047#endif
1048 int i, j, k;
1049 longDouble * aa;
1050#ifdef BLOCKUNROLL
1051 aa = aOther - 4 * BLOCK;
1052 if (nUnder == BLOCK) {
1053 /*#define INTEL*/
1054#ifdef INTEL
1055 aa += 2 * BLOCK;
1056 for (j = 0; j < BLOCK; j += 2) {
1057 aa += 2 * BLOCK;
1058 for (i = 0; i < BLOCK; i += 2) {
1059 CoinWorkDouble t00 = aa[i+0*BLOCK];
1060 CoinWorkDouble t10 = aa[i+1*BLOCK];
1061 CoinWorkDouble t01 = aa[i+1+0*BLOCK];
1062 CoinWorkDouble t11 = aa[i+1+1*BLOCK];
1063 for (k = 0; k < BLOCK; k++) {
1064 CoinWorkDouble multiplier = work[k];
1065 CoinWorkDouble a00 = aUnder[i+k*BLOCK] * multiplier;
1066 CoinWorkDouble a01 = aUnder[i+1+k*BLOCK] * multiplier;
1067 t00 -= a00 * above[j + 0 + k * BLOCK];
1068 t10 -= a00 * above[j + 1 + k * BLOCK];
1069 t01 -= a01 * above[j + 0 + k * BLOCK];
1070 t11 -= a01 * above[j + 1 + k * BLOCK];
1071 }
1072 aa[i+0*BLOCK] = t00;
1073 aa[i+1*BLOCK] = t10;
1074 aa[i+1+0*BLOCK] = t01;
1075 aa[i+1+1*BLOCK] = t11;
1076 }
1077 }
1078#else
1079 for (j = 0; j < BLOCK; j += 4) {
1080 aa += 4 * BLOCK;
1081 for (i = 0; i < BLOCK; i += 4) {
1082 CoinWorkDouble t00 = aa[i+0+0*BLOCK];
1083 CoinWorkDouble t10 = aa[i+0+1*BLOCK];
1084 CoinWorkDouble t20 = aa[i+0+2*BLOCK];
1085 CoinWorkDouble t30 = aa[i+0+3*BLOCK];
1086 CoinWorkDouble t01 = aa[i+1+0*BLOCK];
1087 CoinWorkDouble t11 = aa[i+1+1*BLOCK];
1088 CoinWorkDouble t21 = aa[i+1+2*BLOCK];
1089 CoinWorkDouble t31 = aa[i+1+3*BLOCK];
1090 CoinWorkDouble t02 = aa[i+2+0*BLOCK];
1091 CoinWorkDouble t12 = aa[i+2+1*BLOCK];
1092 CoinWorkDouble t22 = aa[i+2+2*BLOCK];
1093 CoinWorkDouble t32 = aa[i+2+3*BLOCK];
1094 CoinWorkDouble t03 = aa[i+3+0*BLOCK];
1095 CoinWorkDouble t13 = aa[i+3+1*BLOCK];
1096 CoinWorkDouble t23 = aa[i+3+2*BLOCK];
1097 CoinWorkDouble t33 = aa[i+3+3*BLOCK];
1098 const longDouble * COIN_RESTRICT aUnderNow = aUnder + i;
1099 const longDouble * COIN_RESTRICT aboveNow = above + j;
1100 for (k = 0; k < BLOCK; k++) {
1101 CoinWorkDouble multiplier = work[k];
1102 CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1103 CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1104 CoinWorkDouble a02 = aUnderNow[2] * multiplier;
1105 CoinWorkDouble a03 = aUnderNow[3] * multiplier;
1106 t00 -= a00 * aboveNow[0];
1107 t10 -= a00 * aboveNow[1];
1108 t20 -= a00 * aboveNow[2];
1109 t30 -= a00 * aboveNow[3];
1110 t01 -= a01 * aboveNow[0];
1111 t11 -= a01 * aboveNow[1];
1112 t21 -= a01 * aboveNow[2];
1113 t31 -= a01 * aboveNow[3];
1114 t02 -= a02 * aboveNow[0];
1115 t12 -= a02 * aboveNow[1];
1116 t22 -= a02 * aboveNow[2];
1117 t32 -= a02 * aboveNow[3];
1118 t03 -= a03 * aboveNow[0];
1119 t13 -= a03 * aboveNow[1];
1120 t23 -= a03 * aboveNow[2];
1121 t33 -= a03 * aboveNow[3];
1122 aUnderNow += BLOCK;
1123 aboveNow += BLOCK;
1124 }
1125 aa[i+0+0*BLOCK] = t00;
1126 aa[i+0+1*BLOCK] = t10;
1127 aa[i+0+2*BLOCK] = t20;
1128 aa[i+0+3*BLOCK] = t30;
1129 aa[i+1+0*BLOCK] = t01;
1130 aa[i+1+1*BLOCK] = t11;
1131 aa[i+1+2*BLOCK] = t21;
1132 aa[i+1+3*BLOCK] = t31;
1133 aa[i+2+0*BLOCK] = t02;
1134 aa[i+2+1*BLOCK] = t12;
1135 aa[i+2+2*BLOCK] = t22;
1136 aa[i+2+3*BLOCK] = t32;
1137 aa[i+3+0*BLOCK] = t03;
1138 aa[i+3+1*BLOCK] = t13;
1139 aa[i+3+2*BLOCK] = t23;
1140 aa[i+3+3*BLOCK] = t33;
1141 }
1142 }
1143#endif
1144 } else {
1145 int odd = nUnder & 1;
1146 int n = nUnder - odd;
1147 for (j = 0; j < BLOCK; j += 4) {
1148 aa += 4 * BLOCK;
1149 for (i = 0; i < n; i += 2) {
1150 CoinWorkDouble t00 = aa[i+0*BLOCK];
1151 CoinWorkDouble t10 = aa[i+1*BLOCK];
1152 CoinWorkDouble t20 = aa[i+2*BLOCK];
1153 CoinWorkDouble t30 = aa[i+3*BLOCK];
1154 CoinWorkDouble t01 = aa[i+1+0*BLOCK];
1155 CoinWorkDouble t11 = aa[i+1+1*BLOCK];
1156 CoinWorkDouble t21 = aa[i+1+2*BLOCK];
1157 CoinWorkDouble t31 = aa[i+1+3*BLOCK];
1158 const longDouble * COIN_RESTRICT aUnderNow = aUnder + i;
1159 const longDouble * COIN_RESTRICT aboveNow = above + j;
1160 for (k = 0; k < BLOCK; k++) {
1161 CoinWorkDouble multiplier = work[k];
1162 CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1163 CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1164 t00 -= a00 * aboveNow[0];
1165 t10 -= a00 * aboveNow[1];
1166 t20 -= a00 * aboveNow[2];
1167 t30 -= a00 * aboveNow[3];
1168 t01 -= a01 * aboveNow[0];
1169 t11 -= a01 * aboveNow[1];
1170 t21 -= a01 * aboveNow[2];
1171 t31 -= a01 * aboveNow[3];
1172 aUnderNow += BLOCK;
1173 aboveNow += BLOCK;
1174 }
1175 aa[i+0*BLOCK] = t00;
1176 aa[i+1*BLOCK] = t10;
1177 aa[i+2*BLOCK] = t20;
1178 aa[i+3*BLOCK] = t30;
1179 aa[i+1+0*BLOCK] = t01;
1180 aa[i+1+1*BLOCK] = t11;
1181 aa[i+1+2*BLOCK] = t21;
1182 aa[i+1+3*BLOCK] = t31;
1183 }
1184 if (odd) {
1185 CoinWorkDouble t0 = aa[n+0*BLOCK];
1186 CoinWorkDouble t1 = aa[n+1*BLOCK];
1187 CoinWorkDouble t2 = aa[n+2*BLOCK];
1188 CoinWorkDouble t3 = aa[n+3*BLOCK];
1189 CoinWorkDouble a0;
1190 for (k = 0; k < BLOCK; k++) {
1191 a0 = aUnder[n+k*BLOCK] * work[k];
1192 t0 -= a0 * above[j + 0 + k * BLOCK];
1193 t1 -= a0 * above[j + 1 + k * BLOCK];
1194 t2 -= a0 * above[j + 2 + k * BLOCK];
1195 t3 -= a0 * above[j + 3 + k * BLOCK];
1196 }
1197 aa[n+0*BLOCK] = t0;
1198 aa[n+1*BLOCK] = t1;
1199 aa[n+2*BLOCK] = t2;
1200 aa[n+3*BLOCK] = t3;
1201 }
1202 }
1203 }
1204#else
1205 aa = aOther - BLOCK;
1206 for (j = 0; j < BLOCK; j ++) {
1207 aa += BLOCK;
1208 for (i = 0; i < nUnder; i++) {
1209 CoinWorkDouble t00 = aa[i+0*BLOCK];
1210 for (k = 0; k < BLOCK; k++) {
1211 CoinWorkDouble a00 = aUnder[i+k*BLOCK] * work[k];
1212 t00 -= a00 * above[j + k * BLOCK];
1213 }
1214 aa[i] = t00;
1215 }
1216 }
1217#endif
1218}
1219/* Uses factorization to solve. */
1220void
1221ClpCholeskyDense::solve (CoinWorkDouble * region)
1222{
1223#ifdef CHOL_COMPARE
1224 double * region2 = NULL;
1225 if (numberRows_ < 200) {
1226 longDouble * xx = sparseFactor_;
1227 longDouble * yy = diagonal_;
1228 diagonal_ = sparseFactor_ + 40000;
1229 sparseFactor_ = diagonal_ + numberRows_;
1230 region2 = ClpCopyOfArray(region, numberRows_);
1231 int iRow, iColumn;
1232 int addOffset = numberRows_ - 1;
1233 longDouble * work = sparseFactor_ - 1;
1234 for (iColumn = 0; iColumn < numberRows_; iColumn++) {
1235 double value = region2[iColumn];
1236 for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1237 region2[iRow] -= value * work[iRow];
1238 addOffset--;
1239 work += addOffset;
1240 }
1241 for (iColumn = numberRows_ - 1; iColumn >= 0; iColumn--) {
1242 double value = region2[iColumn] * diagonal_[iColumn];
1243 work -= addOffset;
1244 addOffset++;
1245 for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1246 value -= region2[iRow] * work[iRow];
1247 region2[iColumn] = value;
1248 }
1249 sparseFactor_ = xx;
1250 diagonal_ = yy;
1251 }
1252#endif
1253 /*longDouble * xx = sparseFactor_;*/
1254 /*longDouble * yy = diagonal_;*/
1255 /*diagonal_ = sparseFactor_ + 40000;*/
1256 /*sparseFactor_=diagonal_ + numberRows_;*/
1257 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
1258 /* later align on boundary*/
1259 longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks;
1260 int iBlock;
1261 longDouble * aa = a;
1262 int iColumn;
1263 for (iBlock = 0; iBlock < numberBlocks; iBlock++) {
1264 int nChunk;
1265 int jBlock;
1266 int iDo = iBlock * BLOCK;
1267 int base = iDo;
1268 if (iDo + BLOCK > numberRows_) {
1269 nChunk = numberRows_ - iDo;
1270 } else {
1271 nChunk = BLOCK;
1272 }
1273 solveF1(aa, nChunk, region + iDo);
1274 for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1275 base += BLOCK;
1276 aa += BLOCKSQ;
1277 if (base + BLOCK > numberRows_) {
1278 nChunk = numberRows_ - base;
1279 } else {
1280 nChunk = BLOCK;
1281 }
1282 solveF2(aa, nChunk, region + iDo, region + base);
1283 }
1284 aa += BLOCKSQ;
1285 }
1286 /* do diagonal outside*/
1287 for (iColumn = 0; iColumn < numberRows_; iColumn++)
1288 region[iColumn] *= diagonal_[iColumn];
1289 int offset = ((numberBlocks * (numberBlocks + 1)) >> 1);
1290 aa = a + number_entries(offset - 1);
1291 int lBase = (numberBlocks - 1) * BLOCK;
1292 for (iBlock = numberBlocks - 1; iBlock >= 0; iBlock--) {
1293 int nChunk;
1294 int jBlock;
1295 int triBase = iBlock * BLOCK;
1296 int iBase = lBase;
1297 for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1298 if (iBase + BLOCK > numberRows_) {
1299 nChunk = numberRows_ - iBase;
1300 } else {
1301 nChunk = BLOCK;
1302 }
1303 solveB2(aa, nChunk, region + triBase, region + iBase);
1304 iBase -= BLOCK;
1305 aa -= BLOCKSQ;
1306 }
1307 if (triBase + BLOCK > numberRows_) {
1308 nChunk = numberRows_ - triBase;
1309 } else {
1310 nChunk = BLOCK;
1311 }
1312 solveB1(aa, nChunk, region + triBase);
1313 aa -= BLOCKSQ;
1314 }
1315#ifdef CHOL_COMPARE
1316 if (numberRows_ < 200) {
1317 for (int i = 0; i < numberRows_; i++) {
1318 assert(CoinAbs(region[i] - region2[i]) < 1.0e-3);
1319 }
1320 delete [] region2;
1321 }
1322#endif
1323}
1324/* Forward part of solve 1*/
1325void
1326ClpCholeskyDense::solveF1(longDouble * a, int n, CoinWorkDouble * region)
1327{
1328 int j, k;
1329 CoinWorkDouble t00;
1330 for (j = 0; j < n; j ++) {
1331 t00 = region[j];
1332 for (k = 0; k < j; ++k) {
1333 t00 -= region[k] * a[j + k * BLOCK];
1334 }
1335 /*t00*=a[j + j * BLOCK];*/
1336 region[j] = t00;
1337 }
1338}
1339/* Forward part of solve 2*/
1340void
1341ClpCholeskyDense::solveF2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2)
1342{
1343 int j, k;
1344#ifdef BLOCKUNROLL
1345 if (n == BLOCK) {
1346 for (k = 0; k < BLOCK; k += 4) {
1347 CoinWorkDouble t0 = region2[0];
1348 CoinWorkDouble t1 = region2[1];
1349 CoinWorkDouble t2 = region2[2];
1350 CoinWorkDouble t3 = region2[3];
1351 t0 -= region[0] * a[0 + 0 * BLOCK];
1352 t1 -= region[0] * a[1 + 0 * BLOCK];
1353 t2 -= region[0] * a[2 + 0 * BLOCK];
1354 t3 -= region[0] * a[3 + 0 * BLOCK];
1355
1356 t0 -= region[1] * a[0 + 1 * BLOCK];
1357 t1 -= region[1] * a[1 + 1 * BLOCK];
1358 t2 -= region[1] * a[2 + 1 * BLOCK];
1359 t3 -= region[1] * a[3 + 1 * BLOCK];
1360
1361 t0 -= region[2] * a[0 + 2 * BLOCK];
1362 t1 -= region[2] * a[1 + 2 * BLOCK];
1363 t2 -= region[2] * a[2 + 2 * BLOCK];
1364 t3 -= region[2] * a[3 + 2 * BLOCK];
1365
1366 t0 -= region[3] * a[0 + 3 * BLOCK];
1367 t1 -= region[3] * a[1 + 3 * BLOCK];
1368 t2 -= region[3] * a[2 + 3 * BLOCK];
1369 t3 -= region[3] * a[3 + 3 * BLOCK];
1370
1371 t0 -= region[4] * a[0 + 4 * BLOCK];
1372 t1 -= region[4] * a[1 + 4 * BLOCK];
1373 t2 -= region[4] * a[2 + 4 * BLOCK];
1374 t3 -= region[4] * a[3 + 4 * BLOCK];
1375
1376 t0 -= region[5] * a[0 + 5 * BLOCK];
1377 t1 -= region[5] * a[1 + 5 * BLOCK];
1378 t2 -= region[5] * a[2 + 5 * BLOCK];
1379 t3 -= region[5] * a[3 + 5 * BLOCK];
1380
1381 t0 -= region[6] * a[0 + 6 * BLOCK];
1382 t1 -= region[6] * a[1 + 6 * BLOCK];
1383 t2 -= region[6] * a[2 + 6 * BLOCK];
1384 t3 -= region[6] * a[3 + 6 * BLOCK];
1385
1386 t0 -= region[7] * a[0 + 7 * BLOCK];
1387 t1 -= region[7] * a[1 + 7 * BLOCK];
1388 t2 -= region[7] * a[2 + 7 * BLOCK];
1389 t3 -= region[7] * a[3 + 7 * BLOCK];
1390#if BLOCK>8
1391 t0 -= region[8] * a[0 + 8 * BLOCK];
1392 t1 -= region[8] * a[1 + 8 * BLOCK];
1393 t2 -= region[8] * a[2 + 8 * BLOCK];
1394 t3 -= region[8] * a[3 + 8 * BLOCK];
1395
1396 t0 -= region[9] * a[0 + 9 * BLOCK];
1397 t1 -= region[9] * a[1 + 9 * BLOCK];
1398 t2 -= region[9] * a[2 + 9 * BLOCK];
1399 t3 -= region[9] * a[3 + 9 * BLOCK];
1400
1401 t0 -= region[10] * a[0 + 10 * BLOCK];
1402 t1 -= region[10] * a[1 + 10 * BLOCK];
1403 t2 -= region[10] * a[2 + 10 * BLOCK];
1404 t3 -= region[10] * a[3 + 10 * BLOCK];
1405
1406 t0 -= region[11] * a[0 + 11 * BLOCK];
1407 t1 -= region[11] * a[1 + 11 * BLOCK];
1408 t2 -= region[11] * a[2 + 11 * BLOCK];
1409 t3 -= region[11] * a[3 + 11 * BLOCK];
1410
1411 t0 -= region[12] * a[0 + 12 * BLOCK];
1412 t1 -= region[12] * a[1 + 12 * BLOCK];
1413 t2 -= region[12] * a[2 + 12 * BLOCK];
1414 t3 -= region[12] * a[3 + 12 * BLOCK];
1415
1416 t0 -= region[13] * a[0 + 13 * BLOCK];
1417 t1 -= region[13] * a[1 + 13 * BLOCK];
1418 t2 -= region[13] * a[2 + 13 * BLOCK];
1419 t3 -= region[13] * a[3 + 13 * BLOCK];
1420
1421 t0 -= region[14] * a[0 + 14 * BLOCK];
1422 t1 -= region[14] * a[1 + 14 * BLOCK];
1423 t2 -= region[14] * a[2 + 14 * BLOCK];
1424 t3 -= region[14] * a[3 + 14 * BLOCK];
1425
1426 t0 -= region[15] * a[0 + 15 * BLOCK];
1427 t1 -= region[15] * a[1 + 15 * BLOCK];
1428 t2 -= region[15] * a[2 + 15 * BLOCK];
1429 t3 -= region[15] * a[3 + 15 * BLOCK];
1430#endif
1431 region2[0] = t0;
1432 region2[1] = t1;
1433 region2[2] = t2;
1434 region2[3] = t3;
1435 region2 += 4;
1436 a += 4;
1437 }
1438 } else {
1439#endif
1440 for (k = 0; k < n; ++k) {
1441 CoinWorkDouble t00 = region2[k];
1442 for (j = 0; j < BLOCK; j ++) {
1443 t00 -= region[j] * a[k + j * BLOCK];
1444 }
1445 region2[k] = t00;
1446 }
1447#ifdef BLOCKUNROLL
1448 }
1449#endif
1450}
1451/* Backward part of solve 1*/
1452void
1453ClpCholeskyDense::solveB1(longDouble * a, int n, CoinWorkDouble * region)
1454{
1455 int j, k;
1456 CoinWorkDouble t00;
1457 for (j = n - 1; j >= 0; j --) {
1458 t00 = region[j];
1459 for (k = j + 1; k < n; ++k) {
1460 t00 -= region[k] * a[k + j * BLOCK];
1461 }
1462 /*t00*=a[j + j * BLOCK];*/
1463 region[j] = t00;
1464 }
1465}
1466/* Backward part of solve 2*/
1467void
1468ClpCholeskyDense::solveB2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2)
1469{
1470 int j, k;
1471#ifdef BLOCKUNROLL
1472 if (n == BLOCK) {
1473 for (j = 0; j < BLOCK; j += 4) {
1474 CoinWorkDouble t0 = region[0];
1475 CoinWorkDouble t1 = region[1];
1476 CoinWorkDouble t2 = region[2];
1477 CoinWorkDouble t3 = region[3];
1478 t0 -= region2[0] * a[0 + 0*BLOCK];
1479 t1 -= region2[0] * a[0 + 1*BLOCK];
1480 t2 -= region2[0] * a[0 + 2*BLOCK];
1481 t3 -= region2[0] * a[0 + 3*BLOCK];
1482
1483 t0 -= region2[1] * a[1 + 0*BLOCK];
1484 t1 -= region2[1] * a[1 + 1*BLOCK];
1485 t2 -= region2[1] * a[1 + 2*BLOCK];
1486 t3 -= region2[1] * a[1 + 3*BLOCK];
1487
1488 t0 -= region2[2] * a[2 + 0*BLOCK];
1489 t1 -= region2[2] * a[2 + 1*BLOCK];
1490 t2 -= region2[2] * a[2 + 2*BLOCK];
1491 t3 -= region2[2] * a[2 + 3*BLOCK];
1492
1493 t0 -= region2[3] * a[3 + 0*BLOCK];
1494 t1 -= region2[3] * a[3 + 1*BLOCK];
1495 t2 -= region2[3] * a[3 + 2*BLOCK];
1496 t3 -= region2[3] * a[3 + 3*BLOCK];
1497
1498 t0 -= region2[4] * a[4 + 0*BLOCK];
1499 t1 -= region2[4] * a[4 + 1*BLOCK];
1500 t2 -= region2[4] * a[4 + 2*BLOCK];
1501 t3 -= region2[4] * a[4 + 3*BLOCK];
1502
1503 t0 -= region2[5] * a[5 + 0*BLOCK];
1504 t1 -= region2[5] * a[5 + 1*BLOCK];
1505 t2 -= region2[5] * a[5 + 2*BLOCK];
1506 t3 -= region2[5] * a[5 + 3*BLOCK];
1507
1508 t0 -= region2[6] * a[6 + 0*BLOCK];
1509 t1 -= region2[6] * a[6 + 1*BLOCK];
1510 t2 -= region2[6] * a[6 + 2*BLOCK];
1511 t3 -= region2[6] * a[6 + 3*BLOCK];
1512
1513 t0 -= region2[7] * a[7 + 0*BLOCK];
1514 t1 -= region2[7] * a[7 + 1*BLOCK];
1515 t2 -= region2[7] * a[7 + 2*BLOCK];
1516 t3 -= region2[7] * a[7 + 3*BLOCK];
1517#if BLOCK>8
1518
1519 t0 -= region2[8] * a[8 + 0*BLOCK];
1520 t1 -= region2[8] * a[8 + 1*BLOCK];
1521 t2 -= region2[8] * a[8 + 2*BLOCK];
1522 t3 -= region2[8] * a[8 + 3*BLOCK];
1523
1524 t0 -= region2[9] * a[9 + 0*BLOCK];
1525 t1 -= region2[9] * a[9 + 1*BLOCK];
1526 t2 -= region2[9] * a[9 + 2*BLOCK];
1527 t3 -= region2[9] * a[9 + 3*BLOCK];
1528
1529 t0 -= region2[10] * a[10 + 0*BLOCK];
1530 t1 -= region2[10] * a[10 + 1*BLOCK];
1531 t2 -= region2[10] * a[10 + 2*BLOCK];
1532 t3 -= region2[10] * a[10 + 3*BLOCK];
1533
1534 t0 -= region2[11] * a[11 + 0*BLOCK];
1535 t1 -= region2[11] * a[11 + 1*BLOCK];
1536 t2 -= region2[11] * a[11 + 2*BLOCK];
1537 t3 -= region2[11] * a[11 + 3*BLOCK];
1538
1539 t0 -= region2[12] * a[12 + 0*BLOCK];
1540 t1 -= region2[12] * a[12 + 1*BLOCK];
1541 t2 -= region2[12] * a[12 + 2*BLOCK];
1542 t3 -= region2[12] * a[12 + 3*BLOCK];
1543
1544 t0 -= region2[13] * a[13 + 0*BLOCK];
1545 t1 -= region2[13] * a[13 + 1*BLOCK];
1546 t2 -= region2[13] * a[13 + 2*BLOCK];
1547 t3 -= region2[13] * a[13 + 3*BLOCK];
1548
1549 t0 -= region2[14] * a[14 + 0*BLOCK];
1550 t1 -= region2[14] * a[14 + 1*BLOCK];
1551 t2 -= region2[14] * a[14 + 2*BLOCK];
1552 t3 -= region2[14] * a[14 + 3*BLOCK];
1553
1554 t0 -= region2[15] * a[15 + 0*BLOCK];
1555 t1 -= region2[15] * a[15 + 1*BLOCK];
1556 t2 -= region2[15] * a[15 + 2*BLOCK];
1557 t3 -= region2[15] * a[15 + 3*BLOCK];
1558#endif
1559 region[0] = t0;
1560 region[1] = t1;
1561 region[2] = t2;
1562 region[3] = t3;
1563 a += 4 * BLOCK;
1564 region += 4;
1565 }
1566 } else {
1567#endif
1568 for (j = 0; j < BLOCK; j ++) {
1569 CoinWorkDouble t00 = region[j];
1570 for (k = 0; k < n; ++k) {
1571 t00 -= region2[k] * a[k + j * BLOCK];
1572 }
1573 region[j] = t00;
1574 }
1575#ifdef BLOCKUNROLL
1576 }
1577#endif
1578}
1579