1 | /* $Id: ClpCholeskyDense.cpp 1753 2011-06-19 16:27:26Z stefan $ */ |
2 | /* |
3 | Copyright (C) 2003, International Business Machines Corporation |
4 | and others. All Rights Reserved. |
5 | |
6 | This code is licensed under the terms of the Eclipse Public License (EPL). |
7 | */ |
8 | #include "CoinPragma.hpp" |
9 | #include "CoinHelperFunctions.hpp" |
10 | #include "ClpHelperFunctions.hpp" |
11 | |
12 | #include "ClpInterior.hpp" |
13 | #include "ClpCholeskyDense.hpp" |
14 | #include "ClpMessage.hpp" |
15 | #include "ClpQuadraticObjective.hpp" |
16 | |
17 | /*#############################################################################*/ |
18 | /* Constructors / Destructor / Assignment*/ |
19 | /*#############################################################################*/ |
20 | |
21 | /*-------------------------------------------------------------------*/ |
22 | /* Default Constructor */ |
23 | /*-------------------------------------------------------------------*/ |
24 | ClpCholeskyDense::ClpCholeskyDense () |
25 | : ClpCholeskyBase(), |
26 | borrowSpace_(false) |
27 | { |
28 | type_ = 11; |
29 | } |
30 | |
31 | /*-------------------------------------------------------------------*/ |
32 | /* Copy constructor */ |
33 | /*-------------------------------------------------------------------*/ |
34 | ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs) |
35 | : ClpCholeskyBase(rhs), |
36 | borrowSpace_(rhs.borrowSpace_) |
37 | { |
38 | assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/ |
39 | } |
40 | |
41 | |
42 | /*-------------------------------------------------------------------*/ |
43 | /* Destructor */ |
44 | /*-------------------------------------------------------------------*/ |
45 | ClpCholeskyDense::~ClpCholeskyDense () |
46 | { |
47 | if (borrowSpace_) { |
48 | /* set NULL*/ |
49 | sparseFactor_ = NULL; |
50 | workDouble_ = NULL; |
51 | diagonal_ = NULL; |
52 | } |
53 | } |
54 | |
55 | /*----------------------------------------------------------------*/ |
56 | /* Assignment operator */ |
57 | /*-------------------------------------------------------------------*/ |
58 | ClpCholeskyDense & |
59 | ClpCholeskyDense::operator=(const ClpCholeskyDense& rhs) |
60 | { |
61 | if (this != &rhs) { |
62 | assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/ |
63 | ClpCholeskyBase::operator=(rhs); |
64 | borrowSpace_ = rhs.borrowSpace_; |
65 | } |
66 | return *this; |
67 | } |
68 | /*-------------------------------------------------------------------*/ |
69 | /* Clone*/ |
70 | /*-------------------------------------------------------------------*/ |
71 | ClpCholeskyBase * ClpCholeskyDense::clone() const |
72 | { |
73 | return new ClpCholeskyDense(*this); |
74 | } |
75 | /* If not power of 2 then need to redo a bit*/ |
76 | #define BLOCK 16 |
77 | #define BLOCKSHIFT 4 |
78 | /* Block unroll if power of 2 and at least 8*/ |
79 | #define BLOCKUNROLL |
80 | |
81 | #define BLOCKSQ ( BLOCK*BLOCK ) |
82 | #define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT ) |
83 | #define number_blocks(x) (((x)+BLOCK-1)>>BLOCKSHIFT) |
84 | #define number_rows(x) ((x)<<BLOCKSHIFT) |
85 | #define number_entries(x) ((x)<<BLOCKSQSHIFT) |
86 | /* Gets space */ |
87 | int |
88 | ClpCholeskyDense::reserveSpace(const ClpCholeskyBase * factor, int numberRows) |
89 | { |
90 | numberRows_ = numberRows; |
91 | int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; |
92 | /* allow one stripe extra*/ |
93 | numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2); |
94 | sizeFactor_ = numberBlocks * BLOCKSQ; |
95 | /*#define CHOL_COMPARE*/ |
96 | #ifdef CHOL_COMPARE |
97 | sizeFactor_ += 95000; |
98 | #endif |
99 | if (!factor) { |
100 | sparseFactor_ = new longDouble [sizeFactor_]; |
101 | rowsDropped_ = new char [numberRows_]; |
102 | memset(rowsDropped_, 0, numberRows_); |
103 | workDouble_ = new longDouble[numberRows_]; |
104 | diagonal_ = new longDouble[numberRows_]; |
105 | } else { |
106 | borrowSpace_ = true; |
107 | int numberFull = factor->numberRows(); |
108 | sparseFactor_ = factor->sparseFactor() + (factor->size() - sizeFactor_); |
109 | workDouble_ = factor->workDouble() + (numberFull - numberRows_); |
110 | diagonal_ = factor->diagonal() + (numberFull - numberRows_); |
111 | } |
112 | numberRowsDropped_ = 0; |
113 | return 0; |
114 | } |
115 | /* Returns space needed */ |
116 | CoinBigIndex |
117 | ClpCholeskyDense::space( int numberRows) const |
118 | { |
119 | int numberBlocks = (numberRows + BLOCK - 1) >> BLOCKSHIFT; |
120 | /* allow one stripe extra*/ |
121 | numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2); |
122 | CoinBigIndex sizeFactor = numberBlocks * BLOCKSQ; |
123 | #ifdef CHOL_COMPARE |
124 | sizeFactor += 95000; |
125 | #endif |
126 | return sizeFactor; |
127 | } |
128 | /* Orders rows and saves pointer to matrix.and model */ |
129 | int |
130 | ClpCholeskyDense::order(ClpInterior * model) |
131 | { |
132 | model_ = model; |
133 | int numberRows; |
134 | int numberRowsModel = model_->numberRows(); |
135 | int numberColumns = model_->numberColumns(); |
136 | if (!doKKT_) { |
137 | numberRows = numberRowsModel; |
138 | } else { |
139 | numberRows = 2 * numberRowsModel + numberColumns; |
140 | } |
141 | reserveSpace(NULL, numberRows); |
142 | rowCopy_ = model->clpMatrix()->reverseOrderedCopy(); |
143 | return 0; |
144 | } |
145 | /* Does Symbolic factorization given permutation. |
146 | This is called immediately after order. If user provides this then |
147 | user must provide factorize and solve. Otherwise the default factorization is used |
148 | returns non-zero if not enough memory */ |
149 | int |
150 | ClpCholeskyDense::symbolic() |
151 | { |
152 | return 0; |
153 | } |
154 | /* Factorize - filling in rowsDropped and returning number dropped */ |
155 | int |
156 | ClpCholeskyDense::factorize(const CoinWorkDouble * diagonal, int * rowsDropped) |
157 | { |
158 | assert (!borrowSpace_); |
159 | const CoinBigIndex * columnStart = model_->clpMatrix()->getVectorStarts(); |
160 | const int * columnLength = model_->clpMatrix()->getVectorLengths(); |
161 | const int * row = model_->clpMatrix()->getIndices(); |
162 | const double * element = model_->clpMatrix()->getElements(); |
163 | const CoinBigIndex * rowStart = rowCopy_->getVectorStarts(); |
164 | const int * rowLength = rowCopy_->getVectorLengths(); |
165 | const int * column = rowCopy_->getIndices(); |
166 | const double * elementByRow = rowCopy_->getElements(); |
167 | int numberColumns = model_->clpMatrix()->getNumCols(); |
168 | CoinZeroN(sparseFactor_, sizeFactor_); |
169 | /*perturbation*/ |
170 | CoinWorkDouble perturbation = model_->diagonalPerturbation() * model_->diagonalNorm(); |
171 | perturbation = perturbation * perturbation; |
172 | if (perturbation > 1.0) { |
173 | #ifdef COIN_DEVELOP |
174 | /*if (model_->model()->logLevel()&4) */ |
175 | std::cout << "large perturbation " << perturbation << std::endl; |
176 | #endif |
177 | perturbation = CoinSqrt(perturbation); |
178 | perturbation = 1.0; |
179 | } |
180 | int iRow; |
181 | int newDropped = 0; |
182 | CoinWorkDouble largest = 1.0; |
183 | CoinWorkDouble smallest = COIN_DBL_MAX; |
184 | CoinWorkDouble delta2 = model_->delta(); /* add delta*delta to diagonal*/ |
185 | delta2 *= delta2; |
186 | if (!doKKT_) { |
187 | longDouble * work = sparseFactor_; |
188 | work--; /* skip diagonal*/ |
189 | int addOffset = numberRows_ - 1; |
190 | const CoinWorkDouble * diagonalSlack = diagonal + numberColumns; |
191 | /* largest in initial matrix*/ |
192 | CoinWorkDouble largest2 = 1.0e-20; |
193 | for (iRow = 0; iRow < numberRows_; iRow++) { |
194 | if (!rowsDropped_[iRow]) { |
195 | CoinBigIndex startRow = rowStart[iRow]; |
196 | CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow]; |
197 | CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2; |
198 | for (CoinBigIndex k = startRow; k < endRow; k++) { |
199 | int iColumn = column[k]; |
200 | CoinBigIndex start = columnStart[iColumn]; |
201 | CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; |
202 | CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k]; |
203 | for (CoinBigIndex j = start; j < end; j++) { |
204 | int jRow = row[j]; |
205 | if (!rowsDropped_[jRow]) { |
206 | if (jRow > iRow) { |
207 | CoinWorkDouble value = element[j] * multiplier; |
208 | work[jRow] += value; |
209 | } else if (jRow == iRow) { |
210 | CoinWorkDouble value = element[j] * multiplier; |
211 | diagonalValue += value; |
212 | } |
213 | } |
214 | } |
215 | } |
216 | for (int j = iRow + 1; j < numberRows_; j++) |
217 | largest2 = CoinMax(largest2, CoinAbs(work[j])); |
218 | diagonal_[iRow] = diagonalValue; |
219 | largest2 = CoinMax(largest2, CoinAbs(diagonalValue)); |
220 | } else { |
221 | /* dropped*/ |
222 | diagonal_[iRow] = 1.0; |
223 | } |
224 | addOffset--; |
225 | work += addOffset; |
226 | } |
227 | /*check sizes*/ |
228 | largest2 *= 1.0e-20; |
229 | largest = CoinMin(largest2, CHOL_SMALL_VALUE); |
230 | int numberDroppedBefore = 0; |
231 | for (iRow = 0; iRow < numberRows_; iRow++) { |
232 | int dropped = rowsDropped_[iRow]; |
233 | /* Move to int array*/ |
234 | rowsDropped[iRow] = dropped; |
235 | if (!dropped) { |
236 | CoinWorkDouble diagonal = diagonal_[iRow]; |
237 | if (diagonal > largest2) { |
238 | diagonal_[iRow] = diagonal + perturbation; |
239 | } else { |
240 | diagonal_[iRow] = diagonal + perturbation; |
241 | rowsDropped[iRow] = 2; |
242 | numberDroppedBefore++; |
243 | } |
244 | } |
245 | } |
246 | doubleParameters_[10] = CoinMax(1.0e-20, largest); |
247 | integerParameters_[20] = 0; |
248 | doubleParameters_[3] = 0.0; |
249 | doubleParameters_[4] = COIN_DBL_MAX; |
250 | integerParameters_[34] = 0; /* say all must be positive*/ |
251 | #ifdef CHOL_COMPARE |
252 | if (numberRows_ < 200) |
253 | factorizePart3(rowsDropped); |
254 | #endif |
255 | factorizePart2(rowsDropped); |
256 | newDropped = integerParameters_[20] + numberDroppedBefore; |
257 | largest = doubleParameters_[3]; |
258 | smallest = doubleParameters_[4]; |
259 | if (model_->messageHandler()->logLevel() > 1) |
260 | std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl; |
261 | choleskyCondition_ = largest / smallest; |
262 | /*drop fresh makes some formADAT easier*/ |
263 | if (newDropped || numberRowsDropped_) { |
264 | newDropped = 0; |
265 | for (int i = 0; i < numberRows_; i++) { |
266 | char dropped = static_cast<char>(rowsDropped[i]); |
267 | rowsDropped_[i] = dropped; |
268 | if (dropped == 2) { |
269 | /*dropped this time*/ |
270 | rowsDropped[newDropped++] = i; |
271 | rowsDropped_[i] = 0; |
272 | } |
273 | } |
274 | numberRowsDropped_ = newDropped; |
275 | newDropped = -(2 + newDropped); |
276 | } |
277 | } else { |
278 | /* KKT*/ |
279 | CoinPackedMatrix * quadratic = NULL; |
280 | ClpQuadraticObjective * quadraticObj = |
281 | (dynamic_cast< ClpQuadraticObjective*>(model_->objectiveAsObject())); |
282 | if (quadraticObj) |
283 | quadratic = quadraticObj->quadraticObjective(); |
284 | /* matrix*/ |
285 | int numberRowsModel = model_->numberRows(); |
286 | int numberColumns = model_->numberColumns(); |
287 | int numberTotal = numberColumns + numberRowsModel; |
288 | longDouble * work = sparseFactor_; |
289 | work--; /* skip diagonal*/ |
290 | int addOffset = numberRows_ - 1; |
291 | int iColumn; |
292 | if (!quadratic) { |
293 | for (iColumn = 0; iColumn < numberColumns; iColumn++) { |
294 | CoinWorkDouble value = diagonal[iColumn]; |
295 | if (CoinAbs(value) > 1.0e-100) { |
296 | value = 1.0 / value; |
297 | largest = CoinMax(largest, CoinAbs(value)); |
298 | diagonal_[iColumn] = -value; |
299 | CoinBigIndex start = columnStart[iColumn]; |
300 | CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; |
301 | for (CoinBigIndex j = start; j < end; j++) { |
302 | /*choleskyRow_[numberElements]=row[j]+numberTotal;*/ |
303 | /*sparseFactor_[numberElements++]=element[j];*/ |
304 | work[row[j] + numberTotal] = element[j]; |
305 | largest = CoinMax(largest, CoinAbs(element[j])); |
306 | } |
307 | } else { |
308 | diagonal_[iColumn] = -value; |
309 | } |
310 | addOffset--; |
311 | work += addOffset; |
312 | } |
313 | } else { |
314 | /* Quadratic*/ |
315 | const int * columnQuadratic = quadratic->getIndices(); |
316 | const CoinBigIndex * columnQuadraticStart = quadratic->getVectorStarts(); |
317 | const int * columnQuadraticLength = quadratic->getVectorLengths(); |
318 | const double * quadraticElement = quadratic->getElements(); |
319 | for (iColumn = 0; iColumn < numberColumns; iColumn++) { |
320 | CoinWorkDouble value = diagonal[iColumn]; |
321 | CoinBigIndex j; |
322 | if (CoinAbs(value) > 1.0e-100) { |
323 | value = 1.0 / value; |
324 | for (j = columnQuadraticStart[iColumn]; |
325 | j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) { |
326 | int jColumn = columnQuadratic[j]; |
327 | if (jColumn > iColumn) { |
328 | work[jColumn] = -quadraticElement[j]; |
329 | } else if (iColumn == jColumn) { |
330 | value += quadraticElement[j]; |
331 | } |
332 | } |
333 | largest = CoinMax(largest, CoinAbs(value)); |
334 | diagonal_[iColumn] = -value; |
335 | CoinBigIndex start = columnStart[iColumn]; |
336 | CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; |
337 | for (j = start; j < end; j++) { |
338 | work[row[j] + numberTotal] = element[j]; |
339 | largest = CoinMax(largest, CoinAbs(element[j])); |
340 | } |
341 | } else { |
342 | value = 1.0e100; |
343 | diagonal_[iColumn] = -value; |
344 | } |
345 | addOffset--; |
346 | work += addOffset; |
347 | } |
348 | } |
349 | /* slacks*/ |
350 | for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) { |
351 | CoinWorkDouble value = diagonal[iColumn]; |
352 | if (CoinAbs(value) > 1.0e-100) { |
353 | value = 1.0 / value; |
354 | largest = CoinMax(largest, CoinAbs(value)); |
355 | } else { |
356 | value = 1.0e100; |
357 | } |
358 | diagonal_[iColumn] = -value; |
359 | work[iColumn-numberColumns+numberTotal] = -1.0; |
360 | addOffset--; |
361 | work += addOffset; |
362 | } |
363 | /* Finish diagonal*/ |
364 | for (iRow = 0; iRow < numberRowsModel; iRow++) { |
365 | diagonal_[iRow+numberTotal] = delta2; |
366 | } |
367 | /*check sizes*/ |
368 | largest *= 1.0e-20; |
369 | largest = CoinMin(largest, CHOL_SMALL_VALUE); |
370 | doubleParameters_[10] = CoinMax(1.0e-20, largest); |
371 | integerParameters_[20] = 0; |
372 | doubleParameters_[3] = 0.0; |
373 | doubleParameters_[4] = COIN_DBL_MAX; |
374 | /* Set up LDL cutoff*/ |
375 | integerParameters_[34] = numberTotal; |
376 | /* KKT*/ |
377 | int * rowsDropped2 = new int[numberRows_]; |
378 | CoinZeroN(rowsDropped2, numberRows_); |
379 | #ifdef CHOL_COMPARE |
380 | if (numberRows_ < 200) |
381 | factorizePart3(rowsDropped2); |
382 | #endif |
383 | factorizePart2(rowsDropped2); |
384 | newDropped = integerParameters_[20]; |
385 | largest = doubleParameters_[3]; |
386 | smallest = doubleParameters_[4]; |
387 | if (model_->messageHandler()->logLevel() > 1) |
388 | COIN_DETAIL_PRINT(std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl); |
389 | choleskyCondition_ = largest / smallest; |
390 | /* Should save adjustments in ..R_*/ |
391 | int n1 = 0, n2 = 0; |
392 | CoinWorkDouble * primalR = model_->primalR(); |
393 | CoinWorkDouble * dualR = model_->dualR(); |
394 | for (iRow = 0; iRow < numberTotal; iRow++) { |
395 | if (rowsDropped2[iRow]) { |
396 | n1++; |
397 | /*printf("row region1 %d dropped\n",iRow);*/ |
398 | /*rowsDropped_[iRow]=1;*/ |
399 | rowsDropped_[iRow] = 0; |
400 | primalR[iRow] = doubleParameters_[20]; |
401 | } else { |
402 | rowsDropped_[iRow] = 0; |
403 | primalR[iRow] = 0.0; |
404 | } |
405 | } |
406 | for (; iRow < numberRows_; iRow++) { |
407 | if (rowsDropped2[iRow]) { |
408 | n2++; |
409 | /*printf("row region2 %d dropped\n",iRow);*/ |
410 | /*rowsDropped_[iRow]=1;*/ |
411 | rowsDropped_[iRow] = 0; |
412 | dualR[iRow-numberTotal] = doubleParameters_[34]; |
413 | } else { |
414 | rowsDropped_[iRow] = 0; |
415 | dualR[iRow-numberTotal] = 0.0; |
416 | } |
417 | } |
418 | } |
419 | return 0; |
420 | } |
421 | /* Factorize - filling in rowsDropped and returning number dropped */ |
422 | void |
423 | ClpCholeskyDense::factorizePart3( int * rowsDropped) |
424 | { |
425 | int iColumn; |
426 | longDouble * xx = sparseFactor_; |
427 | longDouble * yy = diagonal_; |
428 | diagonal_ = sparseFactor_ + 40000; |
429 | sparseFactor_ = diagonal_ + numberRows_; |
430 | /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/ |
431 | CoinMemcpyN(xx, 40000, sparseFactor_); |
432 | CoinMemcpyN(yy, numberRows_, diagonal_); |
433 | int numberDropped = 0; |
434 | CoinWorkDouble largest = 0.0; |
435 | CoinWorkDouble smallest = COIN_DBL_MAX; |
436 | double dropValue = doubleParameters_[10]; |
437 | int firstPositive = integerParameters_[34]; |
438 | longDouble * work = sparseFactor_; |
439 | /* Allow for triangular*/ |
440 | int addOffset = numberRows_ - 1; |
441 | work--; |
442 | for (iColumn = 0; iColumn < numberRows_; iColumn++) { |
443 | int iRow; |
444 | int addOffsetNow = numberRows_ - 1; |
445 | longDouble * workNow = sparseFactor_ - 1 + iColumn; |
446 | CoinWorkDouble diagonalValue = diagonal_[iColumn]; |
447 | for (iRow = 0; iRow < iColumn; iRow++) { |
448 | double aj = *workNow; |
449 | addOffsetNow--; |
450 | workNow += addOffsetNow; |
451 | diagonalValue -= aj * aj * workDouble_[iRow]; |
452 | } |
453 | bool dropColumn = false; |
454 | if (iColumn < firstPositive) { |
455 | /* must be negative*/ |
456 | if (diagonalValue <= -dropValue) { |
457 | smallest = CoinMin(smallest, -diagonalValue); |
458 | largest = CoinMax(largest, -diagonalValue); |
459 | workDouble_[iColumn] = diagonalValue; |
460 | diagonalValue = 1.0 / diagonalValue; |
461 | } else { |
462 | dropColumn = true; |
463 | workDouble_[iColumn] = -1.0e100; |
464 | diagonalValue = 0.0; |
465 | integerParameters_[20]++; |
466 | } |
467 | } else { |
468 | /* must be positive*/ |
469 | if (diagonalValue >= dropValue) { |
470 | smallest = CoinMin(smallest, diagonalValue); |
471 | largest = CoinMax(largest, diagonalValue); |
472 | workDouble_[iColumn] = diagonalValue; |
473 | diagonalValue = 1.0 / diagonalValue; |
474 | } else { |
475 | dropColumn = true; |
476 | workDouble_[iColumn] = 1.0e100; |
477 | diagonalValue = 0.0; |
478 | integerParameters_[20]++; |
479 | } |
480 | } |
481 | if (!dropColumn) { |
482 | diagonal_[iColumn] = diagonalValue; |
483 | for (iRow = iColumn + 1; iRow < numberRows_; iRow++) { |
484 | double value = work[iRow]; |
485 | workNow = sparseFactor_ - 1; |
486 | int addOffsetNow = numberRows_ - 1; |
487 | for (int jColumn = 0; jColumn < iColumn; jColumn++) { |
488 | double aj = workNow[iColumn]; |
489 | double multiplier = workDouble_[jColumn]; |
490 | double ai = workNow[iRow]; |
491 | addOffsetNow--; |
492 | workNow += addOffsetNow; |
493 | value -= aj * ai * multiplier; |
494 | } |
495 | work[iRow] = value * diagonalValue; |
496 | } |
497 | } else { |
498 | /* drop column*/ |
499 | rowsDropped[iColumn] = 2; |
500 | numberDropped++; |
501 | diagonal_[iColumn] = 0.0; |
502 | for (iRow = iColumn + 1; iRow < numberRows_; iRow++) { |
503 | work[iRow] = 0.0; |
504 | } |
505 | } |
506 | addOffset--; |
507 | work += addOffset; |
508 | } |
509 | doubleParameters_[3] = largest; |
510 | doubleParameters_[4] = smallest; |
511 | integerParameters_[20] = numberDropped; |
512 | sparseFactor_ = xx; |
513 | diagonal_ = yy; |
514 | } |
515 | /*#define POS_DEBUG*/ |
516 | #ifdef POS_DEBUG |
517 | static int counter = 0; |
518 | int ClpCholeskyDense::bNumber(const longDouble * array, int &iRow, int &iCol) |
519 | { |
520 | int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; |
521 | longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks; |
522 | int k = array - a; |
523 | assert ((k % BLOCKSQ) == 0); |
524 | iCol = 0; |
525 | int kk = k >> BLOCKSQSHIFT; |
526 | /*printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);*/ |
527 | k = kk; |
528 | while (k >= numberBlocks) { |
529 | iCol++; |
530 | k -= numberBlocks; |
531 | numberBlocks--; |
532 | } |
533 | iRow = iCol + k; |
534 | counter++; |
535 | if(counter > 100000) |
536 | exit(77); |
537 | return kk; |
538 | } |
539 | #endif |
540 | /* Factorize - filling in rowsDropped and returning number dropped */ |
541 | void |
542 | ClpCholeskyDense::factorizePart2( int * rowsDropped) |
543 | { |
544 | int iColumn; |
545 | /*longDouble * xx = sparseFactor_;*/ |
546 | /*longDouble * yy = diagonal_;*/ |
547 | /*diagonal_ = sparseFactor_ + 40000;*/ |
548 | /*sparseFactor_=diagonal_ + numberRows_;*/ |
549 | /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/ |
550 | /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/ |
551 | /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/ |
552 | int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; |
553 | /* later align on boundary*/ |
554 | longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks; |
555 | int n = numberRows_; |
556 | int nRound = numberRows_ & (~(BLOCK - 1)); |
557 | /* adjust if exact*/ |
558 | if (nRound == n) |
559 | nRound -= BLOCK; |
560 | int sizeLastBlock = n - nRound; |
561 | int get = n * (n - 1) / 2; /* ? as no diagonal*/ |
562 | int block = numberBlocks * (numberBlocks + 1) / 2; |
563 | int ifOdd; |
564 | int rowLast; |
565 | if (sizeLastBlock != BLOCK) { |
566 | longDouble * aa = &a[(block-1)*BLOCKSQ]; |
567 | rowLast = nRound - 1; |
568 | ifOdd = 1; |
569 | int put = BLOCKSQ; |
570 | /* do last separately*/ |
571 | put -= (BLOCK - sizeLastBlock) * (BLOCK + 1); |
572 | for (iColumn = numberRows_ - 1; iColumn >= nRound; iColumn--) { |
573 | int put2 = put; |
574 | put -= BLOCK; |
575 | for (int iRow = numberRows_ - 1; iRow > iColumn; iRow--) { |
576 | aa[--put2] = sparseFactor_[--get]; |
577 | assert (aa + put2 >= sparseFactor_ + get); |
578 | } |
579 | /* save diagonal as well*/ |
580 | aa[--put2] = diagonal_[iColumn]; |
581 | } |
582 | n = nRound; |
583 | block--; |
584 | } else { |
585 | /* exact fit*/ |
586 | rowLast = numberRows_ - 1; |
587 | ifOdd = 0; |
588 | } |
589 | /* Now main loop*/ |
590 | int nBlock = 0; |
591 | for (; n > 0; n -= BLOCK) { |
592 | longDouble * aa = &a[(block-1)*BLOCKSQ]; |
593 | longDouble * aaLast = NULL; |
594 | int put = BLOCKSQ; |
595 | int putLast = 0; |
596 | /* see if we have small block*/ |
597 | if (ifOdd) { |
598 | aaLast = &a[(block-1)*BLOCKSQ]; |
599 | aa = aaLast - BLOCKSQ; |
600 | putLast = BLOCKSQ - BLOCK + sizeLastBlock; |
601 | } |
602 | for (iColumn = n - 1; iColumn >= n - BLOCK; iColumn--) { |
603 | if (aaLast) { |
604 | /* last bit*/ |
605 | for (int iRow = numberRows_ - 1; iRow > rowLast; iRow--) { |
606 | aaLast[--putLast] = sparseFactor_[--get]; |
607 | assert (aaLast + putLast >= sparseFactor_ + get); |
608 | } |
609 | putLast -= BLOCK - sizeLastBlock; |
610 | } |
611 | longDouble * aPut = aa; |
612 | int j = rowLast; |
613 | for (int jBlock = 0; jBlock <= nBlock; jBlock++) { |
614 | int put2 = put; |
615 | int last = CoinMax(j - BLOCK, iColumn); |
616 | for (int iRow = j; iRow > last; iRow--) { |
617 | aPut[--put2] = sparseFactor_[--get]; |
618 | assert (aPut + put2 >= sparseFactor_ + get); |
619 | } |
620 | if (j - BLOCK < iColumn) { |
621 | /* save diagonal as well*/ |
622 | aPut[--put2] = diagonal_[iColumn]; |
623 | } |
624 | j -= BLOCK; |
625 | aPut -= BLOCKSQ; |
626 | } |
627 | put -= BLOCK; |
628 | } |
629 | nBlock++; |
630 | block -= nBlock + ifOdd; |
631 | } |
632 | ClpCholeskyDenseC info; |
633 | info.diagonal_ = diagonal_; |
634 | info.doubleParameters_[0] = doubleParameters_[10]; |
635 | info.integerParameters_[0] = integerParameters_[34]; |
636 | #ifndef CLP_CILK |
637 | ClpCholeskyCfactor(&info, a, numberRows_, numberBlocks, |
638 | diagonal_, workDouble_, rowsDropped); |
639 | #else |
640 | info.a = a; |
641 | info.n = numberRows_; |
642 | info.numberBlocks = numberBlocks; |
643 | info.work = workDouble_; |
644 | info.rowsDropped = rowsDropped; |
645 | info.integerParameters_[1] = model_->numberThreads(); |
646 | ClpCholeskySpawn(&info); |
647 | #endif |
648 | double largest = 0.0; |
649 | double smallest = COIN_DBL_MAX; |
650 | int numberDropped = 0; |
651 | for (int i = 0; i < numberRows_; i++) { |
652 | if (diagonal_[i]) { |
653 | largest = CoinMax(largest, CoinAbs(diagonal_[i])); |
654 | smallest = CoinMin(smallest, CoinAbs(diagonal_[i])); |
655 | } else { |
656 | numberDropped++; |
657 | } |
658 | } |
659 | doubleParameters_[3] = CoinMax(doubleParameters_[3], 1.0 / smallest); |
660 | doubleParameters_[4] = CoinMin(doubleParameters_[4], 1.0 / largest); |
661 | integerParameters_[20] += numberDropped; |
662 | } |
663 | /* Non leaf recursive factor*/ |
664 | void |
665 | ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct, longDouble * a, int n, int numberBlocks, |
666 | longDouble * diagonal, longDouble * work, int * rowsDropped) |
667 | { |
668 | if (n <= BLOCK) { |
669 | ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped); |
670 | } else { |
671 | int nb = number_blocks((n + 1) >> 1); |
672 | int nThis = number_rows(nb); |
673 | longDouble * aother; |
674 | int nLeft = n - nThis; |
675 | int nintri = (nb * (nb + 1)) >> 1; |
676 | int nbelow = (numberBlocks - nb) * nb; |
677 | ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped); |
678 | ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks); |
679 | aother = a + number_entries(nintri + nbelow); |
680 | ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks); |
681 | ClpCholeskyCfactor(thisStruct, aother, nLeft, |
682 | numberBlocks - nb, diagonal + nThis, work + nThis, rowsDropped); |
683 | } |
684 | } |
685 | /* Non leaf recursive triangle rectangle update*/ |
686 | void |
687 | ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct, longDouble * aTri, int nThis, longDouble * aUnder, |
688 | longDouble * diagonal, longDouble * work, |
689 | int nLeft, int iBlock, int jBlock, |
690 | int numberBlocks) |
691 | { |
692 | if (nThis <= BLOCK && nLeft <= BLOCK) { |
693 | ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft); |
694 | } else if (nThis < nLeft) { |
695 | int nb = number_blocks((nLeft + 1) >> 1); |
696 | int nLeft2 = number_rows(nb); |
697 | ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks); |
698 | ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft - nLeft2, |
699 | iBlock + nb, jBlock, numberBlocks); |
700 | } else { |
701 | int nb = number_blocks((nThis + 1) >> 1); |
702 | int nThis2 = number_rows(nb); |
703 | longDouble * aother; |
704 | int kBlock = jBlock + nb; |
705 | int i; |
706 | int nintri = (nb * (nb + 1)) >> 1; |
707 | int nbelow = (numberBlocks - nb) * nb; |
708 | ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks); |
709 | /* and rectangular update */ |
710 | i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - |
711 | (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1; |
712 | aother = aUnder + number_entries(i); |
713 | ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis - nThis2, nLeft, nThis2, aUnder, aother, |
714 | work, kBlock, jBlock, numberBlocks); |
715 | ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis - nThis2, aother, diagonal + nThis2, |
716 | work + nThis2, nLeft, |
717 | iBlock - nb, kBlock - nb, numberBlocks - nb); |
718 | } |
719 | } |
720 | /* Non leaf recursive rectangle triangle update*/ |
721 | void |
722 | ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct, longDouble * aUnder, int nTri, int nDo, |
723 | int iBlock, int jBlock, longDouble * aTri, |
724 | longDouble * diagonal, longDouble * work, |
725 | int numberBlocks) |
726 | { |
727 | if (nTri <= BLOCK && nDo <= BLOCK) { |
728 | ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri,/*diagonal,*/work, nTri); |
729 | } else if (nTri < nDo) { |
730 | int nb = number_blocks((nDo + 1) >> 1); |
731 | int nDo2 = number_rows(nb); |
732 | longDouble * aother; |
733 | int i; |
734 | ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks); |
735 | i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - |
736 | (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1; |
737 | aother = aUnder + number_entries(i); |
738 | ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo - nDo2, iBlock - nb, jBlock, aTri, diagonal + nDo2, |
739 | work + nDo2, numberBlocks - nb); |
740 | } else { |
741 | int nb = number_blocks((nTri + 1) >> 1); |
742 | int nTri2 = number_rows(nb); |
743 | longDouble * aother; |
744 | int i; |
745 | ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks); |
746 | /* and rectangular update */ |
747 | i = ((numberBlocks - iBlock) * (numberBlocks - iBlock + 1) - |
748 | (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb + 1)) >> 1; |
749 | aother = aTri + number_entries(nb); |
750 | ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri - nTri2, nDo, aUnder + number_entries(nb), aother, |
751 | work, iBlock, jBlock, numberBlocks); |
752 | ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri - nTri2, nDo, iBlock + nb, jBlock, |
753 | aTri + number_entries(i), diagonal, work, numberBlocks); |
754 | } |
755 | } |
756 | /* Non leaf recursive rectangle rectangle update, |
757 | nUnder is number of rows in iBlock, |
758 | nUnderK is number of rows in kBlock |
759 | */ |
760 | void |
761 | ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct, longDouble * above, int nUnder, int nUnderK, |
762 | int nDo, longDouble * aUnder, longDouble *aOther, |
763 | longDouble * work, |
764 | int iBlock, int jBlock, |
765 | int numberBlocks) |
766 | { |
767 | if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) { |
768 | assert (nDo == BLOCK && nUnder == BLOCK); |
769 | ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above , aUnder , aOther, work, nUnderK); |
770 | } else if (nDo <= nUnderK && nUnder <= nUnderK) { |
771 | int nb = number_blocks((nUnderK + 1) >> 1); |
772 | int nUnder2 = number_rows(nb); |
773 | ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work, |
774 | iBlock, jBlock, numberBlocks); |
775 | ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK - nUnder2, nDo, aUnder + number_entries(nb), |
776 | aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks); |
777 | } else if (nUnderK <= nDo && nUnder <= nDo) { |
778 | int nb = number_blocks((nDo + 1) >> 1); |
779 | int nDo2 = number_rows(nb); |
780 | int i; |
781 | ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work, |
782 | iBlock, jBlock, numberBlocks); |
783 | i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - |
784 | (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1; |
785 | ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo - nDo2, |
786 | aUnder + number_entries(i), |
787 | aOther, work + nDo2, |
788 | iBlock - nb, jBlock, numberBlocks - nb); |
789 | } else { |
790 | int nb = number_blocks((nUnder + 1) >> 1); |
791 | int nUnder2 = number_rows(nb); |
792 | int i; |
793 | ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work, |
794 | iBlock, jBlock, numberBlocks); |
795 | i = ((numberBlocks - iBlock) * (numberBlocks - iBlock - 1) - |
796 | (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb - 1)) >> 1; |
797 | ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder - nUnder2, nUnderK, nDo, aUnder, |
798 | aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks); |
799 | } |
800 | } |
801 | /* Leaf recursive factor*/ |
802 | void |
803 | ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct, longDouble * a, int n, |
804 | longDouble * diagonal, longDouble * work, int * rowsDropped) |
805 | { |
806 | double dropValue = thisStruct->doubleParameters_[0]; |
807 | int firstPositive = thisStruct->integerParameters_[0]; |
808 | int rowOffset = static_cast<int>(diagonal - thisStruct->diagonal_); |
809 | int i, j, k; |
810 | CoinWorkDouble t00, temp1; |
811 | longDouble * aa; |
812 | aa = a - BLOCK; |
813 | for (j = 0; j < n; j ++) { |
814 | bool dropColumn; |
815 | CoinWorkDouble useT00; |
816 | aa += BLOCK; |
817 | t00 = aa[j]; |
818 | for (k = 0; k < j; ++k) { |
819 | CoinWorkDouble multiplier = work[k]; |
820 | t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier; |
821 | } |
822 | dropColumn = false; |
823 | useT00 = t00; |
824 | if (j + rowOffset < firstPositive) { |
825 | /* must be negative*/ |
826 | if (t00 <= -dropValue) { |
827 | /*aa[j]=t00;*/ |
828 | t00 = 1.0 / t00; |
829 | } else { |
830 | dropColumn = true; |
831 | /*aa[j]=-1.0e100;*/ |
832 | useT00 = -1.0e-100; |
833 | t00 = 0.0; |
834 | } |
835 | } else { |
836 | /* must be positive*/ |
837 | if (t00 >= dropValue) { |
838 | /*aa[j]=t00;*/ |
839 | t00 = 1.0 / t00; |
840 | } else { |
841 | dropColumn = true; |
842 | /*aa[j]=1.0e100;*/ |
843 | useT00 = 1.0e-100; |
844 | t00 = 0.0; |
845 | } |
846 | } |
847 | if (!dropColumn) { |
848 | diagonal[j] = t00; |
849 | work[j] = useT00; |
850 | temp1 = t00; |
851 | for (i = j + 1; i < n; i++) { |
852 | t00 = aa[i]; |
853 | for (k = 0; k < j; ++k) { |
854 | CoinWorkDouble multiplier = work[k]; |
855 | t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier; |
856 | } |
857 | aa[i] = t00 * temp1; |
858 | } |
859 | } else { |
860 | /* drop column*/ |
861 | rowsDropped[j+rowOffset] = 2; |
862 | diagonal[j] = 0.0; |
863 | /*aa[j]=1.0e100;*/ |
864 | work[j] = 1.0e100; |
865 | for (i = j + 1; i < n; i++) { |
866 | aa[i] = 0.0; |
867 | } |
868 | } |
869 | } |
870 | } |
871 | /* Leaf recursive triangle rectangle update*/ |
872 | void |
873 | ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aTri, longDouble * aUnder, longDouble * diagonal, longDouble * work, |
874 | int nUnder) |
875 | { |
876 | #ifdef POS_DEBUG |
877 | int iru, icu; |
878 | int iu = bNumber(aUnder, iru, icu); |
879 | int irt, ict; |
880 | int it = bNumber(aTri, irt, ict); |
881 | /*printf("%d %d\n",iu,it);*/ |
882 | printf("trirecleaf under (%d,%d), tri (%d,%d)\n" , |
883 | iru, icu, irt, ict); |
884 | assert (diagonal == thisStruct->diagonal_ + ict * BLOCK); |
885 | #endif |
886 | int j; |
887 | longDouble * aa; |
888 | #ifdef BLOCKUNROLL |
889 | if (nUnder == BLOCK) { |
890 | aa = aTri - 2 * BLOCK; |
891 | for (j = 0; j < BLOCK; j += 2) { |
892 | int i; |
893 | CoinWorkDouble temp0 = diagonal[j]; |
894 | CoinWorkDouble temp1 = diagonal[j+1]; |
895 | aa += 2 * BLOCK; |
896 | for ( i = 0; i < BLOCK; i += 2) { |
897 | CoinWorkDouble at1; |
898 | CoinWorkDouble t00 = aUnder[i+j*BLOCK]; |
899 | CoinWorkDouble t10 = aUnder[i+ BLOCK + j*BLOCK]; |
900 | CoinWorkDouble t01 = aUnder[i+1+j*BLOCK]; |
901 | CoinWorkDouble t11 = aUnder[i+1+ BLOCK + j*BLOCK]; |
902 | int k; |
903 | for (k = 0; k < j; ++k) { |
904 | CoinWorkDouble multiplier = work[k]; |
905 | CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier; |
906 | CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier; |
907 | CoinWorkDouble at0 = aTri[j + k * BLOCK]; |
908 | CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK]; |
909 | t00 -= au0 * at0; |
910 | t10 -= au0 * at1; |
911 | t01 -= au1 * at0; |
912 | t11 -= au1 * at1; |
913 | } |
914 | t00 *= temp0; |
915 | at1 = aTri[j + 1 + j*BLOCK] * work[j]; |
916 | t10 -= t00 * at1; |
917 | t01 *= temp0; |
918 | t11 -= t01 * at1; |
919 | aUnder[i+j*BLOCK] = t00; |
920 | aUnder[i+1+j*BLOCK] = t01; |
921 | aUnder[i+ BLOCK + j*BLOCK] = t10 * temp1; |
922 | aUnder[i+1+ BLOCK + j*BLOCK] = t11 * temp1; |
923 | } |
924 | } |
925 | } else { |
926 | #endif |
927 | aa = aTri - BLOCK; |
928 | for (j = 0; j < BLOCK; j ++) { |
929 | int i; |
930 | CoinWorkDouble temp1 = diagonal[j]; |
931 | aa += BLOCK; |
932 | for (i = 0; i < nUnder; i++) { |
933 | int k; |
934 | CoinWorkDouble t00 = aUnder[i+j*BLOCK]; |
935 | for ( k = 0; k < j; ++k) { |
936 | CoinWorkDouble multiplier = work[k]; |
937 | t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier; |
938 | } |
939 | aUnder[i+j*BLOCK] = t00 * temp1; |
940 | } |
941 | } |
942 | #ifdef BLOCKUNROLL |
943 | } |
944 | #endif |
945 | } |
946 | /* Leaf recursive rectangle triangle update*/ |
947 | void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aUnder, longDouble * aTri, |
948 | /*longDouble * diagonal,*/ longDouble * work, int nUnder) |
949 | { |
950 | #ifdef POS_DEBUG |
951 | int iru, icu; |
952 | int iu = bNumber(aUnder, iru, icu); |
953 | int irt, ict; |
954 | int it = bNumber(aTri, irt, ict); |
955 | /*printf("%d %d\n",iu,it);*/ |
956 | printf("rectrileaf under (%d,%d), tri (%d,%d)\n" , |
957 | iru, icu, irt, ict); |
958 | assert (diagonal == thisStruct->diagonal_ + icu * BLOCK); |
959 | #endif |
960 | int i, j, k; |
961 | CoinWorkDouble t00; |
962 | longDouble * aa; |
963 | #ifdef BLOCKUNROLL |
964 | if (nUnder == BLOCK) { |
965 | longDouble * aUnder2 = aUnder - 2; |
966 | aa = aTri - 2 * BLOCK; |
967 | for (j = 0; j < BLOCK; j += 2) { |
968 | CoinWorkDouble t00, t01, t10, t11; |
969 | aa += 2 * BLOCK; |
970 | aUnder2 += 2; |
971 | t00 = aa[j]; |
972 | t01 = aa[j+1]; |
973 | t10 = aa[j+1+BLOCK]; |
974 | for (k = 0; k < BLOCK; ++k) { |
975 | CoinWorkDouble multiplier = work[k]; |
976 | CoinWorkDouble a0 = aUnder2[k * BLOCK]; |
977 | CoinWorkDouble a1 = aUnder2[1 + k * BLOCK]; |
978 | CoinWorkDouble x0 = a0 * multiplier; |
979 | CoinWorkDouble x1 = a1 * multiplier; |
980 | t00 -= a0 * x0; |
981 | t01 -= a1 * x0; |
982 | t10 -= a1 * x1; |
983 | } |
984 | aa[j] = t00; |
985 | aa[j+1] = t01; |
986 | aa[j+1+BLOCK] = t10; |
987 | for (i = j + 2; i < BLOCK; i += 2) { |
988 | t00 = aa[i]; |
989 | t01 = aa[i+BLOCK]; |
990 | t10 = aa[i+1]; |
991 | t11 = aa[i+1+BLOCK]; |
992 | for (k = 0; k < BLOCK; ++k) { |
993 | CoinWorkDouble multiplier = work[k]; |
994 | CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier; |
995 | CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier; |
996 | t00 -= aUnder[i + k * BLOCK] * a0; |
997 | t01 -= aUnder[i + k * BLOCK] * a1; |
998 | t10 -= aUnder[i + 1 + k * BLOCK] * a0; |
999 | t11 -= aUnder[i + 1 + k * BLOCK] * a1; |
1000 | } |
1001 | aa[i] = t00; |
1002 | aa[i+BLOCK] = t01; |
1003 | aa[i+1] = t10; |
1004 | aa[i+1+BLOCK] = t11; |
1005 | } |
1006 | } |
1007 | } else { |
1008 | #endif |
1009 | aa = aTri - BLOCK; |
1010 | for (j = 0; j < nUnder; j ++) { |
1011 | aa += BLOCK; |
1012 | for (i = j; i < nUnder; i++) { |
1013 | t00 = aa[i]; |
1014 | for (k = 0; k < BLOCK; ++k) { |
1015 | CoinWorkDouble multiplier = work[k]; |
1016 | t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier; |
1017 | } |
1018 | aa[i] = t00; |
1019 | } |
1020 | } |
1021 | #ifdef BLOCKUNROLL |
1022 | } |
1023 | #endif |
1024 | } |
1025 | /* Leaf recursive rectangle rectangle update, |
1026 | nUnder is number of rows in iBlock, |
1027 | nUnderK is number of rows in kBlock |
1028 | */ |
1029 | void |
1030 | ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ |
1031 | const longDouble * COIN_RESTRICT above, |
1032 | const longDouble * COIN_RESTRICT aUnder, |
1033 | longDouble * COIN_RESTRICT aOther, |
1034 | const longDouble * COIN_RESTRICT work, |
1035 | int nUnder) |
1036 | { |
1037 | #ifdef POS_DEBUG |
1038 | int ira, ica; |
1039 | int ia = bNumber(above, ira, ica); |
1040 | int iru, icu; |
1041 | int iu = bNumber(aUnder, iru, icu); |
1042 | int iro, ico; |
1043 | int io = bNumber(aOther, iro, ico); |
1044 | /*printf("%d %d %d\n",ia,iu,io);*/ |
1045 | printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n" , |
1046 | ira, ica, iru, icu, iro, ico); |
1047 | #endif |
1048 | int i, j, k; |
1049 | longDouble * aa; |
1050 | #ifdef BLOCKUNROLL |
1051 | aa = aOther - 4 * BLOCK; |
1052 | if (nUnder == BLOCK) { |
1053 | /*#define INTEL*/ |
1054 | #ifdef INTEL |
1055 | aa += 2 * BLOCK; |
1056 | for (j = 0; j < BLOCK; j += 2) { |
1057 | aa += 2 * BLOCK; |
1058 | for (i = 0; i < BLOCK; i += 2) { |
1059 | CoinWorkDouble t00 = aa[i+0*BLOCK]; |
1060 | CoinWorkDouble t10 = aa[i+1*BLOCK]; |
1061 | CoinWorkDouble t01 = aa[i+1+0*BLOCK]; |
1062 | CoinWorkDouble t11 = aa[i+1+1*BLOCK]; |
1063 | for (k = 0; k < BLOCK; k++) { |
1064 | CoinWorkDouble multiplier = work[k]; |
1065 | CoinWorkDouble a00 = aUnder[i+k*BLOCK] * multiplier; |
1066 | CoinWorkDouble a01 = aUnder[i+1+k*BLOCK] * multiplier; |
1067 | t00 -= a00 * above[j + 0 + k * BLOCK]; |
1068 | t10 -= a00 * above[j + 1 + k * BLOCK]; |
1069 | t01 -= a01 * above[j + 0 + k * BLOCK]; |
1070 | t11 -= a01 * above[j + 1 + k * BLOCK]; |
1071 | } |
1072 | aa[i+0*BLOCK] = t00; |
1073 | aa[i+1*BLOCK] = t10; |
1074 | aa[i+1+0*BLOCK] = t01; |
1075 | aa[i+1+1*BLOCK] = t11; |
1076 | } |
1077 | } |
1078 | #else |
1079 | for (j = 0; j < BLOCK; j += 4) { |
1080 | aa += 4 * BLOCK; |
1081 | for (i = 0; i < BLOCK; i += 4) { |
1082 | CoinWorkDouble t00 = aa[i+0+0*BLOCK]; |
1083 | CoinWorkDouble t10 = aa[i+0+1*BLOCK]; |
1084 | CoinWorkDouble t20 = aa[i+0+2*BLOCK]; |
1085 | CoinWorkDouble t30 = aa[i+0+3*BLOCK]; |
1086 | CoinWorkDouble t01 = aa[i+1+0*BLOCK]; |
1087 | CoinWorkDouble t11 = aa[i+1+1*BLOCK]; |
1088 | CoinWorkDouble t21 = aa[i+1+2*BLOCK]; |
1089 | CoinWorkDouble t31 = aa[i+1+3*BLOCK]; |
1090 | CoinWorkDouble t02 = aa[i+2+0*BLOCK]; |
1091 | CoinWorkDouble t12 = aa[i+2+1*BLOCK]; |
1092 | CoinWorkDouble t22 = aa[i+2+2*BLOCK]; |
1093 | CoinWorkDouble t32 = aa[i+2+3*BLOCK]; |
1094 | CoinWorkDouble t03 = aa[i+3+0*BLOCK]; |
1095 | CoinWorkDouble t13 = aa[i+3+1*BLOCK]; |
1096 | CoinWorkDouble t23 = aa[i+3+2*BLOCK]; |
1097 | CoinWorkDouble t33 = aa[i+3+3*BLOCK]; |
1098 | const longDouble * COIN_RESTRICT aUnderNow = aUnder + i; |
1099 | const longDouble * COIN_RESTRICT aboveNow = above + j; |
1100 | for (k = 0; k < BLOCK; k++) { |
1101 | CoinWorkDouble multiplier = work[k]; |
1102 | CoinWorkDouble a00 = aUnderNow[0] * multiplier; |
1103 | CoinWorkDouble a01 = aUnderNow[1] * multiplier; |
1104 | CoinWorkDouble a02 = aUnderNow[2] * multiplier; |
1105 | CoinWorkDouble a03 = aUnderNow[3] * multiplier; |
1106 | t00 -= a00 * aboveNow[0]; |
1107 | t10 -= a00 * aboveNow[1]; |
1108 | t20 -= a00 * aboveNow[2]; |
1109 | t30 -= a00 * aboveNow[3]; |
1110 | t01 -= a01 * aboveNow[0]; |
1111 | t11 -= a01 * aboveNow[1]; |
1112 | t21 -= a01 * aboveNow[2]; |
1113 | t31 -= a01 * aboveNow[3]; |
1114 | t02 -= a02 * aboveNow[0]; |
1115 | t12 -= a02 * aboveNow[1]; |
1116 | t22 -= a02 * aboveNow[2]; |
1117 | t32 -= a02 * aboveNow[3]; |
1118 | t03 -= a03 * aboveNow[0]; |
1119 | t13 -= a03 * aboveNow[1]; |
1120 | t23 -= a03 * aboveNow[2]; |
1121 | t33 -= a03 * aboveNow[3]; |
1122 | aUnderNow += BLOCK; |
1123 | aboveNow += BLOCK; |
1124 | } |
1125 | aa[i+0+0*BLOCK] = t00; |
1126 | aa[i+0+1*BLOCK] = t10; |
1127 | aa[i+0+2*BLOCK] = t20; |
1128 | aa[i+0+3*BLOCK] = t30; |
1129 | aa[i+1+0*BLOCK] = t01; |
1130 | aa[i+1+1*BLOCK] = t11; |
1131 | aa[i+1+2*BLOCK] = t21; |
1132 | aa[i+1+3*BLOCK] = t31; |
1133 | aa[i+2+0*BLOCK] = t02; |
1134 | aa[i+2+1*BLOCK] = t12; |
1135 | aa[i+2+2*BLOCK] = t22; |
1136 | aa[i+2+3*BLOCK] = t32; |
1137 | aa[i+3+0*BLOCK] = t03; |
1138 | aa[i+3+1*BLOCK] = t13; |
1139 | aa[i+3+2*BLOCK] = t23; |
1140 | aa[i+3+3*BLOCK] = t33; |
1141 | } |
1142 | } |
1143 | #endif |
1144 | } else { |
1145 | int odd = nUnder & 1; |
1146 | int n = nUnder - odd; |
1147 | for (j = 0; j < BLOCK; j += 4) { |
1148 | aa += 4 * BLOCK; |
1149 | for (i = 0; i < n; i += 2) { |
1150 | CoinWorkDouble t00 = aa[i+0*BLOCK]; |
1151 | CoinWorkDouble t10 = aa[i+1*BLOCK]; |
1152 | CoinWorkDouble t20 = aa[i+2*BLOCK]; |
1153 | CoinWorkDouble t30 = aa[i+3*BLOCK]; |
1154 | CoinWorkDouble t01 = aa[i+1+0*BLOCK]; |
1155 | CoinWorkDouble t11 = aa[i+1+1*BLOCK]; |
1156 | CoinWorkDouble t21 = aa[i+1+2*BLOCK]; |
1157 | CoinWorkDouble t31 = aa[i+1+3*BLOCK]; |
1158 | const longDouble * COIN_RESTRICT aUnderNow = aUnder + i; |
1159 | const longDouble * COIN_RESTRICT aboveNow = above + j; |
1160 | for (k = 0; k < BLOCK; k++) { |
1161 | CoinWorkDouble multiplier = work[k]; |
1162 | CoinWorkDouble a00 = aUnderNow[0] * multiplier; |
1163 | CoinWorkDouble a01 = aUnderNow[1] * multiplier; |
1164 | t00 -= a00 * aboveNow[0]; |
1165 | t10 -= a00 * aboveNow[1]; |
1166 | t20 -= a00 * aboveNow[2]; |
1167 | t30 -= a00 * aboveNow[3]; |
1168 | t01 -= a01 * aboveNow[0]; |
1169 | t11 -= a01 * aboveNow[1]; |
1170 | t21 -= a01 * aboveNow[2]; |
1171 | t31 -= a01 * aboveNow[3]; |
1172 | aUnderNow += BLOCK; |
1173 | aboveNow += BLOCK; |
1174 | } |
1175 | aa[i+0*BLOCK] = t00; |
1176 | aa[i+1*BLOCK] = t10; |
1177 | aa[i+2*BLOCK] = t20; |
1178 | aa[i+3*BLOCK] = t30; |
1179 | aa[i+1+0*BLOCK] = t01; |
1180 | aa[i+1+1*BLOCK] = t11; |
1181 | aa[i+1+2*BLOCK] = t21; |
1182 | aa[i+1+3*BLOCK] = t31; |
1183 | } |
1184 | if (odd) { |
1185 | CoinWorkDouble t0 = aa[n+0*BLOCK]; |
1186 | CoinWorkDouble t1 = aa[n+1*BLOCK]; |
1187 | CoinWorkDouble t2 = aa[n+2*BLOCK]; |
1188 | CoinWorkDouble t3 = aa[n+3*BLOCK]; |
1189 | CoinWorkDouble a0; |
1190 | for (k = 0; k < BLOCK; k++) { |
1191 | a0 = aUnder[n+k*BLOCK] * work[k]; |
1192 | t0 -= a0 * above[j + 0 + k * BLOCK]; |
1193 | t1 -= a0 * above[j + 1 + k * BLOCK]; |
1194 | t2 -= a0 * above[j + 2 + k * BLOCK]; |
1195 | t3 -= a0 * above[j + 3 + k * BLOCK]; |
1196 | } |
1197 | aa[n+0*BLOCK] = t0; |
1198 | aa[n+1*BLOCK] = t1; |
1199 | aa[n+2*BLOCK] = t2; |
1200 | aa[n+3*BLOCK] = t3; |
1201 | } |
1202 | } |
1203 | } |
1204 | #else |
1205 | aa = aOther - BLOCK; |
1206 | for (j = 0; j < BLOCK; j ++) { |
1207 | aa += BLOCK; |
1208 | for (i = 0; i < nUnder; i++) { |
1209 | CoinWorkDouble t00 = aa[i+0*BLOCK]; |
1210 | for (k = 0; k < BLOCK; k++) { |
1211 | CoinWorkDouble a00 = aUnder[i+k*BLOCK] * work[k]; |
1212 | t00 -= a00 * above[j + k * BLOCK]; |
1213 | } |
1214 | aa[i] = t00; |
1215 | } |
1216 | } |
1217 | #endif |
1218 | } |
1219 | /* Uses factorization to solve. */ |
1220 | void |
1221 | ClpCholeskyDense::solve (CoinWorkDouble * region) |
1222 | { |
1223 | #ifdef CHOL_COMPARE |
1224 | double * region2 = NULL; |
1225 | if (numberRows_ < 200) { |
1226 | longDouble * xx = sparseFactor_; |
1227 | longDouble * yy = diagonal_; |
1228 | diagonal_ = sparseFactor_ + 40000; |
1229 | sparseFactor_ = diagonal_ + numberRows_; |
1230 | region2 = ClpCopyOfArray(region, numberRows_); |
1231 | int iRow, iColumn; |
1232 | int addOffset = numberRows_ - 1; |
1233 | longDouble * work = sparseFactor_ - 1; |
1234 | for (iColumn = 0; iColumn < numberRows_; iColumn++) { |
1235 | double value = region2[iColumn]; |
1236 | for (iRow = iColumn + 1; iRow < numberRows_; iRow++) |
1237 | region2[iRow] -= value * work[iRow]; |
1238 | addOffset--; |
1239 | work += addOffset; |
1240 | } |
1241 | for (iColumn = numberRows_ - 1; iColumn >= 0; iColumn--) { |
1242 | double value = region2[iColumn] * diagonal_[iColumn]; |
1243 | work -= addOffset; |
1244 | addOffset++; |
1245 | for (iRow = iColumn + 1; iRow < numberRows_; iRow++) |
1246 | value -= region2[iRow] * work[iRow]; |
1247 | region2[iColumn] = value; |
1248 | } |
1249 | sparseFactor_ = xx; |
1250 | diagonal_ = yy; |
1251 | } |
1252 | #endif |
1253 | /*longDouble * xx = sparseFactor_;*/ |
1254 | /*longDouble * yy = diagonal_;*/ |
1255 | /*diagonal_ = sparseFactor_ + 40000;*/ |
1256 | /*sparseFactor_=diagonal_ + numberRows_;*/ |
1257 | int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; |
1258 | /* later align on boundary*/ |
1259 | longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks; |
1260 | int iBlock; |
1261 | longDouble * aa = a; |
1262 | int iColumn; |
1263 | for (iBlock = 0; iBlock < numberBlocks; iBlock++) { |
1264 | int nChunk; |
1265 | int jBlock; |
1266 | int iDo = iBlock * BLOCK; |
1267 | int base = iDo; |
1268 | if (iDo + BLOCK > numberRows_) { |
1269 | nChunk = numberRows_ - iDo; |
1270 | } else { |
1271 | nChunk = BLOCK; |
1272 | } |
1273 | solveF1(aa, nChunk, region + iDo); |
1274 | for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) { |
1275 | base += BLOCK; |
1276 | aa += BLOCKSQ; |
1277 | if (base + BLOCK > numberRows_) { |
1278 | nChunk = numberRows_ - base; |
1279 | } else { |
1280 | nChunk = BLOCK; |
1281 | } |
1282 | solveF2(aa, nChunk, region + iDo, region + base); |
1283 | } |
1284 | aa += BLOCKSQ; |
1285 | } |
1286 | /* do diagonal outside*/ |
1287 | for (iColumn = 0; iColumn < numberRows_; iColumn++) |
1288 | region[iColumn] *= diagonal_[iColumn]; |
1289 | int offset = ((numberBlocks * (numberBlocks + 1)) >> 1); |
1290 | aa = a + number_entries(offset - 1); |
1291 | int lBase = (numberBlocks - 1) * BLOCK; |
1292 | for (iBlock = numberBlocks - 1; iBlock >= 0; iBlock--) { |
1293 | int nChunk; |
1294 | int jBlock; |
1295 | int triBase = iBlock * BLOCK; |
1296 | int iBase = lBase; |
1297 | for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) { |
1298 | if (iBase + BLOCK > numberRows_) { |
1299 | nChunk = numberRows_ - iBase; |
1300 | } else { |
1301 | nChunk = BLOCK; |
1302 | } |
1303 | solveB2(aa, nChunk, region + triBase, region + iBase); |
1304 | iBase -= BLOCK; |
1305 | aa -= BLOCKSQ; |
1306 | } |
1307 | if (triBase + BLOCK > numberRows_) { |
1308 | nChunk = numberRows_ - triBase; |
1309 | } else { |
1310 | nChunk = BLOCK; |
1311 | } |
1312 | solveB1(aa, nChunk, region + triBase); |
1313 | aa -= BLOCKSQ; |
1314 | } |
1315 | #ifdef CHOL_COMPARE |
1316 | if (numberRows_ < 200) { |
1317 | for (int i = 0; i < numberRows_; i++) { |
1318 | assert(CoinAbs(region[i] - region2[i]) < 1.0e-3); |
1319 | } |
1320 | delete [] region2; |
1321 | } |
1322 | #endif |
1323 | } |
1324 | /* Forward part of solve 1*/ |
1325 | void |
1326 | ClpCholeskyDense::solveF1(longDouble * a, int n, CoinWorkDouble * region) |
1327 | { |
1328 | int j, k; |
1329 | CoinWorkDouble t00; |
1330 | for (j = 0; j < n; j ++) { |
1331 | t00 = region[j]; |
1332 | for (k = 0; k < j; ++k) { |
1333 | t00 -= region[k] * a[j + k * BLOCK]; |
1334 | } |
1335 | /*t00*=a[j + j * BLOCK];*/ |
1336 | region[j] = t00; |
1337 | } |
1338 | } |
1339 | /* Forward part of solve 2*/ |
1340 | void |
1341 | ClpCholeskyDense::solveF2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2) |
1342 | { |
1343 | int j, k; |
1344 | #ifdef BLOCKUNROLL |
1345 | if (n == BLOCK) { |
1346 | for (k = 0; k < BLOCK; k += 4) { |
1347 | CoinWorkDouble t0 = region2[0]; |
1348 | CoinWorkDouble t1 = region2[1]; |
1349 | CoinWorkDouble t2 = region2[2]; |
1350 | CoinWorkDouble t3 = region2[3]; |
1351 | t0 -= region[0] * a[0 + 0 * BLOCK]; |
1352 | t1 -= region[0] * a[1 + 0 * BLOCK]; |
1353 | t2 -= region[0] * a[2 + 0 * BLOCK]; |
1354 | t3 -= region[0] * a[3 + 0 * BLOCK]; |
1355 | |
1356 | t0 -= region[1] * a[0 + 1 * BLOCK]; |
1357 | t1 -= region[1] * a[1 + 1 * BLOCK]; |
1358 | t2 -= region[1] * a[2 + 1 * BLOCK]; |
1359 | t3 -= region[1] * a[3 + 1 * BLOCK]; |
1360 | |
1361 | t0 -= region[2] * a[0 + 2 * BLOCK]; |
1362 | t1 -= region[2] * a[1 + 2 * BLOCK]; |
1363 | t2 -= region[2] * a[2 + 2 * BLOCK]; |
1364 | t3 -= region[2] * a[3 + 2 * BLOCK]; |
1365 | |
1366 | t0 -= region[3] * a[0 + 3 * BLOCK]; |
1367 | t1 -= region[3] * a[1 + 3 * BLOCK]; |
1368 | t2 -= region[3] * a[2 + 3 * BLOCK]; |
1369 | t3 -= region[3] * a[3 + 3 * BLOCK]; |
1370 | |
1371 | t0 -= region[4] * a[0 + 4 * BLOCK]; |
1372 | t1 -= region[4] * a[1 + 4 * BLOCK]; |
1373 | t2 -= region[4] * a[2 + 4 * BLOCK]; |
1374 | t3 -= region[4] * a[3 + 4 * BLOCK]; |
1375 | |
1376 | t0 -= region[5] * a[0 + 5 * BLOCK]; |
1377 | t1 -= region[5] * a[1 + 5 * BLOCK]; |
1378 | t2 -= region[5] * a[2 + 5 * BLOCK]; |
1379 | t3 -= region[5] * a[3 + 5 * BLOCK]; |
1380 | |
1381 | t0 -= region[6] * a[0 + 6 * BLOCK]; |
1382 | t1 -= region[6] * a[1 + 6 * BLOCK]; |
1383 | t2 -= region[6] * a[2 + 6 * BLOCK]; |
1384 | t3 -= region[6] * a[3 + 6 * BLOCK]; |
1385 | |
1386 | t0 -= region[7] * a[0 + 7 * BLOCK]; |
1387 | t1 -= region[7] * a[1 + 7 * BLOCK]; |
1388 | t2 -= region[7] * a[2 + 7 * BLOCK]; |
1389 | t3 -= region[7] * a[3 + 7 * BLOCK]; |
1390 | #if BLOCK>8 |
1391 | t0 -= region[8] * a[0 + 8 * BLOCK]; |
1392 | t1 -= region[8] * a[1 + 8 * BLOCK]; |
1393 | t2 -= region[8] * a[2 + 8 * BLOCK]; |
1394 | t3 -= region[8] * a[3 + 8 * BLOCK]; |
1395 | |
1396 | t0 -= region[9] * a[0 + 9 * BLOCK]; |
1397 | t1 -= region[9] * a[1 + 9 * BLOCK]; |
1398 | t2 -= region[9] * a[2 + 9 * BLOCK]; |
1399 | t3 -= region[9] * a[3 + 9 * BLOCK]; |
1400 | |
1401 | t0 -= region[10] * a[0 + 10 * BLOCK]; |
1402 | t1 -= region[10] * a[1 + 10 * BLOCK]; |
1403 | t2 -= region[10] * a[2 + 10 * BLOCK]; |
1404 | t3 -= region[10] * a[3 + 10 * BLOCK]; |
1405 | |
1406 | t0 -= region[11] * a[0 + 11 * BLOCK]; |
1407 | t1 -= region[11] * a[1 + 11 * BLOCK]; |
1408 | t2 -= region[11] * a[2 + 11 * BLOCK]; |
1409 | t3 -= region[11] * a[3 + 11 * BLOCK]; |
1410 | |
1411 | t0 -= region[12] * a[0 + 12 * BLOCK]; |
1412 | t1 -= region[12] * a[1 + 12 * BLOCK]; |
1413 | t2 -= region[12] * a[2 + 12 * BLOCK]; |
1414 | t3 -= region[12] * a[3 + 12 * BLOCK]; |
1415 | |
1416 | t0 -= region[13] * a[0 + 13 * BLOCK]; |
1417 | t1 -= region[13] * a[1 + 13 * BLOCK]; |
1418 | t2 -= region[13] * a[2 + 13 * BLOCK]; |
1419 | t3 -= region[13] * a[3 + 13 * BLOCK]; |
1420 | |
1421 | t0 -= region[14] * a[0 + 14 * BLOCK]; |
1422 | t1 -= region[14] * a[1 + 14 * BLOCK]; |
1423 | t2 -= region[14] * a[2 + 14 * BLOCK]; |
1424 | t3 -= region[14] * a[3 + 14 * BLOCK]; |
1425 | |
1426 | t0 -= region[15] * a[0 + 15 * BLOCK]; |
1427 | t1 -= region[15] * a[1 + 15 * BLOCK]; |
1428 | t2 -= region[15] * a[2 + 15 * BLOCK]; |
1429 | t3 -= region[15] * a[3 + 15 * BLOCK]; |
1430 | #endif |
1431 | region2[0] = t0; |
1432 | region2[1] = t1; |
1433 | region2[2] = t2; |
1434 | region2[3] = t3; |
1435 | region2 += 4; |
1436 | a += 4; |
1437 | } |
1438 | } else { |
1439 | #endif |
1440 | for (k = 0; k < n; ++k) { |
1441 | CoinWorkDouble t00 = region2[k]; |
1442 | for (j = 0; j < BLOCK; j ++) { |
1443 | t00 -= region[j] * a[k + j * BLOCK]; |
1444 | } |
1445 | region2[k] = t00; |
1446 | } |
1447 | #ifdef BLOCKUNROLL |
1448 | } |
1449 | #endif |
1450 | } |
1451 | /* Backward part of solve 1*/ |
1452 | void |
1453 | ClpCholeskyDense::solveB1(longDouble * a, int n, CoinWorkDouble * region) |
1454 | { |
1455 | int j, k; |
1456 | CoinWorkDouble t00; |
1457 | for (j = n - 1; j >= 0; j --) { |
1458 | t00 = region[j]; |
1459 | for (k = j + 1; k < n; ++k) { |
1460 | t00 -= region[k] * a[k + j * BLOCK]; |
1461 | } |
1462 | /*t00*=a[j + j * BLOCK];*/ |
1463 | region[j] = t00; |
1464 | } |
1465 | } |
1466 | /* Backward part of solve 2*/ |
1467 | void |
1468 | ClpCholeskyDense::solveB2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2) |
1469 | { |
1470 | int j, k; |
1471 | #ifdef BLOCKUNROLL |
1472 | if (n == BLOCK) { |
1473 | for (j = 0; j < BLOCK; j += 4) { |
1474 | CoinWorkDouble t0 = region[0]; |
1475 | CoinWorkDouble t1 = region[1]; |
1476 | CoinWorkDouble t2 = region[2]; |
1477 | CoinWorkDouble t3 = region[3]; |
1478 | t0 -= region2[0] * a[0 + 0*BLOCK]; |
1479 | t1 -= region2[0] * a[0 + 1*BLOCK]; |
1480 | t2 -= region2[0] * a[0 + 2*BLOCK]; |
1481 | t3 -= region2[0] * a[0 + 3*BLOCK]; |
1482 | |
1483 | t0 -= region2[1] * a[1 + 0*BLOCK]; |
1484 | t1 -= region2[1] * a[1 + 1*BLOCK]; |
1485 | t2 -= region2[1] * a[1 + 2*BLOCK]; |
1486 | t3 -= region2[1] * a[1 + 3*BLOCK]; |
1487 | |
1488 | t0 -= region2[2] * a[2 + 0*BLOCK]; |
1489 | t1 -= region2[2] * a[2 + 1*BLOCK]; |
1490 | t2 -= region2[2] * a[2 + 2*BLOCK]; |
1491 | t3 -= region2[2] * a[2 + 3*BLOCK]; |
1492 | |
1493 | t0 -= region2[3] * a[3 + 0*BLOCK]; |
1494 | t1 -= region2[3] * a[3 + 1*BLOCK]; |
1495 | t2 -= region2[3] * a[3 + 2*BLOCK]; |
1496 | t3 -= region2[3] * a[3 + 3*BLOCK]; |
1497 | |
1498 | t0 -= region2[4] * a[4 + 0*BLOCK]; |
1499 | t1 -= region2[4] * a[4 + 1*BLOCK]; |
1500 | t2 -= region2[4] * a[4 + 2*BLOCK]; |
1501 | t3 -= region2[4] * a[4 + 3*BLOCK]; |
1502 | |
1503 | t0 -= region2[5] * a[5 + 0*BLOCK]; |
1504 | t1 -= region2[5] * a[5 + 1*BLOCK]; |
1505 | t2 -= region2[5] * a[5 + 2*BLOCK]; |
1506 | t3 -= region2[5] * a[5 + 3*BLOCK]; |
1507 | |
1508 | t0 -= region2[6] * a[6 + 0*BLOCK]; |
1509 | t1 -= region2[6] * a[6 + 1*BLOCK]; |
1510 | t2 -= region2[6] * a[6 + 2*BLOCK]; |
1511 | t3 -= region2[6] * a[6 + 3*BLOCK]; |
1512 | |
1513 | t0 -= region2[7] * a[7 + 0*BLOCK]; |
1514 | t1 -= region2[7] * a[7 + 1*BLOCK]; |
1515 | t2 -= region2[7] * a[7 + 2*BLOCK]; |
1516 | t3 -= region2[7] * a[7 + 3*BLOCK]; |
1517 | #if BLOCK>8 |
1518 | |
1519 | t0 -= region2[8] * a[8 + 0*BLOCK]; |
1520 | t1 -= region2[8] * a[8 + 1*BLOCK]; |
1521 | t2 -= region2[8] * a[8 + 2*BLOCK]; |
1522 | t3 -= region2[8] * a[8 + 3*BLOCK]; |
1523 | |
1524 | t0 -= region2[9] * a[9 + 0*BLOCK]; |
1525 | t1 -= region2[9] * a[9 + 1*BLOCK]; |
1526 | t2 -= region2[9] * a[9 + 2*BLOCK]; |
1527 | t3 -= region2[9] * a[9 + 3*BLOCK]; |
1528 | |
1529 | t0 -= region2[10] * a[10 + 0*BLOCK]; |
1530 | t1 -= region2[10] * a[10 + 1*BLOCK]; |
1531 | t2 -= region2[10] * a[10 + 2*BLOCK]; |
1532 | t3 -= region2[10] * a[10 + 3*BLOCK]; |
1533 | |
1534 | t0 -= region2[11] * a[11 + 0*BLOCK]; |
1535 | t1 -= region2[11] * a[11 + 1*BLOCK]; |
1536 | t2 -= region2[11] * a[11 + 2*BLOCK]; |
1537 | t3 -= region2[11] * a[11 + 3*BLOCK]; |
1538 | |
1539 | t0 -= region2[12] * a[12 + 0*BLOCK]; |
1540 | t1 -= region2[12] * a[12 + 1*BLOCK]; |
1541 | t2 -= region2[12] * a[12 + 2*BLOCK]; |
1542 | t3 -= region2[12] * a[12 + 3*BLOCK]; |
1543 | |
1544 | t0 -= region2[13] * a[13 + 0*BLOCK]; |
1545 | t1 -= region2[13] * a[13 + 1*BLOCK]; |
1546 | t2 -= region2[13] * a[13 + 2*BLOCK]; |
1547 | t3 -= region2[13] * a[13 + 3*BLOCK]; |
1548 | |
1549 | t0 -= region2[14] * a[14 + 0*BLOCK]; |
1550 | t1 -= region2[14] * a[14 + 1*BLOCK]; |
1551 | t2 -= region2[14] * a[14 + 2*BLOCK]; |
1552 | t3 -= region2[14] * a[14 + 3*BLOCK]; |
1553 | |
1554 | t0 -= region2[15] * a[15 + 0*BLOCK]; |
1555 | t1 -= region2[15] * a[15 + 1*BLOCK]; |
1556 | t2 -= region2[15] * a[15 + 2*BLOCK]; |
1557 | t3 -= region2[15] * a[15 + 3*BLOCK]; |
1558 | #endif |
1559 | region[0] = t0; |
1560 | region[1] = t1; |
1561 | region[2] = t2; |
1562 | region[3] = t3; |
1563 | a += 4 * BLOCK; |
1564 | region += 4; |
1565 | } |
1566 | } else { |
1567 | #endif |
1568 | for (j = 0; j < BLOCK; j ++) { |
1569 | CoinWorkDouble t00 = region[j]; |
1570 | for (k = 0; k < n; ++k) { |
1571 | t00 -= region2[k] * a[k + j * BLOCK]; |
1572 | } |
1573 | region[j] = t00; |
1574 | } |
1575 | #ifdef BLOCKUNROLL |
1576 | } |
1577 | #endif |
1578 | } |
1579 | |