1 | /* $Id: CoinOslFactorization2.cpp 1448 2011-06-19 15:34:41Z stefan $ */ |
2 | /* |
3 | Copyright (C) 1987, 2009, International Business Machines |
4 | Corporation and others. All Rights Reserved. |
5 | |
6 | This code is licensed under the terms of the Eclipse Public License (EPL). |
7 | */ |
8 | /* |
9 | CLP_OSL - if defined use osl |
10 | 0 - don't unroll 2 and 3 - don't use in Gomory |
11 | 1 - don't unroll - do use in Gomory |
12 | 2 - unroll - don't use in Gomory |
13 | 3 - unroll and use in Gomory |
14 | */ |
15 | #include "CoinOslFactorization.hpp" |
16 | #include "CoinOslC.h" |
17 | #include "CoinFinite.hpp" |
18 | |
19 | #ifndef NDEBUG |
20 | extern int ets_count; |
21 | extern int ets_check; |
22 | #endif |
23 | #ifdef COIN_USE_RESTRICT |
24 | # define COIN_RESTRICT2 __restrict |
25 | #else |
26 | # define COIN_RESTRICT2 |
27 | #endif |
28 | static int c_ekkshfpo_scan2zero(const EKKfactinfo * COIN_RESTRICT2 fact,const int * COIN_RESTRICT mpermu, |
29 | double *COIN_RESTRICT worki, double *COIN_RESTRICT worko, int * COIN_RESTRICT mptr) |
30 | { |
31 | |
32 | /* Local variables */ |
33 | int irow; |
34 | double tolerance = fact->zeroTolerance; |
35 | int nin=fact->nrow; |
36 | int * COIN_RESTRICT mptrX=mptr; |
37 | if ((nin&1)!=0) { |
38 | irow=1; |
39 | if (fact->packedMode) { |
40 | int irow0= *mpermu; |
41 | double dval; |
42 | assert (irow0>=1&&irow0<=nin); |
43 | mpermu++; |
44 | dval=worki[irow0]; |
45 | if (NOT_ZERO(dval)) { |
46 | worki[irow0]=0.0; |
47 | if (fabs(dval) >= tolerance) { |
48 | *(worko++)=dval; |
49 | *(mptrX++) = 0; |
50 | } |
51 | } |
52 | } else { |
53 | int irow0= *mpermu; |
54 | double dval; |
55 | assert (irow0>=1&&irow0<=nin); |
56 | mpermu++; |
57 | dval=worki[irow0]; |
58 | if (NOT_ZERO(dval)) { |
59 | worki[irow0]=0.0; |
60 | if (fabs(dval) >= tolerance) { |
61 | *worko=dval; |
62 | *(mptrX++) = 0; |
63 | } |
64 | } |
65 | worko++; |
66 | } |
67 | } else { |
68 | irow=0; |
69 | } |
70 | if (fact->packedMode) { |
71 | for (; irow < nin; irow+=2) { |
72 | int irow0,irow1; |
73 | double dval0,dval1; |
74 | irow0=mpermu[0]; |
75 | irow1=mpermu[1]; |
76 | assert (irow0>=1&&irow0<=nin); |
77 | assert (irow1>=1&&irow1<=nin); |
78 | dval0=worki[irow0]; |
79 | dval1=worki[irow1]; |
80 | if (NOT_ZERO(dval0)) { |
81 | worki[irow0]=0.0; |
82 | if (fabs(dval0) >= tolerance) { |
83 | *(worko++)=dval0; |
84 | *(mptrX++) = irow+0; |
85 | } |
86 | } |
87 | if (NOT_ZERO(dval1)) { |
88 | worki[irow1]=0.0; |
89 | if (fabs(dval1) >= tolerance) { |
90 | *(worko++)=dval1; |
91 | *(mptrX++) = irow+1; |
92 | } |
93 | } |
94 | mpermu+=2; |
95 | } |
96 | } else { |
97 | for (; irow < nin; irow+=2) { |
98 | int irow0,irow1; |
99 | double dval0,dval1; |
100 | irow0=mpermu[0]; |
101 | irow1=mpermu[1]; |
102 | assert (irow0>=1&&irow0<=nin); |
103 | assert (irow1>=1&&irow1<=nin); |
104 | dval0=worki[irow0]; |
105 | dval1=worki[irow1]; |
106 | if (NOT_ZERO(dval0)) { |
107 | worki[irow0]=0.0; |
108 | if (fabs(dval0) >= tolerance) { |
109 | worko[0]=dval0; |
110 | *(mptrX++) = irow+0; |
111 | } |
112 | } |
113 | if (NOT_ZERO(dval1)) { |
114 | worki[irow1]=0.0; |
115 | if (fabs(dval1) >= tolerance) { |
116 | worko[1]=dval1; |
117 | *(mptrX++) = irow+1; |
118 | } |
119 | } |
120 | mpermu+=2; |
121 | worko+=2; |
122 | } |
123 | } |
124 | return static_cast<int>(mptrX-mptr); |
125 | } |
126 | /* |
127 | * c_ekkshfpi_list executes the following loop: |
128 | * |
129 | * for (k=nincol, i=1; k; k--, i++) { |
130 | * int ipt = mptr[i]; |
131 | * int irow = mpermu[ipt]; |
132 | * worko[mpermu[irow]] = worki[i]; |
133 | * worki[i] = 0.0; |
134 | * } |
135 | */ |
136 | static int c_ekkshfpi_list(const int *COIN_RESTRICT mpermu, |
137 | double *COIN_RESTRICT worki, |
138 | double *COIN_RESTRICT worko, |
139 | const int * COIN_RESTRICT mptr, int nincol, |
140 | int * lastNonZero) |
141 | { |
142 | int i,k,irow0,irow1; |
143 | int first=COIN_INT_MAX; |
144 | int last=0; |
145 | /* worko was zeroed out outside */ |
146 | k = nincol; |
147 | i = 0; |
148 | if ((k&1)!=0) { |
149 | int ipt=mptr[i]; |
150 | irow0=mpermu[ipt]; |
151 | first = CoinMin(irow0,first); |
152 | last = CoinMax(irow0,last); |
153 | i++; |
154 | worko[irow0]=*worki; |
155 | *worki++=0.0; |
156 | } |
157 | k=k>>1; |
158 | for (; k; k--) { |
159 | int ipt0 = mptr[i]; |
160 | int ipt1 = mptr[i+1]; |
161 | irow0 = mpermu[ipt0]; |
162 | irow1 = mpermu[ipt1]; |
163 | i+=2; |
164 | first = CoinMin(irow0,first); |
165 | last = CoinMax(irow0,last); |
166 | first = CoinMin(irow1,first); |
167 | last = CoinMax(irow1,last); |
168 | worko[irow0] = worki[0]; |
169 | worko[irow1] = worki[1]; |
170 | worki[0]=0.0; |
171 | worki[1]=0.0; |
172 | worki+=2; |
173 | } |
174 | *lastNonZero=last; |
175 | return first; |
176 | } |
177 | /* |
178 | * c_ekkshfpi_list2 executes the following loop: |
179 | * |
180 | * for (k=nincol, i=1; k; k--, i++) { |
181 | * int ipt = mptr[i]; |
182 | * int irow = mpermu[ipt]; |
183 | * worko[mpermu[irow]] = worki[ipt]; |
184 | * worki[ipt] = 0.0; |
185 | * } |
186 | */ |
187 | static int c_ekkshfpi_list2(const int *COIN_RESTRICT mpermu, double *COIN_RESTRICT worki, double *COIN_RESTRICT worko, |
188 | const int * COIN_RESTRICT mptr, int nincol, |
189 | int * lastNonZero) |
190 | { |
191 | #if 1 |
192 | int i,k,irow0,irow1; |
193 | int first=COIN_INT_MAX; |
194 | int last=0; |
195 | /* worko was zeroed out outside */ |
196 | k = nincol; |
197 | i = 0; |
198 | if ((k&1)!=0) { |
199 | int ipt=mptr[i]; |
200 | irow0=mpermu[ipt]; |
201 | first = CoinMin(irow0,first); |
202 | last = CoinMax(irow0,last); |
203 | i++; |
204 | worko[irow0]=worki[ipt]; |
205 | worki[ipt]=0.0; |
206 | } |
207 | k=k>>1; |
208 | for (; k; k--) { |
209 | int ipt0 = mptr[i]; |
210 | int ipt1 = mptr[i+1]; |
211 | irow0 = mpermu[ipt0]; |
212 | irow1 = mpermu[ipt1]; |
213 | i+=2; |
214 | first = CoinMin(irow0,first); |
215 | last = CoinMax(irow0,last); |
216 | first = CoinMin(irow1,first); |
217 | last = CoinMax(irow1,last); |
218 | worko[irow0] = worki[ipt0]; |
219 | worko[irow1] = worki[ipt1]; |
220 | worki[ipt0]=0.0; |
221 | worki[ipt1]=0.0; |
222 | } |
223 | #else |
224 | int first=COIN_INT_MAX; |
225 | int last=0; |
226 | /* worko was zeroed out outside */ |
227 | for (int i=0; i<nincol; i++) { |
228 | int ipt = mptr[i]; |
229 | int irow = mpermu[ipt]; |
230 | first = CoinMin(irow,first); |
231 | last = CoinMax(irow,last); |
232 | worko[irow] = worki[ipt]; |
233 | worki[ipt]=0.0; |
234 | } |
235 | #endif |
236 | *lastNonZero=last; |
237 | return first; |
238 | } |
239 | /* |
240 | * c_ekkshfpi_list3 executes the following loop: |
241 | * |
242 | * for (k=nincol, i=1; k; k--, i++) { |
243 | * int ipt = mptr[i]; |
244 | * int irow = mpermu[ipt]; |
245 | * worko[irow] = worki[i]; |
246 | * worki[i] = 0.0; |
247 | * mptr[i] = mpermu[ipt]; |
248 | * } |
249 | */ |
250 | static void c_ekkshfpi_list3(const int *COIN_RESTRICT mpermu, |
251 | double *COIN_RESTRICT worki, double *COIN_RESTRICT worko, |
252 | int * COIN_RESTRICT mptr, int nincol) |
253 | { |
254 | int i,k,irow0,irow1; |
255 | /* worko was zeroed out outside */ |
256 | k = nincol; |
257 | i = 0; |
258 | if ((k&1)!=0) { |
259 | int ipt=mptr[i]; |
260 | irow0=mpermu[ipt]; |
261 | mptr[i] = irow0; |
262 | i++; |
263 | worko[irow0]=*worki; |
264 | *worki++=0.0; |
265 | } |
266 | k=k>>1; |
267 | for (; k; k--) { |
268 | int ipt0 = mptr[i]; |
269 | int ipt1 = mptr[i+1]; |
270 | irow0 = mpermu[ipt0]; |
271 | irow1 = mpermu[ipt1]; |
272 | mptr[i] = irow0; |
273 | mptr[i+1] = irow1; |
274 | i+=2; |
275 | worko[irow0] = worki[0]; |
276 | worko[irow1] = worki[1]; |
277 | worki[0]=0.0; |
278 | worki[1]=0.0; |
279 | worki+=2; |
280 | } |
281 | } |
282 | static int c_ekkscmv(const EKKfactinfo * COIN_RESTRICT2 fact,int n, double *COIN_RESTRICT dwork, int *COIN_RESTRICT mptr, |
283 | double *COIN_RESTRICT dwork2) |
284 | { |
285 | double tolerance = fact->zeroTolerance; |
286 | int irow; |
287 | const int * COIN_RESTRICT mptrsave = mptr; |
288 | double * COIN_RESTRICT dwhere = dwork+1; |
289 | if ((n&1)!=0) { |
290 | if (NOT_ZERO(*dwhere)) { |
291 | if (fabs(*dwhere) >= tolerance) { |
292 | *++dwork2 = *dwhere; |
293 | *++mptr = SHIFT_INDEX(1); |
294 | } else { |
295 | *dwhere = 0.0; |
296 | } |
297 | } |
298 | dwhere++; |
299 | irow=2; |
300 | } else { |
301 | irow=1; |
302 | } |
303 | for (n=n>>1;n;n--) { |
304 | int second = NOT_ZERO(*(dwhere+1)); |
305 | if (NOT_ZERO(*dwhere)) { |
306 | if (fabs(*dwhere) >= tolerance) { |
307 | *++dwork2 = *dwhere; |
308 | *++mptr = SHIFT_INDEX(irow); |
309 | } else { |
310 | *dwhere = 0.0; |
311 | } |
312 | } |
313 | if (second) { |
314 | if (fabs(*(dwhere+1)) >= tolerance) { |
315 | *++dwork2 = *(dwhere+1); |
316 | *++mptr = SHIFT_INDEX(irow+1); |
317 | } else { |
318 | *(dwhere+1) = 0.0; |
319 | } |
320 | } |
321 | dwhere+=2; |
322 | irow+=2; |
323 | } |
324 | |
325 | return static_cast<int>(mptr-mptrsave); |
326 | } /* c_ekkscmv */ |
327 | double c_ekkputl(const EKKfactinfo * COIN_RESTRICT2 fact, |
328 | const int *COIN_RESTRICT mpt2, |
329 | double *COIN_RESTRICT dwork1, |
330 | double del3, |
331 | int nincol, int nuspik) |
332 | { |
333 | double * COIN_RESTRICT dwork3 = fact->xeeadr+fact->nnentu; |
334 | int * COIN_RESTRICT hrowi = fact->xeradr+fact->nnentu; |
335 | int offset = fact->R_etas_start[fact->nR_etas+1]; |
336 | int *COIN_RESTRICT hrowiR = fact->R_etas_index+offset; |
337 | double *COIN_RESTRICT dluval = fact->R_etas_element+offset; |
338 | int i, j; |
339 | |
340 | /* dwork1 is r', the new R transform |
341 | * dwork3 is the updated incoming column, alpha_p |
342 | * del3 apparently has the pivot of the incoming column (???). |
343 | * Here, we compute the p'th element of R^-1 alpha_p |
344 | * (as described on p. 273), which is just a dot product. |
345 | * I don't know why we subtract. |
346 | */ |
347 | for (i = 1; i <= nuspik; ++i) { |
348 | j = UNSHIFT_INDEX(hrowi[ i]); |
349 | del3 -= dwork3[i] * dwork1[j]; |
350 | } |
351 | |
352 | /* here we finally copy the r' to where we want it, the end */ |
353 | /* also take into account that the p'th row of R^-1 is -(p'th row of R). */ |
354 | /* also zero out dwork1 as we go */ |
355 | for (i = 0; i < nincol; ++i) { |
356 | j = mpt2[i]; |
357 | hrowiR[ - i ] = SHIFT_INDEX(j); |
358 | dluval[ - i ] = -dwork1[j]; |
359 | dwork1[j] = 0.; |
360 | } |
361 | |
362 | return del3; |
363 | } /* c_ekkputl */ |
364 | /* making this static seems to slow code down! |
365 | may be being inlined |
366 | */ |
367 | int c_ekkputl2( const EKKfactinfo * COIN_RESTRICT2 fact, |
368 | double *COIN_RESTRICT dwork1, |
369 | double *del3p, |
370 | int nuspik) |
371 | { |
372 | double * COIN_RESTRICT dwork3 = fact->xeeadr+fact->nnentu; |
373 | int * COIN_RESTRICT hrowi = fact->xeradr+fact->nnentu; |
374 | int offset = fact->R_etas_start[fact->nR_etas+1]; |
375 | int *COIN_RESTRICT hrowiR = fact->R_etas_index+offset; |
376 | double *COIN_RESTRICT dluval = fact->R_etas_element+offset; |
377 | int i, j; |
378 | #if 0 |
379 | int nincol=c_ekksczr(fact,fact->nrow,dwork1,hrowiR); |
380 | #else |
381 | int nrow=fact->nrow; |
382 | const double tolerance = fact->zeroTolerance; |
383 | int * COIN_RESTRICT mptrX=hrowiR; |
384 | for (i = 1; i <= nrow; ++i) { |
385 | if (dwork1[i] != 0.) { |
386 | if (fabs(dwork1[i]) >= tolerance) { |
387 | *(mptrX--) = SHIFT_INDEX(i); |
388 | } else { |
389 | dwork1[i] = 0.0; |
390 | } |
391 | } |
392 | } |
393 | int nincol=static_cast<int>(hrowiR-mptrX); |
394 | #endif |
395 | double del3 = *del3p; |
396 | /* dwork1 is r', the new R transform |
397 | * dwork3 is the updated incoming column, alpha_p |
398 | * del3 apparently has the pivot of the incoming column (???). |
399 | * Here, we compute the p'th element of R^-1 alpha_p |
400 | * (as described on p. 273), which is just a dot product. |
401 | * I don't know why we subtract. |
402 | */ |
403 | for (i = 1; i <= nuspik; ++i) { |
404 | j = UNSHIFT_INDEX(hrowi[ i]); |
405 | del3 -= dwork3[i] * dwork1[j]; |
406 | } |
407 | |
408 | /* here we finally copy the r' to where we want it, the end */ |
409 | /* also take into account that the p'th row of R^-1 is -(p'th row of R). */ |
410 | /* also zero out dwork1 as we go */ |
411 | for (i = 0; i < nincol; ++i) { |
412 | j = UNSHIFT_INDEX(hrowiR[-i]); |
413 | dluval[ - i ] = -dwork1[j]; |
414 | dwork1[j] = 0.; |
415 | } |
416 | |
417 | *del3p = del3; |
418 | return nincol; |
419 | } /* c_ekkputl */ |
420 | static void c_ekkbtj4p_no_dense(const int nrow,const double * COIN_RESTRICT dluval, |
421 | const int * COIN_RESTRICT hrowi, |
422 | const int * COIN_RESTRICT mcstrt, |
423 | double * COIN_RESTRICT dwork1, int ndo,int jpiv) |
424 | { |
425 | int i; |
426 | double dv1; |
427 | int iel; |
428 | int irow; |
429 | int i1,i2; |
430 | |
431 | /* count down to first nonzero */ |
432 | for (i=nrow;i >=1;i--) { |
433 | if (dwork1[i]) { |
434 | break; |
435 | } |
436 | } |
437 | i--; /* as pivot is just 1.0 */ |
438 | if (i>ndo+jpiv) { |
439 | i=ndo+jpiv; |
440 | } |
441 | mcstrt -= jpiv; |
442 | i2=mcstrt[i+1]; |
443 | for (; i > jpiv; --i) { |
444 | double dv1b=0.0; |
445 | int nel; |
446 | i1 = mcstrt[i]; |
447 | nel= i1-i2; |
448 | dv1 = dwork1[i]; |
449 | iel=i2; |
450 | if ((nel&1)!=0) { |
451 | irow = hrowi[iel]; |
452 | dv1b = SHIFT_REF(dwork1, irow) * dluval[iel]; |
453 | iel++; |
454 | } |
455 | for ( ; iel < i1; iel+=2) { |
456 | int irow = hrowi[iel]; |
457 | int irowb = hrowi[iel+1]; |
458 | dv1 += SHIFT_REF(dwork1, irow) * dluval[iel]; |
459 | dv1b += SHIFT_REF(dwork1, irowb) * dluval[iel+1]; |
460 | } |
461 | i2=i1; |
462 | dwork1[i] = dv1+dv1b; |
463 | } |
464 | } /* c_ekkbtj4 */ |
465 | |
466 | static int c_ekkbtj4p_dense(const int nrow,const double * COIN_RESTRICT dluval, |
467 | const int * COIN_RESTRICT hrowi, |
468 | const int * COIN_RESTRICT mcstrt, double * COIN_RESTRICT dwork1, |
469 | int ndenuc, |
470 | int ndo,int jpiv) |
471 | { |
472 | int i; |
473 | int i2; |
474 | |
475 | int last=ndo-ndenuc+1; |
476 | double * COIN_RESTRICT densew = &dwork1[nrow-1]; |
477 | int nincol=0; |
478 | const double * COIN_RESTRICT dlu1; |
479 | dluval--; |
480 | hrowi--; |
481 | /* count down to first nonzero */ |
482 | for (i=nrow;i >=1;i--) { |
483 | if (dwork1[i]) { |
484 | break; |
485 | } |
486 | } |
487 | if (i<ndo+jpiv) { |
488 | int diff = ndo+jpiv-i; |
489 | ndo -= diff; |
490 | densew-=diff; |
491 | nincol=diff; |
492 | } |
493 | i2=mcstrt[ndo+1]; |
494 | dlu1=&dluval[i2+1]; |
495 | for (i = ndo; i >last; i-=2) { |
496 | int k; |
497 | double dv1,dv2; |
498 | const double * COIN_RESTRICT dlu2; |
499 | dv1=densew[1]; |
500 | dlu2=dlu1+nincol; |
501 | dv2=densew[0]; |
502 | for (k=0;k<nincol;k++) { |
503 | #ifdef DEBUG |
504 | int kk=dlu1-dluval; |
505 | int jj = (densew+(nincol-k+1))-dwork1; |
506 | int ll=hrowi[k+kk]; |
507 | if (ll!=jj) abort(); |
508 | #endif |
509 | dv1 += densew[nincol-k+1]*dlu1[k]; |
510 | dv2 += densew[nincol-k+1]*dlu2[k]; |
511 | } |
512 | densew[1]=dv1; |
513 | dlu1=dlu2+nincol; |
514 | dv2 += dv1*dlu1[0]; |
515 | dlu1++; |
516 | nincol+=2; |
517 | densew[0]=dv2; |
518 | densew-=2; |
519 | } |
520 | return i; |
521 | } /* c_ekkbtj4 */ |
522 | |
523 | static void c_ekkbtj4p_after_dense(const double * COIN_RESTRICT dluval, |
524 | const int * COIN_RESTRICT hrowi, |
525 | const int * COIN_RESTRICT mcstrt, |
526 | double * COIN_RESTRICT dwork1, int i,int jpiv) |
527 | { |
528 | int iel; |
529 | mcstrt -= jpiv; |
530 | i += jpiv; |
531 | iel=mcstrt[i+1]; |
532 | for (; i > jpiv+1; i-=2) { |
533 | int i1 = mcstrt[i]; |
534 | double dv1 = dwork1[i]; |
535 | double dv2; |
536 | for (; iel < i1; iel++) { |
537 | int irow = hrowi[iel]; |
538 | dv1 += SHIFT_REF(dwork1, irow) * dluval[iel]; |
539 | } |
540 | i1 = mcstrt[i-1]; |
541 | dv2 = dwork1[i-1]; |
542 | dwork1[i] = dv1; |
543 | for (; iel < i1; iel++) { |
544 | int irow = hrowi[iel]; |
545 | dv2 += SHIFT_REF(dwork1, irow) * dluval[iel]; |
546 | } |
547 | dwork1[i-1] = dv2; |
548 | } |
549 | if (i>jpiv) { |
550 | int i1 = mcstrt[i]; |
551 | double dv1 = dwork1[i]; |
552 | for (; iel < i1; iel++) { |
553 | int irow = hrowi[iel]; |
554 | dv1 += SHIFT_REF(dwork1, irow) * dluval[iel]; |
555 | } |
556 | dwork1[i] = dv1; |
557 | } |
558 | } |
559 | |
560 | static void c_ekkbtj4p(const EKKfactinfo * COIN_RESTRICT2 fact, |
561 | double * COIN_RESTRICT dwork1) |
562 | { |
563 | int lstart=fact->lstart; |
564 | const int * COIN_RESTRICT hpivco = fact->kcpadr; |
565 | const double * COIN_RESTRICT dluval = fact->xeeadr+1; |
566 | const int * COIN_RESTRICT hrowi = fact->xeradr+1; |
567 | const int * COIN_RESTRICT mcstrt = fact->xcsadr+lstart-1; |
568 | int jpiv=hpivco[lstart]-1; |
569 | int ndo=fact->xnetalval; |
570 | /* see if dense enough to unroll */ |
571 | if (fact->ndenuc<5) { |
572 | c_ekkbtj4p_no_dense(fact->nrow,dluval,hrowi,mcstrt,dwork1,ndo,jpiv); |
573 | } else { |
574 | int i = c_ekkbtj4p_dense(fact->nrow,dluval,hrowi,mcstrt,dwork1, |
575 | fact->ndenuc, ndo,jpiv); |
576 | c_ekkbtj4p_after_dense(dluval,hrowi,mcstrt,dwork1,i,jpiv); |
577 | } |
578 | } /* c_ekkbtj4p */ |
579 | |
580 | static int c_ekkbtj4_sparse(const EKKfactinfo * COIN_RESTRICT2 fact, |
581 | double * COIN_RESTRICT dwork1, |
582 | int * COIN_RESTRICT mpt, /* C style */ |
583 | double * COIN_RESTRICT dworko, |
584 | int nincol, int * COIN_RESTRICT spare) |
585 | { |
586 | const int nrow = fact->nrow; |
587 | const int * COIN_RESTRICT hcoli = fact->xecadr; |
588 | const int * COIN_RESTRICT mrstrt = fact->xrsadr+nrow; |
589 | char * COIN_RESTRICT nonzero = fact->nonzero; |
590 | const int * COIN_RESTRICT hpivro = fact->krpadr; |
591 | const double * COIN_RESTRICT de2val = fact->xe2adr-1; |
592 | double tolerance = fact->zeroTolerance; |
593 | double dv; |
594 | int iel; |
595 | |
596 | int k,nStack,kx; |
597 | int nList=0; |
598 | int * COIN_RESTRICT list = spare; |
599 | int * COIN_RESTRICT stack = spare+nrow; |
600 | int * COIN_RESTRICT next = stack+nrow; |
601 | int iPivot,kPivot; |
602 | int iput,nput=0,kput=nrow; |
603 | int j; |
604 | int firstDoRow=fact->firstDoRow; |
605 | |
606 | for (k=0;k<nincol;k++) { |
607 | nStack=1; |
608 | iPivot=mpt[k]; |
609 | if (nonzero[iPivot]!=1&&iPivot>=firstDoRow) { |
610 | stack[0]=iPivot; |
611 | next[0]=mrstrt[iPivot]; |
612 | while (nStack) { |
613 | /* take off stack */ |
614 | kPivot=stack[--nStack]; |
615 | if (nonzero[kPivot]!=1&&kPivot>=firstDoRow) { |
616 | j=next[nStack]; |
617 | if (j==mrstrt[kPivot+1]) { |
618 | /* finished so mark */ |
619 | list[nList++]=kPivot; |
620 | nonzero[kPivot]=1; |
621 | } else { |
622 | kPivot=hcoli[j]; |
623 | /* put back on stack */ |
624 | next[nStack++] ++; |
625 | if (!nonzero[kPivot]) { |
626 | /* and new one */ |
627 | stack[nStack]=kPivot; |
628 | nonzero[kPivot]=2; |
629 | next[nStack++]=mrstrt[kPivot]; |
630 | } |
631 | } |
632 | } else if (kPivot<firstDoRow) { |
633 | list[--kput]=kPivot; |
634 | nonzero[kPivot]=1; |
635 | } |
636 | } |
637 | } else if (nonzero[iPivot]!=1) { |
638 | /* nothing to do (except check size at end) */ |
639 | list[--kput]=iPivot; |
640 | nonzero[iPivot]=1; |
641 | } |
642 | } |
643 | if (fact->packedMode) { |
644 | dworko++; |
645 | for (k=nList-1;k>=0;k--) { |
646 | double dv; |
647 | iPivot = list[k]; |
648 | dv = dwork1[iPivot]; |
649 | dwork1[iPivot]=0.0; |
650 | nonzero[iPivot]=0; |
651 | if (fabs(dv) > tolerance) { |
652 | iput=hpivro[iPivot]; |
653 | kx=mrstrt[iPivot]; |
654 | dworko[nput]=dv; |
655 | for (iel = kx; iel < mrstrt[iPivot+1]; iel++) { |
656 | double dval; |
657 | int irow = hcoli[iel]; |
658 | dval=de2val[iel]; |
659 | dwork1[irow] += dv*dval; |
660 | } |
661 | mpt[nput++]=iput-1; |
662 | } else { |
663 | dwork1[iPivot]=0.0; /* force to zero, not just near zero */ |
664 | } |
665 | } |
666 | /* check remainder */ |
667 | for (k=kput;k<nrow;k++) { |
668 | iPivot = list[k]; |
669 | nonzero[iPivot]=0; |
670 | dv = dwork1[iPivot]; |
671 | dwork1[iPivot]=0.0; |
672 | iput=hpivro[iPivot]; |
673 | if (fabs(dv) > tolerance) { |
674 | dworko[nput]=dv; |
675 | mpt[nput++]=iput-1; |
676 | } |
677 | } |
678 | } else { |
679 | /* not packed */ |
680 | for (k=nList-1;k>=0;k--) { |
681 | double dv; |
682 | iPivot = list[k]; |
683 | dv = dwork1[iPivot]; |
684 | dwork1[iPivot]=0.0; |
685 | nonzero[iPivot]=0; |
686 | if (fabs(dv) > tolerance) { |
687 | iput=hpivro[iPivot]; |
688 | kx=mrstrt[iPivot]; |
689 | dworko[iput]=dv; |
690 | for (iel = kx; iel < mrstrt[iPivot+1]; iel++) { |
691 | double dval; |
692 | int irow = hcoli[iel]; |
693 | dval=de2val[iel]; |
694 | dwork1[irow] += dv*dval; |
695 | } |
696 | mpt[nput++]=iput-1; |
697 | } else { |
698 | dwork1[iPivot]=0.0; /* force to zero, not just near zero */ |
699 | } |
700 | } |
701 | /* check remainder */ |
702 | for (k=kput;k<nrow;k++) { |
703 | iPivot = list[k]; |
704 | nonzero[iPivot]=0; |
705 | dv = dwork1[iPivot]; |
706 | dwork1[iPivot]=0.0; |
707 | iput=hpivro[iPivot]; |
708 | if (fabs(dv) > tolerance) { |
709 | dworko[iput]=dv; |
710 | mpt[nput++]=iput-1; |
711 | } |
712 | } |
713 | } |
714 | |
715 | return (nput); |
716 | } /* c_ekkbtj4 */ |
717 | |
718 | static void c_ekkbtjl(const EKKfactinfo * COIN_RESTRICT2 fact, |
719 | double * COIN_RESTRICT dwork1) |
720 | { |
721 | int i, j, k, k1; |
722 | int l1; |
723 | const double * COIN_RESTRICT dluval = fact->R_etas_element; |
724 | const int * COIN_RESTRICT hrowi = fact->R_etas_index; |
725 | const int * COIN_RESTRICT mcstrt = fact->R_etas_start; |
726 | const int * COIN_RESTRICT hpivco = fact->hpivcoR; |
727 | int ndo=fact->nR_etas; |
728 | #ifndef UNROLL1 |
729 | #define UNROLL1 4 |
730 | #endif |
731 | #if UNROLL1>2 |
732 | int l2; |
733 | #endif |
734 | int kn; |
735 | double dv; |
736 | int iel; |
737 | int ipiv; |
738 | int knext; |
739 | |
740 | knext = mcstrt[ndo + 1]; |
741 | #if UNROLL1>2 |
742 | for (i = ndo; i > 0; --i) { |
743 | k1 = knext; |
744 | knext = mcstrt[i]; |
745 | ipiv = hpivco[i]; |
746 | dv = dwork1[ipiv]; |
747 | /* fast floating */ |
748 | k = knext - k1; |
749 | kn = k >> 2; |
750 | iel = k1 + 1; |
751 | if (dv != 0.) { |
752 | l1 = (k & 1) != 0; |
753 | l2 = (k & 2) != 0; |
754 | for (j = 1; j <= kn; j++) { |
755 | int irow0 = hrowi[iel + 0]; |
756 | int irow1 = hrowi[iel + 1]; |
757 | int irow2 = hrowi[iel + 2]; |
758 | int irow3 = hrowi[iel + 3]; |
759 | double dval0 = dv * dluval[iel + 0] + SHIFT_REF(dwork1, irow0); |
760 | double dval1 = dv * dluval[iel + 1] + SHIFT_REF(dwork1, irow1); |
761 | double dval2 = dv * dluval[iel + 2] + SHIFT_REF(dwork1, irow2); |
762 | double dval3 = dv * dluval[iel + 3] + SHIFT_REF(dwork1, irow3); |
763 | SHIFT_REF(dwork1, irow0) = dval0; |
764 | SHIFT_REF(dwork1, irow1) = dval1; |
765 | SHIFT_REF(dwork1, irow2) = dval2; |
766 | SHIFT_REF(dwork1, irow3) = dval3; |
767 | iel+=4; |
768 | } |
769 | if (l1) { |
770 | int irow0 = hrowi[iel]; |
771 | SHIFT_REF(dwork1, irow0) += dv* dluval[iel]; |
772 | ++iel; |
773 | } |
774 | if (l2) { |
775 | int irow0 = hrowi[iel + 0]; |
776 | int irow1 = hrowi[iel + 1]; |
777 | SHIFT_REF(dwork1, irow0) += dv* dluval[iel]; |
778 | SHIFT_REF(dwork1, irow1) += dv* dluval[iel+1]; |
779 | } |
780 | } |
781 | } |
782 | #else |
783 | for (i = ndo; i > 0; --i) { |
784 | k1 = knext; |
785 | knext = mcstrt[i]; |
786 | ipiv = hpivco[i]; |
787 | dv = dwork1[ipiv]; |
788 | k = knext - k1; |
789 | kn = k >> 1; |
790 | iel = k1 + 1; |
791 | if (dv != 0.) { |
792 | l1 = (k & 1) != 0; |
793 | for (j = 1; j <= kn; j++) { |
794 | int irow0 = hrowi[iel + 0]; |
795 | int irow1 = hrowi[iel + 1]; |
796 | double dval0 = dv * dluval[iel + 0] + SHIFT_REF(dwork1, irow0); |
797 | double dval1 = dv * dluval[iel + 1] + SHIFT_REF(dwork1, irow1); |
798 | SHIFT_REF(dwork1, irow0) = dval0; |
799 | SHIFT_REF(dwork1, irow1) = dval1; |
800 | iel+=2; |
801 | } |
802 | if (l1) { |
803 | int irow0 = hrowi[iel]; |
804 | SHIFT_REF(dwork1, irow0) += dv* dluval[iel]; |
805 | ++iel; |
806 | } |
807 | } |
808 | } |
809 | #endif |
810 | } /* c_ekkbtjl */ |
811 | |
812 | static int c_ekkbtjl_sparse(const EKKfactinfo * COIN_RESTRICT2 fact, |
813 | double * COIN_RESTRICT dwork1, |
814 | int * COIN_RESTRICT mpt , int nincol) |
815 | { |
816 | const double * COIN_RESTRICT dluval = fact->R_etas_element; |
817 | const int * COIN_RESTRICT hrowi = fact->R_etas_index; |
818 | const int * COIN_RESTRICT mcstrt = fact->R_etas_start; |
819 | const int * COIN_RESTRICT hpivco = fact->hpivcoR; |
820 | char * COIN_RESTRICT nonzero = fact->nonzero; |
821 | int ndo=fact->nR_etas; |
822 | int i, j, k1; |
823 | double dv; |
824 | int ipiv; |
825 | int irow0, irow1; |
826 | int knext; |
827 | int number=nincol; |
828 | |
829 | /* ------------------------------------------- */ |
830 | /* adjust back */ |
831 | hrowi++; |
832 | dluval++; |
833 | |
834 | /* DO ANY ROW TRANSFORMATIONS */ |
835 | |
836 | /* Function Body */ |
837 | knext = mcstrt[ndo + 1]; |
838 | for (i = ndo; i > 0; --i) { |
839 | k1 = knext; |
840 | knext = mcstrt[i]; |
841 | ipiv = hpivco[i]; |
842 | dv = dwork1[ipiv]; |
843 | if (dv) { |
844 | for (j = k1; j <knext-1; j+=2) { |
845 | irow0 = hrowi[j]; |
846 | irow1 = hrowi[j+1]; |
847 | SHIFT_REF(dwork1, irow0) += dv * dluval[j]; |
848 | SHIFT_REF(dwork1, irow1) += dv * dluval[j+1]; |
849 | if (!nonzero[irow0]) { |
850 | nonzero[irow0]=1; |
851 | mpt[++number]=UNSHIFT_INDEX(irow0); |
852 | } |
853 | if (!nonzero[irow1]) { |
854 | nonzero[irow1]=1; |
855 | mpt[++number]=UNSHIFT_INDEX(irow1); |
856 | } |
857 | } |
858 | if (j<knext) { |
859 | irow0 = hrowi[j]; |
860 | SHIFT_REF(dwork1, irow0) += dv * dluval[j]; |
861 | if (!nonzero[irow0]) { |
862 | nonzero[irow0]=1; |
863 | mpt[++number]=UNSHIFT_INDEX(irow0); |
864 | } |
865 | } |
866 | } |
867 | } |
868 | return (number); |
869 | } /* c_ekkbtjl */ |
870 | |
871 | |
872 | |
873 | static void c_ekkbtju_dense(const int nrow, |
874 | const double * COIN_RESTRICT dluval, |
875 | const int * COIN_RESTRICT hrowi, |
876 | const int * COIN_RESTRICT mcstrt, |
877 | int * COIN_RESTRICT hpivco, |
878 | double * COIN_RESTRICT dwork1, |
879 | int * COIN_RESTRICT start,int last,int offset, |
880 | double * COIN_RESTRICT densew) |
881 | { |
882 | /* Local variables */ |
883 | int ipiv1,ipiv2; |
884 | int save=hpivco[last]; |
885 | |
886 | hpivco[last]=nrow+1; |
887 | |
888 | ipiv1=*start; |
889 | ipiv2=hpivco[ipiv1]; |
890 | while(ipiv2<last) { |
891 | int iel,k; |
892 | const int kx1 = mcstrt[ipiv1]; |
893 | const int kx2 = mcstrt[ipiv2]; |
894 | const int nel1 = hrowi[kx1-1]; |
895 | const int nel2 = hrowi[kx2-1]; |
896 | const double dpiv1 = dluval[kx1-1]; |
897 | const double dpiv2 = dluval[kx2-1]; |
898 | const int n1 = offset+ipiv1; /* number in dense part */ |
899 | const int nsparse1=nel1-n1; |
900 | const int nsparse2=nel2-n1-(ipiv2-ipiv1); |
901 | const int k1 = kx1+nsparse1; |
902 | const int k2 = kx2+nsparse2; |
903 | const double *dlu1 = &dluval[k1]; |
904 | const double *dlu2 = &dluval[k2]; |
905 | |
906 | double dv1 = dwork1[ipiv1]; |
907 | double dv2 = dwork1[ipiv2]; |
908 | |
909 | for (iel = kx1; iel < k1; ++iel) { |
910 | dv1 -= SHIFT_REF(dwork1, hrowi[iel]) * dluval[iel]; |
911 | } |
912 | for (iel = kx2; iel < k2; ++iel) { |
913 | dv2 -= SHIFT_REF(dwork1, hrowi[iel]) * dluval[iel]; |
914 | } |
915 | for (k=0;k<n1;k++) { |
916 | dv1 -= dlu1[k] * densew[k]; |
917 | dv2 -= dlu2[k] * densew[k]; |
918 | } |
919 | dv1 *= dpiv1; |
920 | dv2 -= dlu2[n1] * dv1; |
921 | dwork1[ipiv1] = dv1; |
922 | dwork1[ipiv2] = dv2*dpiv2; |
923 | ipiv1 = hpivco[ipiv2]; |
924 | ipiv2 = hpivco[ipiv1]; |
925 | } |
926 | hpivco[last]=save; |
927 | |
928 | *start=ipiv1; |
929 | return; |
930 | } |
931 | /* about 8-10% of execution time is spent in this routine */ |
932 | static int c_ekkbtju_aux(const double * COIN_RESTRICT dluval, |
933 | const int * COIN_RESTRICT hrowi, |
934 | const int * COIN_RESTRICT mcstrt, |
935 | const int * COIN_RESTRICT hpivco, |
936 | double * COIN_RESTRICT dwork1, |
937 | int ipiv, int loop_end) |
938 | { |
939 | #define UNROLL2 2 |
940 | #ifndef UNROLL2 |
941 | #if CLP_OSL==2||CLP_OSL==3 |
942 | #define UNROLL2 2 |
943 | #else |
944 | #define UNROLL2 1 |
945 | #endif |
946 | #endif |
947 | while (ipiv<=loop_end) { |
948 | int kx = mcstrt[ipiv]; |
949 | const int nel = hrowi[kx-1]; |
950 | #if UNROLL2<2 |
951 | const int kxe = kx + nel; |
952 | #endif |
953 | |
954 | double dv = dwork1[ipiv]; /* rhs */ |
955 | #if UNROLL2>1 |
956 | const int * hrowi2=hrowi+kx; |
957 | const int * hrowi2end=hrowi2+nel; |
958 | const double * dluval2=dluval+kx; |
959 | #else |
960 | int iel; |
961 | #endif |
962 | const double dpiv = dluval[kx-1]; /* inverse of pivot */ |
963 | |
964 | |
965 | /* subtract terms whose unknowns have been solved for */ |
966 | |
967 | /* a significant proportion of these loops may not modify dv at all. |
968 | * However, it seems to be just as expensive to check if the loop |
969 | * would modify dv as it is to just do it. |
970 | * The only difference would be that dluval wouldn't be referenced |
971 | * for those loops, would might save some cache paging, |
972 | * but unfortunately the code generated to search for zeros (on AIX) |
973 | * is *worse* than code that just multiplies by dval. |
974 | */ |
975 | #if UNROLL2<2 |
976 | for (iel = kx; iel < kxe; ++iel) { |
977 | const int irow = hrowi[iel]; |
978 | const double dval=dluval[iel]; |
979 | dv -= SHIFT_REF(dwork1, irow) * dval; |
980 | } |
981 | |
982 | dwork1[ipiv] = dv * dpiv; /* divide by the pivot */ |
983 | #else |
984 | if ((nel&1)!=0) { |
985 | int irow = *hrowi2; |
986 | double dval=*dluval2; |
987 | dv -= SHIFT_REF(dwork1, irow) * dval; |
988 | hrowi2++; |
989 | dluval2++; |
990 | } |
991 | for (; hrowi2 < hrowi2end; hrowi2 +=2,dluval2 +=2) { |
992 | int irow0 = hrowi2[0]; |
993 | int irow1 = hrowi2[1]; |
994 | double dval0=dluval2[0]; |
995 | double dval1=dluval2[1]; |
996 | double d0=SHIFT_REF(dwork1, irow0); |
997 | double d1=SHIFT_REF(dwork1, irow1); |
998 | dv -= d0 * dval0; |
999 | dv -= d1 * dval1; |
1000 | } |
1001 | dwork1[ipiv] = dv * dpiv; /* divide by the pivot */ |
1002 | #endif |
1003 | |
1004 | ipiv=hpivco[ipiv]; |
1005 | } |
1006 | |
1007 | return (ipiv); |
1008 | } |
1009 | |
1010 | /* |
1011 | * We are given the upper diagonal matrix U from the LU factorization |
1012 | * and a rhs dwork1. |
1013 | * This solves the system U x = dwork1 |
1014 | * by back substitution, overwriting dwork1 with the solution x. |
1015 | * |
1016 | * It does this in textbook style by solving the equations "bottom" up, |
1017 | * so for each equation one new unknown is solved for by subtracting |
1018 | * from the rhs the sum of the terms whose unknowns have been solved for, |
1019 | * then dividing by the coefficient of the new unknown. |
1020 | * |
1021 | * Since we update the U matrix using F-T, the order of the columns |
1022 | * changes slightly each iteration. Initially, hpivco[i] == i+1, |
1023 | * and each iteration (generally) introduces one element where this |
1024 | * is no longer true. However, because we periodically refactorize, |
1025 | * it is much more common for hpivco[i] == i+1 than not. |
1026 | * |
1027 | * The one quirk is that value referred to as the pivot is actually |
1028 | * the reciprocal of the pivot, to avoid a division. |
1029 | * |
1030 | * Solving in this fashion is inappropriate if there are frequently |
1031 | * cases where all unknowns in an equation have value zero. |
1032 | * This seems to happen frequently if the sparsity of the rhs is, say, 10%. |
1033 | */ |
1034 | static void c_ekkbtju(const EKKfactinfo * COIN_RESTRICT2 fact, |
1035 | double * COIN_RESTRICT dwork1, |
1036 | int ipiv) |
1037 | { |
1038 | const int nrow = fact->nrow; |
1039 | double * COIN_RESTRICT dluval = fact->xeeadr; |
1040 | int * COIN_RESTRICT hrowi = fact->xeradr; |
1041 | int * COIN_RESTRICT mcstrt = fact->xcsadr; |
1042 | int * COIN_RESTRICT hpivco_new = fact->kcpadr+1; |
1043 | int ndenuc=fact->ndenuc; |
1044 | int first_dense = fact->first_dense; |
1045 | int last_dense = fact->last_dense; |
1046 | |
1047 | const int has_dense = (first_dense<last_dense && |
1048 | mcstrt[ipiv]<=mcstrt[last_dense]); |
1049 | |
1050 | /* Parameter adjustments */ |
1051 | /* dluval and hrowi were NOT decremented here. |
1052 | I believe that they are used as C-style arrays below. |
1053 | At this point, I am going to convert them from Fortran- to C-style |
1054 | here by incrementing them; at some later time, I will convert their |
1055 | uses in this file to Fortran-style. |
1056 | */ |
1057 | dluval++; |
1058 | hrowi++; |
1059 | |
1060 | if (has_dense) |
1061 | ipiv = c_ekkbtju_aux(dluval, hrowi, mcstrt, hpivco_new, dwork1, ipiv, |
1062 | first_dense - 1); |
1063 | |
1064 | if (has_dense) { |
1065 | int n=0; |
1066 | int firstDense = nrow-ndenuc+1; |
1067 | double *densew = &dwork1[firstDense]; |
1068 | |
1069 | /* check first dense to see where in triangle it is */ |
1070 | int last=first_dense; |
1071 | int j=mcstrt[last]-1; |
1072 | int k1=j; |
1073 | int k2=j+hrowi[j]; |
1074 | |
1075 | for (j=k2;j>k1;j--) { |
1076 | int irow=UNSHIFT_INDEX(hrowi[j]); |
1077 | if (irow<firstDense) { |
1078 | break; |
1079 | } else { |
1080 | #ifdef DEBUG |
1081 | if (irow!=last-1) { |
1082 | abort(); |
1083 | } |
1084 | #endif |
1085 | last=irow; |
1086 | n++; |
1087 | } |
1088 | } |
1089 | c_ekkbtju_dense(nrow,dluval,hrowi,mcstrt,const_cast<int *> (hpivco_new), |
1090 | dwork1,&ipiv,last_dense, n - first_dense, densew); |
1091 | } |
1092 | |
1093 | (void) c_ekkbtju_aux(dluval, hrowi, mcstrt, hpivco_new, dwork1, ipiv, nrow); |
1094 | } /* c_ekkbtju */ |
1095 | |
1096 | |
1097 | /* |
1098 | * mpt / *nincolp contain the indices of nonzeros in dwork1. |
1099 | * nonzero contains the same information as a byte-mask. |
1100 | * |
1101 | * currently, erase_nonzero is true iff this is called from c_ekketsj. |
1102 | */ |
1103 | static int c_ekkbtju_sparse(const EKKfactinfo * COIN_RESTRICT2 fact, |
1104 | double * COIN_RESTRICT dwork1, |
1105 | int * COIN_RESTRICT mpt, int nincol, |
1106 | int * COIN_RESTRICT spare) |
1107 | { |
1108 | const double * COIN_RESTRICT dluval = fact->xeeadr+1; |
1109 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
1110 | char * COIN_RESTRICT nonzero = fact->nonzero; |
1111 | const int * COIN_RESTRICT hcoli = fact->xecadr; |
1112 | const int * COIN_RESTRICT mrstrt = fact->xrsadr; |
1113 | const int * COIN_RESTRICT hinrow = fact->xrnadr; |
1114 | const double * COIN_RESTRICT de2val = fact->xe2adr-1; |
1115 | int i; |
1116 | int iPivot; |
1117 | int nList=0; |
1118 | int nStack,k,kx; |
1119 | const int nrow=fact->nrow; |
1120 | const double tolerance = fact->zeroTolerance; |
1121 | int * COIN_RESTRICT list = spare; |
1122 | int * COIN_RESTRICT stack = spare+nrow; |
1123 | int * COIN_RESTRICT next = stack+nrow; |
1124 | /* |
1125 | * Examine all nonzero elements and determine which elements may be |
1126 | * nonzero in the result. |
1127 | * Any row in U that contains terms that may have nonzero variable values |
1128 | * may produce a nonzero value. |
1129 | */ |
1130 | for (k=0;k<nincol;k++) { |
1131 | nStack=1; |
1132 | iPivot=mpt[k]; |
1133 | stack[0]=iPivot; |
1134 | next[0]=0; |
1135 | while (nStack) { |
1136 | int kPivot,ninrow,j; |
1137 | /* take off stack */ |
1138 | kPivot=stack[--nStack]; |
1139 | /*printf("nStack %d kPivot %d, ninrow %d, j %d, nList %d\n", |
1140 | nStack,kPivot,hinrow[kPivot], |
1141 | next[nStack],nList);*/ |
1142 | if (nonzero[kPivot]!=1) { |
1143 | ninrow = hinrow[kPivot]; |
1144 | j=next[nStack]; |
1145 | if (j!=ninrow) { |
1146 | kx = mrstrt[kPivot]; |
1147 | kPivot=hcoli[kx+j]; |
1148 | /* put back on stack */ |
1149 | next[nStack++] ++; |
1150 | if (!nonzero[kPivot]) { |
1151 | /* and new one */ |
1152 | stack[nStack]=kPivot; |
1153 | nonzero[kPivot]=2; |
1154 | next[nStack++]=0; |
1155 | } |
1156 | } else { |
1157 | /* finished so mark */ |
1158 | list[nList++]=kPivot; |
1159 | nonzero[kPivot]=1; |
1160 | } |
1161 | } |
1162 | } |
1163 | } |
1164 | |
1165 | i=nList-1; |
1166 | nList=0; |
1167 | for (;i>=0;i--) { |
1168 | double dpiv; |
1169 | double dv; |
1170 | iPivot = list[i]; |
1171 | kx = mcstrt[iPivot]; |
1172 | dpiv = dluval[kx-1]; |
1173 | dv = dpiv * dwork1[iPivot]; |
1174 | nonzero[iPivot] = 0; |
1175 | if (fabs(dv)>=tolerance) { |
1176 | int iel; |
1177 | int krx = mrstrt[iPivot]; |
1178 | int krxe = krx+hinrow[iPivot]; |
1179 | dwork1[iPivot]=dv; |
1180 | mpt[nList++]=iPivot; |
1181 | for (iel = krx; iel < krxe; iel++) { |
1182 | int irow0 = hcoli[iel]; |
1183 | double dval=de2val[iel]; |
1184 | dwork1[irow0] -= dv*dval; |
1185 | } |
1186 | } else { |
1187 | dwork1[iPivot]=0.0; |
1188 | } |
1189 | } |
1190 | |
1191 | return (nList); |
1192 | } /* c_ekkbtjuRow */ |
1193 | |
1194 | /* |
1195 | * dpermu is supposed to be zeroed on entry to this routine. |
1196 | * It is used as a working buffer. |
1197 | * The input vector dwork1 is permuted into dpermu, operated on, |
1198 | * and the answer is permuted back into dwork1, zeroing dpermu in |
1199 | * the process. |
1200 | */ |
1201 | /* |
1202 | * nincol > 0 ==> mpt contains indices of non-zeros in dpermu |
1203 | * |
1204 | * first_nonzero contains index of first (last??)nonzero; |
1205 | * only used if nincol==0. |
1206 | * |
1207 | * dpermu contains permuted input; dwork1 is now zero |
1208 | */ |
1209 | int c_ekkbtrn(const EKKfactinfo * COIN_RESTRICT2 fact, |
1210 | double * COIN_RESTRICT dwork1, |
1211 | int * COIN_RESTRICT mpt, int first_nonzero) |
1212 | { |
1213 | double * COIN_RESTRICT dpermu = fact->kadrpm; |
1214 | const int * COIN_RESTRICT mpermu=fact->mpermu; |
1215 | const int * COIN_RESTRICT hpivco_new= fact->kcpadr+1; |
1216 | |
1217 | const int nrow = fact->nrow; |
1218 | int i; |
1219 | int nincol; |
1220 | /* find the first non-zero input */ |
1221 | int ipiv; |
1222 | if (first_nonzero) { |
1223 | ipiv = first_nonzero; |
1224 | #if 1 |
1225 | if (c_ekk_IsSet(fact->bitArray,ipiv)) { |
1226 | /* slack */ |
1227 | int lastSlack = fact->lastSlack; |
1228 | int firstDo=hpivco_new[lastSlack]; |
1229 | assert (dpermu[ipiv]); |
1230 | while (ipiv!=firstDo) { |
1231 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
1232 | if (dpermu[ipiv]) |
1233 | dpermu[ipiv]=-dpermu[ipiv]; |
1234 | ipiv=hpivco_new[ipiv]; |
1235 | } |
1236 | } |
1237 | #endif |
1238 | } else { |
1239 | int lastSlack = fact->numberSlacks; |
1240 | ipiv=hpivco_new[0]; |
1241 | for (i=0;i<lastSlack;i++) { |
1242 | int next_piv = hpivco_new[ipiv]; |
1243 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
1244 | if (dpermu[ipiv]) { |
1245 | break; |
1246 | } else { |
1247 | ipiv=next_piv; |
1248 | } |
1249 | } |
1250 | |
1251 | /* usually, there is a non-zero slack entry... */ |
1252 | if (i==lastSlack) { |
1253 | /* but if there isn't... */ |
1254 | for (;i<nrow;i++) { |
1255 | if (!dpermu[ipiv]) { |
1256 | ipiv=hpivco_new[ipiv]; |
1257 | } else { |
1258 | break; |
1259 | } |
1260 | } |
1261 | } else { |
1262 | /* reverse signs for slacks */ |
1263 | for (;i<lastSlack;i++) { |
1264 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
1265 | if (dpermu[ipiv]) |
1266 | dpermu[ipiv]=-dpermu[ipiv]; |
1267 | ipiv=hpivco_new[ipiv]; |
1268 | } |
1269 | assert (!c_ekk_IsSet(fact->bitArray,ipiv)||ipiv>fact->nrow); |
1270 | |
1271 | /* this is presumably the first non-zero non slack */ |
1272 | /*ipiv=firstDo;*/ |
1273 | } |
1274 | } |
1275 | if (ipiv<=fact->nrow) { |
1276 | /* skipBtju is always (?) 0 first the first call, |
1277 | * ipiv tends to be >nrow for the second */ |
1278 | |
1279 | /* DO U */ |
1280 | c_ekkbtju(fact,dpermu, |
1281 | ipiv); |
1282 | } |
1283 | |
1284 | |
1285 | /* DO ROW ETAS IN L */ |
1286 | c_ekkbtjl(fact, dpermu); |
1287 | c_ekkbtj4p(fact,dpermu); |
1288 | |
1289 | /* dwork1[mpermu] = dpermu; dpermu = 0; mpt = indices of non-zeros */ |
1290 | nincol = |
1291 | c_ekkshfpo_scan2zero(fact,&mpermu[1],dpermu,&dwork1[1],&mpt[1]); |
1292 | |
1293 | /* dpermu should be zero now */ |
1294 | #ifdef DEBUG |
1295 | for (i=1;i<=nrow ;i++ ) { |
1296 | if (dpermu[i]) { |
1297 | abort(); |
1298 | } /* endif */ |
1299 | } /* endfor */ |
1300 | #endif |
1301 | return (nincol); |
1302 | } /* c_ekkbtrn */ |
1303 | |
1304 | static int c_ekkbtrn0_new(const EKKfactinfo * COIN_RESTRICT2 fact, |
1305 | double * COIN_RESTRICT dwork1, |
1306 | int * COIN_RESTRICT mpt, int nincol, |
1307 | int * COIN_RESTRICT spare) |
1308 | { |
1309 | double * COIN_RESTRICT dpermu = fact->kadrpm; |
1310 | const int * COIN_RESTRICT mpermu=fact->mpermu; |
1311 | const int * COIN_RESTRICT hpivro = fact->krpadr; |
1312 | |
1313 | const int nrow = fact->nrow; |
1314 | |
1315 | int i; |
1316 | char * nonzero=fact->nonzero; |
1317 | int doSparse=1; |
1318 | |
1319 | /* so: dpermu must contain room for: |
1320 | * nrow doubles, followed by |
1321 | * nrow ints (mpermu), followed by |
1322 | * nrow ints (the inverse permutation), followed by |
1323 | * an unused area (?) of nrow ints, followed by |
1324 | * nrow chars (this non-zero array). |
1325 | * |
1326 | * and apparently the first nrow elements of nonzero are expected |
1327 | * to already be zero. |
1328 | */ |
1329 | #ifdef DEBUG |
1330 | for (i=1;i<=nrow ;i++ ) { |
1331 | if (nonzero[i]) { |
1332 | abort(); |
1333 | } /* endif */ |
1334 | } /* endfor */ |
1335 | #endif |
1336 | /* now nonzero[i]==1 iff there is an entry for i in mpt */ |
1337 | |
1338 | nincol=c_ekkbtju_sparse(fact, dpermu, |
1339 | &mpt[1], nincol, |
1340 | spare); |
1341 | |
1342 | /* the vector may have more nonzero elements now */ |
1343 | /* DO ROW ETAS IN L */ |
1344 | #define DENSE_THRESHOLD (nincol*10+100) |
1345 | if (DENSE_THRESHOLD>nrow) { |
1346 | doSparse=0; |
1347 | c_ekkbtjl(fact, dpermu); |
1348 | } else { |
1349 | /* set nonzero */ |
1350 | for(i=0;i<nincol;i++) { |
1351 | int j=mpt[i+1]; |
1352 | nonzero[j]=1; |
1353 | } |
1354 | nincol = |
1355 | c_ekkbtjl_sparse(fact, |
1356 | dpermu, |
1357 | mpt, |
1358 | nincol); |
1359 | for(i=0;i<nincol;i++) { |
1360 | int j=mpt[i+1]; |
1361 | nonzero[j]=0; |
1362 | } |
1363 | if (DENSE_THRESHOLD>nrow) { |
1364 | doSparse=0; |
1365 | #ifdef DEBUG |
1366 | for (i=1;i<=nrow;i++) { |
1367 | if (nonzero[i]) { |
1368 | abort(); |
1369 | } |
1370 | } |
1371 | #endif |
1372 | } |
1373 | } |
1374 | if (!doSparse) { |
1375 | c_ekkbtj4p(fact,dpermu); |
1376 | /* dwork1[mpermu] = dpermu; dpermu = 0; mpt = indices of non-zeros */ |
1377 | nincol = |
1378 | c_ekkshfpo_scan2zero(fact,&mpermu[1],dpermu,&dwork1[1],&mpt[1]); |
1379 | |
1380 | /* dpermu should be zero now */ |
1381 | #ifdef DEBUG |
1382 | for (i=1;i<=nrow ;i++ ) { |
1383 | if (dpermu[i]) { |
1384 | abort(); |
1385 | } /* endif */ |
1386 | } /* endfor */ |
1387 | #endif |
1388 | } else { |
1389 | /* still sparse */ |
1390 | if (fact->nnentl) { |
1391 | nincol = |
1392 | c_ekkbtj4_sparse(fact, |
1393 | dpermu, |
1394 | &mpt[1], |
1395 | dwork1, |
1396 | nincol,spare); |
1397 | } else { |
1398 | double tolerance=fact->zeroTolerance; |
1399 | int irow; |
1400 | int nput=0; |
1401 | if (fact->packedMode) { |
1402 | for (i = 0; i <nincol; i++) { |
1403 | int irow0; |
1404 | double dval; |
1405 | irow=mpt[i+1]; |
1406 | dval=dpermu[irow]; |
1407 | if (NOT_ZERO(dval)) { |
1408 | if (fabs(dval) >= tolerance) { |
1409 | irow0= hpivro[irow]; |
1410 | dwork1[1+nput]=dval; |
1411 | mpt[1 + nput++]=irow0-1; |
1412 | } |
1413 | dpermu[irow]=0.0; |
1414 | } |
1415 | } |
1416 | } else { |
1417 | for (i = 0; i <nincol; i++) { |
1418 | int irow0; |
1419 | double dval; |
1420 | irow=mpt[i+1]; |
1421 | dval=dpermu[irow]; |
1422 | if (NOT_ZERO(dval)) { |
1423 | if (fabs(dval) >= tolerance) { |
1424 | irow0= hpivro[irow]; |
1425 | dwork1[irow0]=dval; |
1426 | mpt[1 + nput++]=irow0-1; |
1427 | } |
1428 | dpermu[irow]=0.0; |
1429 | } |
1430 | } |
1431 | } |
1432 | nincol=nput; |
1433 | } |
1434 | } |
1435 | |
1436 | |
1437 | return (nincol); |
1438 | } /* c_ekkbtrn */ |
1439 | |
1440 | |
1441 | /* returns c_ekkbtrn(fact, dwork1, mpt) |
1442 | * |
1443 | * but since mpt[1..nincol] contains the indices of non-zeros in dwork1, |
1444 | * we can do faster. |
1445 | */ |
1446 | static int c_ekkbtrn_mpt(const EKKfactinfo * COIN_RESTRICT2 fact, |
1447 | double * COIN_RESTRICT dwork1, |
1448 | int * COIN_RESTRICT mpt, int nincol,int * COIN_RESTRICT spare) |
1449 | { |
1450 | double * COIN_RESTRICT dpermu = fact->kadrpm; |
1451 | const int nrow = fact->nrow; |
1452 | |
1453 | const int * COIN_RESTRICT mpermu=fact->mpermu; |
1454 | /*const int *mrstrt = fact->xrsadr;*/ |
1455 | |
1456 | #ifdef DEBUG |
1457 | int i; |
1458 | memset(spare,'A',3*nrow*sizeof(int)); |
1459 | { |
1460 | |
1461 | for (i=1;i<=nrow;i++) { |
1462 | if (dpermu[i]) { |
1463 | abort(); |
1464 | } |
1465 | } |
1466 | } |
1467 | #endif |
1468 | |
1469 | |
1470 | int i; |
1471 | #ifdef DEBUG |
1472 | for (i=1;i<=nrow;i++) { |
1473 | if (fact->nonzero[i]||dpermu[i]) { |
1474 | abort(); |
1475 | } |
1476 | } |
1477 | #endif |
1478 | assert (fact->if_sparse_update>0&&mpt&&fact->rows_ok) ; |
1479 | |
1480 | /* read the input vector from mpt/dwork1; |
1481 | * permute it into dpermu; |
1482 | * construct a nonzero mask in nonzero; |
1483 | * overwrite mpt with the permuted indices; |
1484 | * clear the dwork1 vector. |
1485 | */ |
1486 | for (i=0;i<nincol;i++) { |
1487 | int irow=mpt[i+1]; |
1488 | int jrow=mpermu[irow]; |
1489 | dpermu[jrow]=dwork1[irow]; |
1490 | /*nonzero[jrow-1]=1; this is done in btrn0 */ |
1491 | mpt[i+1]=jrow; |
1492 | dwork1[irow]=0.0; |
1493 | } |
1494 | |
1495 | if (DENSE_THRESHOLD<nrow) { |
1496 | nincol = c_ekkbtrn0_new(fact, dwork1, mpt, nincol,spare); |
1497 | } else { |
1498 | nincol = c_ekkbtrn(fact, dwork1, mpt, 0); |
1499 | } |
1500 | #ifdef DEBUG |
1501 | { |
1502 | |
1503 | for (i=1;i<=nrow;i++) { |
1504 | if (dpermu[i]) { |
1505 | abort(); |
1506 | } |
1507 | } |
1508 | if (fact->if_sparse_update>0) { |
1509 | for (i=1;i<=nrow;i++) { |
1510 | if (fact->nonzero[i]) { |
1511 | abort(); |
1512 | } |
1513 | } |
1514 | } |
1515 | } |
1516 | #endif |
1517 | return nincol; |
1518 | } |
1519 | |
1520 | /* returns c_ekkbtrn(fact, dwork1, mpt) |
1521 | * |
1522 | * but since (dwork1[i]!=0) == (i==ipivrw), |
1523 | * we can do faster. |
1524 | */ |
1525 | int c_ekkbtrn_ipivrw(const EKKfactinfo * COIN_RESTRICT2 fact, |
1526 | double * COIN_RESTRICT dwork1, |
1527 | int * COIN_RESTRICT mpt, int ipivrw,int * COIN_RESTRICT spare) |
1528 | { |
1529 | double * COIN_RESTRICT dpermu = fact->kadrpm; |
1530 | const int nrow = fact->nrow; |
1531 | |
1532 | const int * COIN_RESTRICT mpermu=fact->mpermu; |
1533 | const double * COIN_RESTRICT dluval = fact->xeeadr; |
1534 | const int * COIN_RESTRICT mrstrt = fact->xrsadr; |
1535 | const int * COIN_RESTRICT hinrow = fact->xrnadr; |
1536 | const int * COIN_RESTRICT hcoli = fact->xecadr; |
1537 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
1538 | |
1539 | int nincol; |
1540 | |
1541 | #ifdef DEBUG |
1542 | int i; |
1543 | for (i=1;i<=nrow ;i++ ) { |
1544 | if (dpermu[i]) { |
1545 | abort(); |
1546 | } /* endif */ |
1547 | } /* endfor */ |
1548 | #endif |
1549 | |
1550 | if (fact->if_sparse_update>0&&mpt&& fact->rows_ok) { |
1551 | mpt[1] = ipivrw; |
1552 | nincol = c_ekkbtrn_mpt(fact, dwork1, mpt, 1,spare); |
1553 | } else { |
1554 | int ipiv; |
1555 | int kpivrw = mpermu[ipivrw]; |
1556 | dpermu[kpivrw]=dwork1[ipivrw]; |
1557 | dwork1[ipivrw]=0.0; |
1558 | |
1559 | if (fact->rows_ok) { |
1560 | /* !fact->if_sparse_update |
1561 | * but we still have rowwise info, |
1562 | * so we may as well use it to do the slack row |
1563 | */ |
1564 | int iipivrw=nrow+1; |
1565 | int itest = fact->nnentu+1; |
1566 | int k=mrstrt[kpivrw]; |
1567 | int lastInRow= k+hinrow[kpivrw]; |
1568 | double dpiv,dv; |
1569 | for (;k<lastInRow;k++) { |
1570 | int icol=hcoli[k]; |
1571 | int start=mcstrt[icol]; |
1572 | if (start<itest) { |
1573 | iipivrw=icol; |
1574 | itest=start; |
1575 | } |
1576 | } |
1577 | /* do missed pivot */ |
1578 | itest=mcstrt[kpivrw]; |
1579 | dpiv=dluval[itest]; |
1580 | dv=dpermu[kpivrw]; |
1581 | dv*=dpiv; |
1582 | dpermu[kpivrw]=dv; |
1583 | ipiv=iipivrw; |
1584 | } else { |
1585 | /* no luck - c_ekkbtju will slog through slacks (?) */ |
1586 | ipiv=kpivrw; |
1587 | } |
1588 | /* nincol not read */ |
1589 | /* not sparse */ |
1590 | /* do slacks */ |
1591 | if (ipiv<=fact->nrow) { |
1592 | if (c_ekk_IsSet(fact->bitArray,ipiv)) { |
1593 | const int * hpivco_new= fact->kcpadr+1; |
1594 | int lastSlack = fact->lastSlack; |
1595 | int firstDo=hpivco_new[lastSlack]; |
1596 | /* slack */ |
1597 | /* need pivot row of first nonslack */ |
1598 | dpermu[ipiv]=-dpermu[ipiv]; |
1599 | #ifndef NDEBUG |
1600 | while (1) { |
1601 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
1602 | ipiv=hpivco_new[ipiv]; |
1603 | if (ipiv>fact->nrow||ipiv==firstDo) |
1604 | break; |
1605 | } |
1606 | assert (!c_ekk_IsSet(fact->bitArray,ipiv)||ipiv>fact->nrow); |
1607 | assert (ipiv==firstDo); |
1608 | #endif |
1609 | ipiv=firstDo; |
1610 | } |
1611 | } |
1612 | nincol = c_ekkbtrn(fact, dwork1, mpt, ipiv); |
1613 | } |
1614 | |
1615 | return nincol; |
1616 | } |
1617 | /* |
1618 | * Does work associated with eq. 3.7: |
1619 | * r' = u' U^-1 |
1620 | * |
1621 | * where u' (can't write the overbar) is the p'th row of U, without |
1622 | * the entry for column p. (here, jpivrw is p). |
1623 | * We solve this as for btju. We know |
1624 | * r' U = u' |
1625 | * |
1626 | * so we solve from low index to hi, determining the next value u_i' |
1627 | * by doing the dot-product of r' and the i'th column of U (excluding |
1628 | * element i itself), subtracting that from u'_i, and dividing by |
1629 | * U_ii (we store the reciprocal, so here we multiply). |
1630 | * |
1631 | * Now, in principle dwork1 should be initialized to the p'th row of U. |
1632 | * Instead, it is initially zeroed and filled in as we go along. |
1633 | * Of the entries in u' that we reference during a dot product with |
1634 | * a column of U, either |
1635 | * the entry is 0 by definition, since it is < p, or |
1636 | * it has already been set by a previous iteration, or |
1637 | * it is p. |
1638 | * |
1639 | * Because of this, we know that all elements < p will be zero; |
1640 | * that's why we start with p (kpivrw). |
1641 | |
1642 | * While we do this product, we also zero out the p'th row. |
1643 | */ |
1644 | static void c_ekketju_aux(EKKfactinfo * COIN_RESTRICT2 fact,int sparse, |
1645 | double * COIN_RESTRICT dluval, int * COIN_RESTRICT hrowi, |
1646 | const int * COIN_RESTRICT mcstrt, const int * COIN_RESTRICT hpivco, |
1647 | double * COIN_RESTRICT dwork1, |
1648 | int *ipivp, int jpivrw, int stop_col) |
1649 | { |
1650 | int ipiv = *ipivp; |
1651 | if (1&&ipiv<stop_col&&c_ekk_IsSet(fact->bitArray,ipiv)) { |
1652 | /* slack */ |
1653 | int lastSlack = fact->lastSlack; |
1654 | int firstDo=hpivco[lastSlack]; |
1655 | while (1) { |
1656 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
1657 | dwork1[ipiv] = -dwork1[ipiv]; |
1658 | ipiv=hpivco[ipiv]; /* next column - generally ipiv+1 */ |
1659 | if (ipiv==firstDo||ipiv>=stop_col) |
1660 | break; |
1661 | } |
1662 | } |
1663 | |
1664 | while(ipiv<stop_col) { |
1665 | double dv = dwork1[ipiv]; |
1666 | int kx = mcstrt[ipiv]; |
1667 | int nel = hrowi[kx]; |
1668 | double dpiv = dluval[kx]; |
1669 | int kcs = kx + 1; |
1670 | int kce = kx + nel; |
1671 | int iel; |
1672 | |
1673 | for (iel = kcs; iel <= kce; ++iel) { |
1674 | int irow = hrowi[iel]; |
1675 | irow = UNSHIFT_INDEX(irow); |
1676 | dv -= dwork1[irow] * dluval[iel]; |
1677 | if (irow == jpivrw) { |
1678 | break; |
1679 | } |
1680 | } |
1681 | |
1682 | /* assuming the p'th row is sparse, |
1683 | * this branch will be infrequently taken */ |
1684 | if (iel <= kce) { |
1685 | int irow = hrowi[iel]; |
1686 | /* irow == jpivrw */ |
1687 | dv += dluval[iel]; |
1688 | |
1689 | if (sparse) { |
1690 | /* delete this entry by overwriting it with the last */ |
1691 | --nel; |
1692 | hrowi[kx] = nel; |
1693 | hrowi[iel] = hrowi[kce]; |
1694 | #ifdef CLP_REUSE_ETAS |
1695 | double temp=dluval[iel]; |
1696 | dluval[iel] = dluval[kce]; |
1697 | hrowi[kce]=jpivrw; |
1698 | dluval[kce]=temp; |
1699 | #else |
1700 | dluval[iel] = dluval[kce]; |
1701 | #endif |
1702 | kce--; |
1703 | } else { |
1704 | /* we can't delete an entry from a dense column, |
1705 | * so we just zero it out */ |
1706 | dluval[iel]=0.0; |
1707 | iel++; |
1708 | } |
1709 | |
1710 | /* finish up the remaining entries; same as above loop, but no check */ |
1711 | for (; iel <= kce; ++iel) { |
1712 | irow = UNSHIFT_INDEX(hrowi[iel]); |
1713 | dv -= dwork1[irow] * dluval[iel]; |
1714 | } |
1715 | } |
1716 | dwork1[ipiv] = dv * dpiv; /* divide by pivot */ |
1717 | ipiv=hpivco[ipiv]; /* next column - generally ipiv+1 */ |
1718 | } |
1719 | |
1720 | /* ? is it guaranteed that ipiv==stop_col at this point?? */ |
1721 | *ipivp = ipiv; |
1722 | } |
1723 | |
1724 | /* dwork1 is assumed to be zeroed on entry */ |
1725 | static void c_ekketju(EKKfactinfo * COIN_RESTRICT2 fact,double *dluval, int *hrowi, |
1726 | const int * COIN_RESTRICT mcstrt, const int * COIN_RESTRICT hpivco, |
1727 | double * COIN_RESTRICT dwork1, |
1728 | int kpivrw, int first_dense , int last_dense) |
1729 | { |
1730 | int ipiv = hpivco[kpivrw]; |
1731 | int jpivrw = SHIFT_INDEX(kpivrw); |
1732 | |
1733 | const int nrow = fact->nrow; |
1734 | |
1735 | if (first_dense < last_dense && |
1736 | mcstrt[ipiv] <= mcstrt[last_dense]) { |
1737 | /* There are dense columns, and |
1738 | * some dense columns precede the pivot column */ |
1739 | |
1740 | /* first do any sparse columns "on the left" */ |
1741 | c_ekketju_aux(fact, true, dluval, hrowi, mcstrt, hpivco, dwork1, |
1742 | &ipiv, jpivrw, first_dense); |
1743 | |
1744 | /* then do dense columns */ |
1745 | c_ekketju_aux(fact, false, dluval, hrowi, mcstrt, hpivco, dwork1, |
1746 | &ipiv, jpivrw, last_dense+1); |
1747 | |
1748 | /* final sparse columns "on the right" ...*/ |
1749 | } |
1750 | /* ...are the same as sparse columns if there are no dense */ |
1751 | c_ekketju_aux(fact, true, dluval, hrowi, mcstrt, hpivco, dwork1, |
1752 | &ipiv, jpivrw, nrow+1); |
1753 | } /* c_ekketju */ |
1754 | |
1755 | |
1756 | |
1757 | |
1758 | |
1759 | |
1760 | |
1761 | |
1762 | |
1763 | |
1764 | |
1765 | |
1766 | /*#define PRINT_DEBUG*/ |
1767 | /* dwork1 is assumed to be zeroed on entry */ |
1768 | int c_ekketsj(/*const*/ EKKfactinfo * COIN_RESTRICT2 fact, |
1769 | double * COIN_RESTRICT dwork1, |
1770 | int * COIN_RESTRICT mpt2, double dalpha, int orig_nincol, |
1771 | int npivot, int *nuspikp, |
1772 | const int ipivrw,int * spare) |
1773 | { |
1774 | int nuspik = *nuspikp; |
1775 | |
1776 | int * COIN_RESTRICT mpermu=fact->mpermu; |
1777 | |
1778 | int * COIN_RESTRICT hcoli = fact->xecadr; |
1779 | double * COIN_RESTRICT dluval = fact->xeeadr; |
1780 | int * COIN_RESTRICT mrstrt = fact->xrsadr; |
1781 | int * COIN_RESTRICT hrowi = fact->xeradr; |
1782 | int * COIN_RESTRICT mcstrt = fact->xcsadr; |
1783 | int * COIN_RESTRICT hinrow = fact->xrnadr; |
1784 | /*int *hincol = fact->xcnadr; |
1785 | int *hpivro = fact->krpadr;*/ |
1786 | int * COIN_RESTRICT hpivco = fact->kcpadr; |
1787 | double * COIN_RESTRICT de2val = fact->xe2adr; |
1788 | |
1789 | const int nrow = fact->nrow; |
1790 | const int ifRowCopy = fact->rows_ok; |
1791 | |
1792 | int i, j=-1, k, i1, i2, k1; |
1793 | int kc, iel; |
1794 | double del3; |
1795 | int nroom; |
1796 | bool ifrows= (mrstrt[1] != 0); |
1797 | int kpivrw, jpivrw; |
1798 | int first_dense_mcstrt,last_dense_mcstrt; |
1799 | int nnentl; /* includes row stuff */ |
1800 | int doSparse=(fact->if_sparse_update>0); |
1801 | #ifdef MORE_DEBUG |
1802 | { |
1803 | const int * COIN_RESTRICT hrowi = fact->R_etas_index; |
1804 | const int * COIN_RESTRICT mcstrt = fact->R_etas_start; |
1805 | int ndo=fact->nR_etas; |
1806 | int knext; |
1807 | |
1808 | knext = mcstrt[ndo + 1]; |
1809 | for (int i = ndo; i > 0; --i) { |
1810 | int k1 = knext; |
1811 | knext = mcstrt[i]; |
1812 | for (int j = k1+1; j < knext; j++) { |
1813 | assert (hrowi[j]>0&&hrowi[j]<100000); |
1814 | } |
1815 | } |
1816 | } |
1817 | #endif |
1818 | |
1819 | int mcstrt_piv; |
1820 | int nincol=0; |
1821 | int * COIN_RESTRICT hpivco_new=fact->kcpadr+1; |
1822 | int * COIN_RESTRICT back=fact->back; |
1823 | int irtcod = 0; |
1824 | |
1825 | /* Parameter adjustments */ |
1826 | de2val--; |
1827 | |
1828 | /* Function Body */ |
1829 | if (!ifRowCopy) { |
1830 | doSparse=0; |
1831 | fact->if_sparse_update=-abs(fact->if_sparse_update); |
1832 | } |
1833 | if (npivot==1) { |
1834 | fact->num_resets=0; |
1835 | } |
1836 | kpivrw = mpermu[ipivrw]; |
1837 | #if 0 //ndef NDEBUG |
1838 | ets_count++; |
1839 | if (ets_check>=0&&ets_count>=ets_check) { |
1840 | printf("trouble\n" ); |
1841 | } |
1842 | #endif |
1843 | mcstrt_piv=mcstrt[kpivrw]; |
1844 | /* ndenuc - top has number deleted */ |
1845 | if (fact->ndenuc) { |
1846 | first_dense_mcstrt = mcstrt[fact->first_dense]; |
1847 | last_dense_mcstrt = mcstrt[fact->last_dense]; |
1848 | } else { |
1849 | first_dense_mcstrt=0; |
1850 | last_dense_mcstrt=0; |
1851 | } |
1852 | { |
1853 | int kdnspt = fact->nnetas - fact->nnentl; |
1854 | |
1855 | i1 = ((kdnspt - 1) + fact->R_etas_start[fact->nR_etas + 1]); |
1856 | /*i1 = -99999999;*/ |
1857 | |
1858 | /* fact->R_etas_start[fact->nR_etas + 1] is -(the number of els in R) */ |
1859 | nnentl = fact->nnetas - ((kdnspt - 1) + fact->R_etas_start[fact->nR_etas + 1]); |
1860 | } |
1861 | fact->demark=fact->nnentu+nnentl; |
1862 | jpivrw = SHIFT_INDEX(kpivrw); |
1863 | |
1864 | #ifdef CLP_REUSE_ETAS |
1865 | double del3Orig=0.0; |
1866 | #endif |
1867 | if (nuspik < 0) { |
1868 | goto L7000; |
1869 | } else if (nuspik == 0) { |
1870 | del3 = 0.; |
1871 | } else { |
1872 | del3 = 0.; |
1873 | i1 = fact->nnentu + 1; |
1874 | i2 = fact->nnentu + nuspik; |
1875 | if (fact->sortedEta) { |
1876 | /* binary search */ |
1877 | if (hrowi[i2] == jpivrw) { |
1878 | /* sitting right on the end - easy */ |
1879 | del3 = dluval[i2]; |
1880 | --nuspik; |
1881 | } else { |
1882 | bool foundit = true; |
1883 | |
1884 | /* binary search - sort of implies hrowi is sorted */ |
1885 | i = i1; |
1886 | if (hrowi[i] != jpivrw) { |
1887 | while (1) { |
1888 | i = (i1 + i2) >>1; |
1889 | if (i == i1) { |
1890 | foundit = false; |
1891 | break; |
1892 | } |
1893 | if (hrowi[i] < jpivrw) { |
1894 | i1 = i; |
1895 | } else if (hrowi[i] > jpivrw) { |
1896 | i2 = i; |
1897 | } |
1898 | else |
1899 | break; |
1900 | } |
1901 | } |
1902 | /* ??? what if we didn't find it? */ |
1903 | |
1904 | if (foundit) { |
1905 | del3 = dluval[i]; |
1906 | --nuspik; |
1907 | /* remove it and move the last element into its place */ |
1908 | hrowi[i] = hrowi[nuspik + fact->nnentu+1]; |
1909 | dluval[i] = dluval[nuspik + fact->nnentu+1]; |
1910 | } |
1911 | } |
1912 | } else { |
1913 | /* search */ |
1914 | for (i=i1;i<=i2;i++) { |
1915 | if (hrowi[i] == jpivrw) { |
1916 | del3 = dluval[i]; |
1917 | --nuspik; |
1918 | /* remove it and move the last element into its place */ |
1919 | hrowi[i] = hrowi[i2]; |
1920 | dluval[i] = dluval[i2]; |
1921 | break; |
1922 | } |
1923 | } |
1924 | } |
1925 | } |
1926 | #ifdef CLP_REUSE_ETAS |
1927 | del3Orig=del3; |
1928 | #endif |
1929 | |
1930 | /* OLD COLUMN POINTERS */ |
1931 | /* **************************************************************** */ |
1932 | if (!ifRowCopy) { |
1933 | /* old method */ |
1934 | /* DO U */ |
1935 | c_ekketju(fact,dluval, hrowi, mcstrt, hpivco_new, |
1936 | dwork1, kpivrw,fact->first_dense, |
1937 | fact->last_dense); |
1938 | } else { |
1939 | |
1940 | /* could take out of old column but lets try being crude */ |
1941 | /* try taking out */ |
1942 | if (fact->xe2adr != 0&&doSparse) { |
1943 | |
1944 | /* |
1945 | * There is both a column and row representation of U. |
1946 | * For each row in the kpivrw'th column of the col U rep, |
1947 | * find its position in the U row rep and remove it |
1948 | * by overwriting it with the last element. |
1949 | */ |
1950 | int k1x = mcstrt[kpivrw]; |
1951 | int nel = hrowi[k1x]; /* yes, this is the nel, for the pivot */ |
1952 | int k2x = k1x + nel; |
1953 | |
1954 | for (k = k1x + 1; k <= k2x; ++k) { |
1955 | int irow = UNSHIFT_INDEX(hrowi[k]); |
1956 | int kx = mrstrt[irow]; |
1957 | int nel = hinrow[irow]-1; |
1958 | hinrow[irow]=nel; |
1959 | |
1960 | int jlast = kx + nel; |
1961 | for (int iel=kx;iel<jlast;iel++) { |
1962 | if (kpivrw==hcoli[iel]) { |
1963 | hcoli[iel] = hcoli[jlast]; |
1964 | de2val[iel] = de2val[jlast]; |
1965 | break; |
1966 | } |
1967 | } |
1968 | } |
1969 | } else if (ifRowCopy) { |
1970 | /* still take out */ |
1971 | int k1x = mcstrt[kpivrw]; |
1972 | int nel = hrowi[k1x]; /* yes, this is the nel, for the pivot */ |
1973 | int k2x = k1x + nel; |
1974 | |
1975 | for (k = k1x + 1; k <= k2x; ++k) { |
1976 | int irow = UNSHIFT_INDEX(hrowi[k]); |
1977 | int kx = mrstrt[irow]; |
1978 | int nel = hinrow[irow]-1; |
1979 | hinrow[irow]=nel; |
1980 | int jlast = kx + nel ; |
1981 | for (;kx<jlast;kx++) { |
1982 | if (kpivrw==hcoli[kx]) { |
1983 | hcoli[kx] = hcoli[jlast]; |
1984 | break; |
1985 | } |
1986 | } |
1987 | } |
1988 | } |
1989 | |
1990 | /* add to row version */ |
1991 | /* the updated column (alpha_p) was written to entries |
1992 | * nnentu+1..nnentu+nuspik by routine c_ekkftrn_ft. |
1993 | * That was just an intermediate value of the usual ftrn. |
1994 | */ |
1995 | i1 = fact->nnentu + 1; |
1996 | i2 = fact->nnentu + nuspik; |
1997 | int * COIN_RESTRICT eta_last=mpermu+nrow*2+3; |
1998 | int * COIN_RESTRICT eta_next=eta_last+nrow+2; |
1999 | if (fact->xe2adr == 0||!doSparse) { |
2000 | /* we have column indices by row, but not the actual values */ |
2001 | for (iel = i1; iel <= i2; ++iel) { |
2002 | int irow = UNSHIFT_INDEX(hrowi[iel]); |
2003 | int iput = hinrow[irow]; |
2004 | int kput = mrstrt[irow]; |
2005 | int nextRow=eta_next[irow]; |
2006 | assert (kput>0); |
2007 | kput += iput; |
2008 | if (kput < mrstrt[nextRow]) { |
2009 | /* there is room - append the pivot column; |
2010 | * this corresponds making alpha_p the rightmost column of U (p. 268)*/ |
2011 | hinrow[irow] = iput + 1; |
2012 | hcoli[kput] = kpivrw; |
2013 | } else { |
2014 | /* no room - switch off */ |
2015 | doSparse=0; |
2016 | /* possible kpivrw 1 */ |
2017 | k1 = mrstrt[kpivrw]; |
2018 | mrstrt[1]=-1; |
2019 | fact->rows_ok = false; |
2020 | goto L1226; |
2021 | } |
2022 | } |
2023 | } else { |
2024 | if (! doSparse) { |
2025 | /* we have both column indices and values by row */ |
2026 | /* just like loop above, but with extra assign to de2val */ |
2027 | for (iel = i1; iel <= i2; ++iel) { |
2028 | int irow = UNSHIFT_INDEX(hrowi[iel]); |
2029 | int iput = hinrow[irow]; |
2030 | int kput = mrstrt[irow]; |
2031 | int nextRow=eta_next[irow]; |
2032 | assert (kput>0); |
2033 | kput += iput; |
2034 | if (kput < mrstrt[nextRow]) { |
2035 | hinrow[irow] = iput + 1; |
2036 | hcoli[kput] = kpivrw; |
2037 | de2val[kput] = dluval[iel]; |
2038 | } else { |
2039 | /* no room - switch off */ |
2040 | doSparse=0; |
2041 | /* possible kpivrw 1 */ |
2042 | k1 = mrstrt[kpivrw]; |
2043 | mrstrt[1]=-1; |
2044 | fact->rows_ok = false; |
2045 | goto L1226; |
2046 | } |
2047 | } |
2048 | } else { |
2049 | for (iel = i1; iel <= i2; ++iel) { |
2050 | int j,k; |
2051 | int irow = UNSHIFT_INDEX(hrowi[iel]); |
2052 | int iput = hinrow[irow]; |
2053 | k=mrstrt[irow]+iput; |
2054 | j=eta_next[irow]; |
2055 | if (k >= mrstrt[j]) { |
2056 | /* no room - can we make some? */ |
2057 | int klast=eta_last[nrow+1]; |
2058 | int jput=mrstrt[klast]+hinrow[klast]+2; |
2059 | int distance=mrstrt[nrow+1]-jput; |
2060 | if (iput+1<distance) { |
2061 | /* this presumably copies the row to the end */ |
2062 | int jn,jl; |
2063 | int kstart=mrstrt[irow]; |
2064 | int nin=hinrow[irow]; |
2065 | /* out */ |
2066 | jn=eta_next[irow]; |
2067 | jl=eta_last[irow]; |
2068 | eta_next[jl]=jn; |
2069 | eta_last[jn]=jl; |
2070 | /* in */ |
2071 | eta_next[klast]=irow; |
2072 | eta_last[nrow+1]=irow; |
2073 | eta_last[irow]=klast; |
2074 | eta_next[irow]=nrow+1; |
2075 | mrstrt[irow]=jput; |
2076 | #if 0 |
2077 | memcpy(&hcoli[jput],&hcoli[kstart],nin*sizeof(int)); |
2078 | memcpy(&de2val[jput],&de2val[kstart],nin*sizeof(double)); |
2079 | #else |
2080 | c_ekkscpy(nin,hcoli+kstart,hcoli+jput); |
2081 | c_ekkdcpy(nin, |
2082 | (de2val+kstart),(de2val+jput)); |
2083 | #endif |
2084 | k=jput+iput; |
2085 | } else { |
2086 | /* shuffle down */ |
2087 | int spare=(fact->nnetas-fact->nnentu-fact->nnentl-3); |
2088 | if (spare>nrow<<1) { |
2089 | /* presumbly, this compacts the rows */ |
2090 | int jrow,jput; |
2091 | if (1) { |
2092 | if (fact->num_resets<1000000) { |
2093 | int etasize =CoinMax(4*fact->nnentu+ |
2094 | (fact->nnetas-fact->nnentl)+1000,fact->eta_size); |
2095 | if (ifrows) { |
2096 | fact->num_resets++; |
2097 | if (npivot>40&&fact->num_resets<<4>npivot) { |
2098 | fact->eta_size=static_cast<int>(1.05*fact->eta_size); |
2099 | fact->num_resets=1000000; |
2100 | } |
2101 | } else { |
2102 | fact->eta_size=static_cast<int>(1.1*fact->eta_size); |
2103 | fact->num_resets=1000000; |
2104 | } |
2105 | fact->eta_size=CoinMin(fact->eta_size,etasize); |
2106 | if (fact->maxNNetas>0&&fact->eta_size> |
2107 | fact->maxNNetas) { |
2108 | fact->eta_size=fact->maxNNetas; |
2109 | } |
2110 | } |
2111 | } |
2112 | jrow=eta_next[0]; |
2113 | jput=1; |
2114 | for (j=0;j<nrow;j++) { |
2115 | int k,nin=hinrow[jrow]; |
2116 | k=mrstrt[jrow]; |
2117 | mrstrt[jrow]=jput; |
2118 | for (;nin;nin--) { |
2119 | hcoli[jput]=hcoli[k]; |
2120 | de2val[jput++]=de2val[k++]; |
2121 | } |
2122 | jrow=eta_next[jrow]; |
2123 | } |
2124 | if (spare>nrow<<3) { |
2125 | spare=3; |
2126 | } else if (spare>nrow<<2) { |
2127 | spare=1; |
2128 | } else { |
2129 | spare=0; |
2130 | } |
2131 | jput+=nrow*spare; |
2132 | jrow=eta_last[nrow+1]; |
2133 | for (j=0;j<nrow;j++) { |
2134 | int k,nin=hinrow[jrow]; |
2135 | k=mrstrt[jrow]+nin; |
2136 | jput-=spare; |
2137 | for (;nin;nin--) { |
2138 | hcoli[--jput]=hcoli[--k]; |
2139 | de2val[jput]=de2val[k]; |
2140 | } |
2141 | mrstrt[jrow]=jput; |
2142 | jrow=eta_last[jrow]; |
2143 | } |
2144 | /* set up for copy below */ |
2145 | k=mrstrt[irow]+iput; |
2146 | } else { |
2147 | /* no room - switch off */ |
2148 | doSparse=0; |
2149 | /* possible kpivrw 1 */ |
2150 | k1 = mrstrt[kpivrw]; |
2151 | mrstrt[1]=-1; |
2152 | fact->rows_ok = false; |
2153 | goto L1226; |
2154 | } |
2155 | } |
2156 | } |
2157 | /* now we have room - append the new value */ |
2158 | hinrow[irow] = iput + 1; |
2159 | hcoli[k] = kpivrw; |
2160 | de2val[k] = dluval[iel]; |
2161 | } |
2162 | } |
2163 | } |
2164 | |
2165 | /* TAKE OUT ALL ELEMENTS IN PIVOT ROW */ |
2166 | k1 = mrstrt[kpivrw]; |
2167 | |
2168 | L1226: |
2169 | { |
2170 | int k2 = k1 + hinrow[kpivrw] - 1; |
2171 | |
2172 | /* "delete" the row */ |
2173 | hinrow[kpivrw] = 0; |
2174 | j = 0; |
2175 | if (doSparse) { |
2176 | /* remove pivot row entries from the corresponding columns */ |
2177 | for (k = k1; k <= k2; ++k) { |
2178 | int icol = hcoli[k]; |
2179 | int kx = mcstrt[icol]; |
2180 | int nel = hrowi[kx]; |
2181 | for (iel = kx + 1; iel <= kx+nel; iel ++) { |
2182 | if (hrowi[iel] == jpivrw) { |
2183 | break; |
2184 | } |
2185 | } |
2186 | if (iel <= kx+nel) { |
2187 | /* this has to happen, right?? */ |
2188 | |
2189 | /* copy the element into a temporary */ |
2190 | dwork1[icol] = dluval[iel]; |
2191 | mpt2[nincol++]=icol; |
2192 | /*nonzero[icol-1]=1;*/ |
2193 | |
2194 | hrowi[kx]=nel-1; /* column is shorter by one */ |
2195 | j=1; |
2196 | hrowi[iel]=hrowi[kx+nel]; |
2197 | dluval[iel]=dluval[kx+nel]; |
2198 | #ifdef CLP_REUSE_ETAS |
2199 | hrowi[kx+nel]=jpivrw; |
2200 | dluval[kx+nel]=dwork1[icol]; |
2201 | #endif |
2202 | } |
2203 | } |
2204 | if (j != 0) { |
2205 | /* now compute r', the new R transform */ |
2206 | orig_nincol=c_ekkbtju_sparse(fact, dwork1, |
2207 | mpt2, nincol, |
2208 | spare); |
2209 | dwork1[kpivrw]=0.0; |
2210 | } |
2211 | } else { |
2212 | /* row version isn't ok (?) */ |
2213 | for (k = k1; k <= k2; ++k) { |
2214 | int icol = hcoli[k]; |
2215 | int kx = mcstrt[icol]; |
2216 | int nel = hrowi[kx]; |
2217 | j = kx+nel; |
2218 | int iel; |
2219 | for (iel=kx+1;iel<=j;iel++) { |
2220 | if (hrowi[iel]==jpivrw) |
2221 | break; |
2222 | } |
2223 | dwork1[icol] = dluval[iel]; |
2224 | if (kx<first_dense_mcstrt || kx>last_dense_mcstrt) { |
2225 | hrowi[kx] = nel - 1; /* shorten column */ |
2226 | /* not packing - presumably column isn't sorted */ |
2227 | hrowi[iel] = hrowi[j]; |
2228 | dluval[iel] = dluval[j]; |
2229 | #ifdef CLP_REUSE_ETAS |
2230 | hrowi[j]=jpivrw; |
2231 | dluval[j]=dwork1[icol]; |
2232 | #endif |
2233 | } else { |
2234 | /* dense element - just zero it */ |
2235 | dluval[iel]=0.0; |
2236 | } |
2237 | } |
2238 | if (j != 0) { |
2239 | /* Find first nonzero */ |
2240 | int ipiv = hpivco_new[kpivrw]; |
2241 | while(ipiv<=nrow) { |
2242 | if (!dwork1[ipiv]) { |
2243 | ipiv=hpivco_new[ipiv]; |
2244 | } else { |
2245 | break; |
2246 | } |
2247 | } |
2248 | if (ipiv<=nrow) { |
2249 | /* DO U */ |
2250 | /* now compute r', the new R transform */ |
2251 | c_ekkbtju(fact, dwork1, |
2252 | ipiv); |
2253 | } |
2254 | } |
2255 | } |
2256 | } |
2257 | } |
2258 | |
2259 | if (kpivrw==fact->first_dense) { |
2260 | /* increase until valid pivot */ |
2261 | fact->first_dense=hpivco_new[fact->first_dense]; |
2262 | } else if (kpivrw==fact->last_dense) { |
2263 | fact->last_dense=back[fact->last_dense]; |
2264 | } |
2265 | if (fact->first_dense==fact->last_dense) { |
2266 | fact->ndenuc=0; |
2267 | fact->first_dense=0; |
2268 | fact->last_dense=-1; |
2269 | } |
2270 | if (! (ifRowCopy && j==0)) { |
2271 | |
2272 | /* increase amount of work on Etas */ |
2273 | |
2274 | /* **************************************************************** */ |
2275 | /* DO ROW ETAS IN L */ |
2276 | { |
2277 | if (!doSparse) { |
2278 | dwork1[kpivrw] = 0.; |
2279 | #if 0 |
2280 | orig_nincol=c_ekksczr(fact,nrow, dwork1, mpt2); |
2281 | del3=c_ekkputl(fact, mpt2, dwork1, del3, |
2282 | orig_nincol, nuspik); |
2283 | #else |
2284 | orig_nincol=c_ekkputl2(fact, |
2285 | dwork1, &del3, |
2286 | nuspik); |
2287 | #endif |
2288 | } else { |
2289 | del3=c_ekkputl(fact, mpt2, |
2290 | dwork1, del3, |
2291 | orig_nincol, nuspik); |
2292 | } |
2293 | } |
2294 | if (orig_nincol != 0) { |
2295 | /* STORE AS A ROW VECTOR */ |
2296 | int n = fact->nR_etas+1; |
2297 | int i1 = fact->R_etas_start[n]; |
2298 | fact->nR_etas=n; |
2299 | fact->R_etas_start[n + 1] = i1 - orig_nincol; |
2300 | hpivco[fact->nR_etas + nrow+3] = kpivrw; |
2301 | } |
2302 | } |
2303 | |
2304 | /* CHECK DEL3 AGAINST DALPHA/DOUT */ |
2305 | { |
2306 | int kx = mcstrt[kpivrw]; |
2307 | double dout = dluval[kx]; |
2308 | double dcheck = fabs(dalpha / dout); |
2309 | double difference=0.0; |
2310 | if (fabs(del3) > CoinMin(1.0e-8,fact->drtpiv*0.99999)) { |
2311 | double checkTolerance; |
2312 | if ( fact->npivots < 2 ) { |
2313 | checkTolerance = 1.0e-5; |
2314 | } else if ( fact->npivots < 10 ) { |
2315 | checkTolerance = 1.0e-6; |
2316 | } else if ( fact->npivots < 50 ) { |
2317 | checkTolerance = 1.0e-8; |
2318 | } else { |
2319 | checkTolerance = 1.0e-9; |
2320 | } |
2321 | difference = fabs(1.0-fabs(del3)/dcheck); |
2322 | if (difference > 0.1*checkTolerance) { |
2323 | if (difference < checkTolerance|| |
2324 | (difference<1.0e-7&&fact->npivots>=50)) { |
2325 | irtcod=1; |
2326 | #ifdef PRINT_DEBUG |
2327 | printf("mildly bad %g after %d pivots, etsj %g ftncheck %g ftnalpha %g\n" , |
2328 | difference,fact->npivots,del3,dcheck,dalpha); |
2329 | #endif |
2330 | } else { |
2331 | irtcod=2; |
2332 | #ifdef PRINT_DEBUG |
2333 | printf("bad %g after %d pivots, etsj %g ftncheck %g ftnalpha %g\n" , |
2334 | difference,fact->npivots,del3,dcheck,dalpha); |
2335 | #endif |
2336 | } |
2337 | } |
2338 | } else { |
2339 | irtcod=2; |
2340 | #ifdef PRINT_DEBUG |
2341 | printf("bad small %g after %d pivots, etsj %g ftncheck %g ftnalpha %g\n" , |
2342 | difference,fact->npivots,del3,dcheck,dalpha); |
2343 | #endif |
2344 | } |
2345 | if (irtcod>1) |
2346 | goto L8000; |
2347 | fact->npivots++; |
2348 | } |
2349 | |
2350 | mcstrt[kpivrw] = fact->nnentu; |
2351 | #ifdef CLP_REUSE_ETAS |
2352 | { |
2353 | int * putSeq = fact->xrsadr+2*fact->nrowmx+2; |
2354 | int * position = putSeq+fact->maxinv; |
2355 | int * putStart = position+fact->maxinv; |
2356 | putStart[fact->nrow+fact->npivots-1]=fact->nnentu; |
2357 | } |
2358 | #endif |
2359 | dluval[fact->nnentu] = 1. / del3; /* new pivot */ |
2360 | hrowi[fact->nnentu] = nuspik; /* new nelems */ |
2361 | #ifndef NDEBUG |
2362 | { |
2363 | int lastSlack = fact->lastSlack; |
2364 | int firstDo=hpivco_new[lastSlack]; |
2365 | int ipiv=hpivco_new[0]; |
2366 | int now = fact->numberSlacks; |
2367 | if (now) { |
2368 | while (1) { |
2369 | if (ipiv>fact->nrow||ipiv==firstDo) |
2370 | break; |
2371 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
2372 | ipiv=hpivco_new[ipiv]; |
2373 | } |
2374 | if (ipiv<=fact->nrow) { |
2375 | while (1) { |
2376 | if (ipiv>fact->nrow) |
2377 | break; |
2378 | assert (!c_ekk_IsSet(fact->bitArray,ipiv)); |
2379 | ipiv=hpivco_new[ipiv]; |
2380 | } |
2381 | } |
2382 | } |
2383 | } |
2384 | #endif |
2385 | { |
2386 | /* do new hpivco */ |
2387 | int inext=hpivco_new[kpivrw]; |
2388 | int iback=back[kpivrw]; |
2389 | if (inext!=nrow+1) { |
2390 | int ilast=back[nrow+1]; |
2391 | hpivco_new[iback]=inext; |
2392 | back[inext]=iback; |
2393 | assert (hpivco_new[ilast]==nrow+1); |
2394 | hpivco_new[ilast]=kpivrw; |
2395 | back[kpivrw]=ilast; |
2396 | hpivco_new[kpivrw]=nrow+1; |
2397 | back[nrow+1]=kpivrw; |
2398 | } |
2399 | } |
2400 | { |
2401 | int lastSlack = fact->lastSlack; |
2402 | int now = fact->numberSlacks; |
2403 | if (now&&mcstrt_piv<=mcstrt[lastSlack]) { |
2404 | if (c_ekk_IsSet(fact->bitArray,kpivrw)) { |
2405 | /*printf("piv %d lastSlack %d\n",mcstrt_piv,lastSlack);*/ |
2406 | fact->numberSlacks--; |
2407 | now--; |
2408 | /* one less slack */ |
2409 | c_ekk_Unset(fact->bitArray,kpivrw); |
2410 | if (now&&kpivrw==lastSlack) { |
2411 | int i; |
2412 | int ipiv,jpiv; |
2413 | ipiv=hpivco_new[0]; |
2414 | for (i=0;i<now-1;i++) |
2415 | ipiv=hpivco_new[ipiv]; |
2416 | lastSlack=ipiv; |
2417 | jpiv=hpivco_new[ipiv]; |
2418 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
2419 | assert (!c_ekk_IsSet(fact->bitArray,jpiv)||jpiv>fact->nrow); |
2420 | fact->lastSlack = lastSlack; |
2421 | } else if (!now) { |
2422 | fact->lastSlack=0; |
2423 | } |
2424 | } |
2425 | } |
2426 | fact->firstNonSlack=hpivco_new[lastSlack]; |
2427 | #ifndef NDEBUG |
2428 | { |
2429 | int lastSlack = fact->lastSlack; |
2430 | int firstDo=hpivco_new[lastSlack]; |
2431 | int ipiv=hpivco_new[0]; |
2432 | int now = fact->numberSlacks; |
2433 | if (now) { |
2434 | while (1) { |
2435 | if (ipiv>fact->nrow||ipiv==firstDo) |
2436 | break; |
2437 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
2438 | ipiv=hpivco_new[ipiv]; |
2439 | } |
2440 | if (ipiv<=fact->nrow) { |
2441 | while (1) { |
2442 | if (ipiv>fact->nrow) |
2443 | break; |
2444 | assert (!c_ekk_IsSet(fact->bitArray,ipiv)); |
2445 | ipiv=hpivco_new[ipiv]; |
2446 | } |
2447 | } |
2448 | } |
2449 | } |
2450 | #endif |
2451 | } |
2452 | fact->nnentu += nuspik; |
2453 | #ifdef CLP_REUSE_ETAS |
2454 | if (fact->first_dense>=fact->last_dense) { |
2455 | // save |
2456 | fact->nnentu++; |
2457 | dluval[fact->nnentu]=del3Orig; |
2458 | hrowi[fact->nnentu]=kpivrw; |
2459 | int * putSeq = fact->xrsadr+2*fact->nrowmx+2; |
2460 | int * position = putSeq+fact->maxinv; |
2461 | int * putStart = position+fact->maxinv; |
2462 | int nnentu_at_factor=putStart[fact->nrow]&0x7fffffff; |
2463 | //putStart[fact->nrow+fact->npivots]=fact->nnentu+1; |
2464 | int where; |
2465 | if (mcstrt_piv<nnentu_at_factor) { |
2466 | // original LU |
2467 | where=kpivrw-1; |
2468 | } else { |
2469 | // could do binary search |
2470 | int * look = putStart+fact->nrow; |
2471 | for (where=fact->npivots-1;where>=0;where--) { |
2472 | if (mcstrt_piv==(look[where]&0x7fffffff)) |
2473 | break; |
2474 | } |
2475 | assert (where>=0); |
2476 | where += fact->nrow; |
2477 | } |
2478 | position[fact->npivots-1]=where; |
2479 | if (orig_nincol == 0) { |
2480 | // flag |
2481 | putStart[fact->nrow+fact->npivots-1] |= 0x80000000; |
2482 | } |
2483 | } |
2484 | #endif |
2485 | { |
2486 | int kdnspt = fact->nnetas - fact->nnentl; |
2487 | |
2488 | /* fact->R_etas_start[fact->nR_etas + 1] is -(the number of els in R) */ |
2489 | nnentl = fact->nnetas - ((kdnspt - 1) + fact->R_etas_start[fact->nR_etas + 1]); |
2490 | } |
2491 | fact->demark = (fact->nnentu + nnentl) - fact->demark; |
2492 | |
2493 | /* if need to redo row version */ |
2494 | if (! fact->rows_ok&&fact->first_dense>=fact->last_dense) { |
2495 | int =10000; |
2496 | int spareSpace; |
2497 | if (fact->if_sparse_update>0) { |
2498 | spareSpace=(fact->nnetas-fact->nnentu-fact->nnentl); |
2499 | } else { |
2500 | /* missing out nnentl stuff */ |
2501 | spareSpace=fact->nnetas-fact->nnentu; |
2502 | } |
2503 | /* save clean row copy if enough room */ |
2504 | nroom = spareSpace / nrow; |
2505 | |
2506 | if ((fact->nnentu<<3)>150*fact->maxinv) { |
2507 | extraSpace=150*fact->maxinv; |
2508 | } else { |
2509 | extraSpace=fact->nnentu<<3; |
2510 | } |
2511 | |
2512 | ifrows = false; |
2513 | if (fact->nnetas>fact->nnentu+fact->nnentl+extraSpace) { |
2514 | ifrows = true; |
2515 | } |
2516 | if (nroom < 5) { |
2517 | ifrows = false; |
2518 | } |
2519 | |
2520 | if (nroom > CoinMin(50, fact->maxinv - (fact->iterno - fact->iterin))) { |
2521 | ifrows = true; |
2522 | } |
2523 | |
2524 | #ifdef PRINT_DEBUGx |
2525 | printf(" redoing row copy %d %d %d\n" ,ifrows,nroom,spareSpace); |
2526 | #endif |
2527 | if (1) { |
2528 | if (fact->num_resets<1000000) { |
2529 | if (ifrows) { |
2530 | fact->num_resets++; |
2531 | if (npivot>40&&fact->num_resets<<4>npivot) { |
2532 | fact->eta_size=static_cast<int>(1.05*fact->eta_size); |
2533 | fact->num_resets=1000000; |
2534 | } |
2535 | } else { |
2536 | fact->eta_size=static_cast<int>(1.1*fact->eta_size); |
2537 | fact->num_resets=1000000; |
2538 | } |
2539 | if (fact->maxNNetas>0&&fact->eta_size> |
2540 | fact->maxNNetas) { |
2541 | fact->eta_size=fact->maxNNetas; |
2542 | } |
2543 | } |
2544 | } |
2545 | fact->rows_ok = ifrows; |
2546 | if (ifrows) { |
2547 | int ibase = 1; |
2548 | c_ekkizero(nrow,&hinrow[1]); |
2549 | for (i = 1; i <= nrow; ++i) { |
2550 | int kx = mcstrt[i]; |
2551 | int nel = hrowi[kx]; |
2552 | int kcs = kx + 1; |
2553 | int kce = kx + nel; |
2554 | for (kc = kcs; kc <= kce; ++kc) { |
2555 | int irow = UNSHIFT_INDEX(hrowi[kc]); |
2556 | if (dluval[kc]) { |
2557 | hinrow[irow]++; |
2558 | } |
2559 | } |
2560 | } |
2561 | int * eta_last=mpermu+nrow*2+3; |
2562 | int * eta_next=eta_last+nrow+2; |
2563 | eta_next[0]=1; |
2564 | for (i = 1; i <= nrow; ++i) { |
2565 | eta_next[i]=i+1; |
2566 | eta_last[i]=i-1; |
2567 | mrstrt[i] = ibase; |
2568 | ibase = ibase + hinrow[i] + nroom; |
2569 | hinrow[i] = 0; |
2570 | } |
2571 | eta_last[nrow+1]=nrow; |
2572 | //eta_next[nrow+1]=nrow+2; |
2573 | mrstrt[nrow+1]=ibase; |
2574 | if (fact->xe2adr == 0) { |
2575 | for (i = 1; i <= nrow; ++i) { |
2576 | int kx = mcstrt[i]; |
2577 | int nel = hrowi[kx]; |
2578 | int kcs = kx + 1; |
2579 | int kce = kx + nel; |
2580 | for (kc = kcs; kc <= kce; ++kc) { |
2581 | if (dluval[kc]) { |
2582 | int irow = UNSHIFT_INDEX(hrowi[kc]); |
2583 | int iput = hinrow[irow]; |
2584 | assert (irow); |
2585 | hcoli[mrstrt[irow] + iput] = i; |
2586 | hinrow[irow] = iput + 1; |
2587 | } |
2588 | } |
2589 | } |
2590 | } else { |
2591 | for (i = 1; i <= nrow; ++i) { |
2592 | int kx = mcstrt[i]; |
2593 | int nel = hrowi[kx]; |
2594 | int kcs = kx + 1; |
2595 | int kce = kx + nel; |
2596 | for (kc = kcs; kc <= kce; ++kc) { |
2597 | int irow = UNSHIFT_INDEX(hrowi[kc]); |
2598 | int iput = hinrow[irow]; |
2599 | hcoli[mrstrt[irow] + iput] = i; |
2600 | de2val[mrstrt[irow] + iput] = dluval[kc]; |
2601 | hinrow[irow] = iput + 1; |
2602 | } |
2603 | } |
2604 | } |
2605 | } else { |
2606 | mrstrt[1] = 0; |
2607 | if (fact->if_sparse_update>0&&fact->iterno-fact->iterin>100) { |
2608 | goto L7000; |
2609 | } |
2610 | } |
2611 | } |
2612 | goto L8000; |
2613 | |
2614 | /* OUT OF SPACE - COULD PACK DOWN */ |
2615 | L7000: |
2616 | irtcod = 1; |
2617 | #ifdef PRINT_DEBUG |
2618 | printf(" out of space\n" ); |
2619 | #endif |
2620 | if (1) { |
2621 | if ((npivot<<3)<fact->nbfinv) { |
2622 | /* low on space */ |
2623 | if (npivot<10) { |
2624 | fact->eta_size=fact->eta_size<<1; |
2625 | } else { |
2626 | double ratio=fact->nbfinv; |
2627 | double ratio2=npivot<<3; |
2628 | ratio=ratio/ratio2; |
2629 | if (ratio>2.0) { |
2630 | ratio=2.0; |
2631 | } /* endif */ |
2632 | fact->eta_size=static_cast<int>(ratio*fact->eta_size); |
2633 | } /* endif */ |
2634 | } else { |
2635 | fact->eta_size=static_cast<int>(1.05*fact->eta_size); |
2636 | } /* endif */ |
2637 | if (fact->maxNNetas>0&&fact->eta_size> |
2638 | fact->maxNNetas) { |
2639 | fact->eta_size=fact->maxNNetas; |
2640 | } |
2641 | } |
2642 | |
2643 | /* ================= IF ERROR SHOULD WE GET RID OF LAST ITERATION??? */ |
2644 | L8000: |
2645 | |
2646 | *nuspikp = nuspik; |
2647 | #ifdef MORE_DEBUG |
2648 | for (int i=1;i<=fact->nrow;i++) { |
2649 | int kx=mcstrt[i]; |
2650 | int nel=hrowi[kx]; |
2651 | for (int j=0;j<nel;j++) { |
2652 | assert (i!=hrowi[j+kx+1]); |
2653 | } |
2654 | } |
2655 | #endif |
2656 | #ifdef CLP_REUSE_ETAS |
2657 | fact->save_nnentu=fact->nnentu; |
2658 | #endif |
2659 | return (irtcod); |
2660 | } /* c_ekketsj */ |
2661 | static void c_ekkftj4p(const EKKfactinfo * COIN_RESTRICT2 fact, |
2662 | double * COIN_RESTRICT dwork1, int firstNonZero) |
2663 | { |
2664 | /* this is where the L factors start, because this is the place |
2665 | * where c_ekktria starts laying them down (see initialization of xnetal). |
2666 | */ |
2667 | int lstart=fact->lstart; |
2668 | const int * COIN_RESTRICT hpivco = fact->kcpadr; |
2669 | int firstLRow = hpivco[lstart]; |
2670 | if (firstNonZero>firstLRow) { |
2671 | lstart += firstNonZero-firstLRow; |
2672 | } |
2673 | assert (firstLRow==fact->firstLRow); |
2674 | int jpiv=hpivco[lstart]; |
2675 | const double * COIN_RESTRICT dluval = fact->xeeadr; |
2676 | const int * COIN_RESTRICT hrowi = fact->xeradr; |
2677 | const int * COIN_RESTRICT mcstrt = fact->xcsadr+lstart; |
2678 | int ndo=fact->xnetal-lstart; |
2679 | int i, iel; |
2680 | |
2681 | /* find first non-zero */ |
2682 | for (i=0;i<ndo;i++) { |
2683 | if (dwork1[i+jpiv]!=0.0) |
2684 | break; |
2685 | } |
2686 | for (; i < ndo; ++i) { |
2687 | double dv = dwork1[i+jpiv]; |
2688 | |
2689 | if (dv != 0.) { |
2690 | int kce1 = mcstrt[i + 1] ; |
2691 | |
2692 | for (iel = mcstrt[i]; iel > kce1; --iel) { |
2693 | int irow0 = hrowi[iel]; |
2694 | SHIFT_REF(dwork1, irow0) += dv * dluval[iel]; |
2695 | } |
2696 | } |
2697 | } |
2698 | |
2699 | } /* c_ekkftj4p */ |
2700 | |
2701 | /* |
2702 | * This version is more efficient for input columns that are sparse. |
2703 | * It is instructive to consider the case of an especially sparse column, |
2704 | * which is a slack. The slack for row r has exactly one non-zero element, |
2705 | * in row r, which is +-1.0. Let pr = mpermu[r]. |
2706 | * In this case, nincol==1 and mpt[0] == pr on entry. |
2707 | * if mpt[0] == pr <= jpiv |
2708 | * then this slack is completely unaffected by L; |
2709 | * this is reflected by the fact that save_where = last |
2710 | * after the first loop, so none of the remaining loops |
2711 | * ever execute, |
2712 | * else if mpt[0] == pr > jpiv, but pr-jpiv > ndo |
2713 | * then the slack is also unaffected by L, this time because |
2714 | * its row is "after" L. During factorization, it may |
2715 | * be the case that the first part of the basis is upper |
2716 | * triangular (c_ekktria), but it may also be the case that the |
2717 | * last part of the basis is upper triangular (in which case the |
2718 | * L triangle gets "chopped off" on the right). In both cases, |
2719 | * no L entries are required. Since in this case the tests |
2720 | * (i<=ndo) will fail (and dwork1[ipiv]==1.0), the code will |
2721 | * do nothing. |
2722 | * else if mpt[0] == pr > jpiv and pr-jpiv <= ndo |
2723 | * then the slack *is* affected by L. |
2724 | * the for-loop inside the second while-loop will discover |
2725 | * that none of the factors for the corresponding column of L |
2726 | * are non-zero in the slack column, so last will not be incremented. |
2727 | * We multiply the eta-vector, and the last loop does nothing. |
2728 | */ |
2729 | static int c_ekkftj4_sparse(const EKKfactinfo * COIN_RESTRICT2 fact, |
2730 | double * COIN_RESTRICT dwork1, int * COIN_RESTRICT mpt, |
2731 | int nincol,int * COIN_RESTRICT spare) |
2732 | { |
2733 | const int nrow = fact->nrow; |
2734 | /* this is where the L factors start, because this is the place |
2735 | * where c_ekktria starts laying them down (see initialization of xnetal). |
2736 | */ |
2737 | int lstart=fact->lstart; |
2738 | const int * COIN_RESTRICT hpivco = fact->kcpadr; |
2739 | const double * COIN_RESTRICT dluval = fact->xeeadr; |
2740 | const int * COIN_RESTRICT hrowi = fact->xeradr; |
2741 | const int * COIN_RESTRICT mcstrt = fact->xcsadr+lstart-1; |
2742 | double tolerance = fact->zeroTolerance; |
2743 | int jpiv=hpivco[lstart]-1; |
2744 | char * COIN_RESTRICT nonzero=fact->nonzero; |
2745 | int ndo=fact->xnetalval; |
2746 | int k,nStack; |
2747 | int nList=0; |
2748 | int iPivot; |
2749 | int * COIN_RESTRICT list = spare; |
2750 | int * COIN_RESTRICT stack = spare+nrow; |
2751 | int * COIN_RESTRICT next = stack+nrow; |
2752 | double dv; |
2753 | int iel; |
2754 | int nput=0,kput=nrow; |
2755 | int check=jpiv+ndo+1; |
2756 | const int * COIN_RESTRICT mcstrt2 = mcstrt-jpiv; |
2757 | |
2758 | for (k=0;k<nincol;k++) { |
2759 | nStack=1; |
2760 | iPivot=mpt[k]; |
2761 | if (nonzero[iPivot]!=1&&iPivot>jpiv&&iPivot<check) { |
2762 | stack[0]=iPivot; |
2763 | next[0]=mcstrt2[iPivot+1]+1; |
2764 | while (nStack) { |
2765 | int kPivot,j; |
2766 | /* take off stack */ |
2767 | kPivot=stack[--nStack]; |
2768 | if (nonzero[kPivot]!=1&&kPivot>jpiv&&kPivot<check) { |
2769 | j=next[nStack]; |
2770 | if (j>mcstrt2[kPivot]) { |
2771 | /* finished so mark */ |
2772 | list[nList++]=kPivot; |
2773 | nonzero[kPivot]=1; |
2774 | } else { |
2775 | kPivot=UNSHIFT_INDEX(hrowi[j]); |
2776 | /* put back on stack */ |
2777 | next[nStack++] ++; |
2778 | if (!nonzero[kPivot]) { |
2779 | /* and new one */ |
2780 | stack[nStack]=kPivot; |
2781 | nonzero[kPivot]=2; |
2782 | next[nStack++]=mcstrt2[kPivot+1]+1; |
2783 | } |
2784 | } |
2785 | } else if (kPivot>=check) { |
2786 | list[--kput]=kPivot; |
2787 | nonzero[kPivot]=1; |
2788 | } |
2789 | } |
2790 | } else if (nonzero[iPivot]!=1) { |
2791 | /* nothing to do (except check size at end) */ |
2792 | list[--kput]=iPivot; |
2793 | nonzero[iPivot]=1; |
2794 | } |
2795 | } |
2796 | for (k=nList-1;k>=0;k--) { |
2797 | double dv; |
2798 | iPivot = list[k]; |
2799 | dv = dwork1[iPivot]; |
2800 | nonzero[iPivot]=0; |
2801 | if (fabs(dv) > tolerance) { |
2802 | /* the same code as in c_ekkftj4p */ |
2803 | int kce1 = mcstrt2[iPivot + 1]; |
2804 | for (iel = mcstrt2[iPivot]; iel > kce1; --iel) { |
2805 | int irow0 = hrowi[iel]; |
2806 | SHIFT_REF(dwork1, irow0) += dv * dluval[iel]; |
2807 | } |
2808 | mpt[nput++]=iPivot; |
2809 | } else { |
2810 | dwork1[iPivot]=0.0; /* force to zero, not just near zero */ |
2811 | } |
2812 | } |
2813 | /* check remainder */ |
2814 | for (k=kput;k<nrow;k++) { |
2815 | iPivot = list[k]; |
2816 | nonzero[iPivot]=0; |
2817 | dv = dwork1[iPivot]; |
2818 | if (fabs(dv) > tolerance) { |
2819 | mpt[nput++]=iPivot; |
2820 | } else { |
2821 | dwork1[iPivot]=0.0; /* force to zero, not just near zero */ |
2822 | } |
2823 | } |
2824 | |
2825 | return (nput); |
2826 | } /* c_ekkftj4 */ |
2827 | /* |
2828 | * This applies the R transformations of the F-T LU update procedure, |
2829 | * equation 3.11 on p. 270 in the 1972 Math Programming paper. |
2830 | * Note that since the non-zero off-diagonal elements are in a row, |
2831 | * multiplying an R by a column is a reduction, not like applying |
2832 | * L or U. |
2833 | * |
2834 | * Note that this may introduce new non-zeros in dwork1, |
2835 | * since an hpivco entry may correspond to a zero element, |
2836 | * and that some non-zeros in dwork1 may be cancelled. |
2837 | */ |
2838 | static int c_ekkftjl_sparse3(const EKKfactinfo * COIN_RESTRICT2 fact, |
2839 | double * COIN_RESTRICT dwork1, |
2840 | int * COIN_RESTRICT mpt, |
2841 | int * COIN_RESTRICT hput, double * COIN_RESTRICT dluput , |
2842 | int nincol) |
2843 | { |
2844 | int i; |
2845 | int knext; |
2846 | int ipiv; |
2847 | double dv; |
2848 | const double * COIN_RESTRICT dluval = fact->R_etas_element+1; |
2849 | const int * COIN_RESTRICT hrowi = fact->R_etas_index+1; |
2850 | const int * COIN_RESTRICT mcstrt = fact->R_etas_start; |
2851 | int ndo=fact->nR_etas; |
2852 | double tolerance = fact->zeroTolerance; |
2853 | const int * COIN_RESTRICT hpivco = fact->hpivcoR; |
2854 | /* and make cleaner */ |
2855 | hput++; |
2856 | dluput++; |
2857 | |
2858 | /* DO ANY ROW TRANSFORMATIONS */ |
2859 | |
2860 | /* Function Body */ |
2861 | /* mpt has correct list of nonzeros */ |
2862 | if (ndo != 0) { |
2863 | knext = mcstrt[1]; |
2864 | for (i = 1; i <= ndo; ++i) { |
2865 | int k1 = knext; /* == mcstrt[i] */ |
2866 | int iel; |
2867 | ipiv = hpivco[i]; |
2868 | dv = dwork1[ipiv]; |
2869 | bool onList = (dv!=0.0); |
2870 | knext = mcstrt[i + 1]; |
2871 | |
2872 | for (iel = knext ; iel < k1; ++iel) { |
2873 | int irow = hrowi[iel]; |
2874 | dv += SHIFT_REF(dwork1, irow) * dluval[iel]; |
2875 | } |
2876 | /* (1) if dwork[ipiv] == 0.0, then this may add a non-zero. |
2877 | * (2) if dwork[ipiv] != 0.0, then this may cancel out a non-zero. |
2878 | */ |
2879 | if (onList) { |
2880 | if (fabs(dv) > tolerance) { |
2881 | dwork1[ipiv]=dv; |
2882 | } else { |
2883 | dwork1[ipiv] = 1.0e-128; |
2884 | } |
2885 | } else { |
2886 | if (fabs(dv) > tolerance) { |
2887 | /* put on list if not there */ |
2888 | mpt[nincol++]=ipiv; |
2889 | dwork1[ipiv]=dv; |
2890 | } |
2891 | } |
2892 | } |
2893 | } |
2894 | knext=0; |
2895 | for (i=0; i<nincol; i++) { |
2896 | ipiv=mpt[i]; |
2897 | dv=dwork1[ipiv]; |
2898 | if (fabs(dv) > tolerance) { |
2899 | hput[knext]=SHIFT_INDEX(ipiv); |
2900 | dluput[knext]=dv; |
2901 | mpt[knext++]=ipiv; |
2902 | } else { |
2903 | dwork1[ipiv]=0.0; |
2904 | } |
2905 | } |
2906 | return knext; |
2907 | } /* c_ekkftjl */ |
2908 | |
2909 | static int c_ekkftjl_sparse2(const EKKfactinfo * COIN_RESTRICT2 fact, |
2910 | double * COIN_RESTRICT dwork1, |
2911 | int * COIN_RESTRICT mpt, |
2912 | int nincol) |
2913 | { |
2914 | double tolerance = fact->zeroTolerance; |
2915 | const double * COIN_RESTRICT dluval = fact->R_etas_element+1; |
2916 | const int * COIN_RESTRICT hrowi = fact->R_etas_index+1; |
2917 | const int * COIN_RESTRICT mcstrt = fact->R_etas_start; |
2918 | int ndo=fact->nR_etas; |
2919 | const int * COIN_RESTRICT hpivco = fact->hpivcoR; |
2920 | int i; |
2921 | int knext; |
2922 | int ipiv; |
2923 | double dv; |
2924 | |
2925 | /* DO ANY ROW TRANSFORMATIONS */ |
2926 | |
2927 | /* Function Body */ |
2928 | /* mpt has correct list of nonzeros */ |
2929 | if (ndo != 0) { |
2930 | knext = mcstrt[1]; |
2931 | for (i = 1; i <= ndo; ++i) { |
2932 | int k1 = knext; /* == mcstrt[i] */ |
2933 | int iel; |
2934 | ipiv = hpivco[i]; |
2935 | dv = dwork1[ipiv]; |
2936 | bool onList = (dv!=0.0); |
2937 | knext = mcstrt[i + 1]; |
2938 | |
2939 | for (iel = knext ; iel < k1; ++iel) { |
2940 | int irow = hrowi[iel]; |
2941 | dv += SHIFT_REF(dwork1, irow) * dluval[iel]; |
2942 | } |
2943 | /* (1) if dwork[ipiv] == 0.0, then this may add a non-zero. |
2944 | * (2) if dwork[ipiv] != 0.0, then this may cancel out a non-zero. |
2945 | */ |
2946 | if (onList) { |
2947 | if (fabs(dv) > tolerance) { |
2948 | dwork1[ipiv]=dv; |
2949 | } else { |
2950 | dwork1[ipiv] = 1.0e-128; |
2951 | } |
2952 | } else { |
2953 | if (fabs(dv) > tolerance) { |
2954 | /* put on list if not there */ |
2955 | mpt[nincol++]=ipiv; |
2956 | dwork1[ipiv]=dv; |
2957 | } |
2958 | } |
2959 | } |
2960 | } |
2961 | knext=0; |
2962 | for (i=0; i<nincol; i++) { |
2963 | ipiv=mpt[i]; |
2964 | dv=dwork1[ipiv]; |
2965 | if (fabs(dv) > tolerance) { |
2966 | mpt[knext++]=ipiv; |
2967 | } else { |
2968 | dwork1[ipiv]=0.0; |
2969 | } |
2970 | } |
2971 | return knext; |
2972 | } /* c_ekkftjl */ |
2973 | |
2974 | static void c_ekkftjl(const EKKfactinfo * COIN_RESTRICT2 fact, |
2975 | double * COIN_RESTRICT dwork1) |
2976 | { |
2977 | double tolerance = fact->zeroTolerance; |
2978 | const double * COIN_RESTRICT dluval = fact->R_etas_element+1; |
2979 | const int * COIN_RESTRICT hrowi = fact->R_etas_index+1; |
2980 | const int * COIN_RESTRICT mcstrt = fact->R_etas_start; |
2981 | int ndo=fact->nR_etas; |
2982 | const int * COIN_RESTRICT hpivco = fact->hpivcoR; |
2983 | int i; |
2984 | int knext; |
2985 | |
2986 | /* DO ANY ROW TRANSFORMATIONS */ |
2987 | |
2988 | /* Function Body */ |
2989 | if (ndo != 0) { |
2990 | /* |
2991 | * The following three lines are here just to ensure that this |
2992 | * new formulation of the loop has exactly the same effect |
2993 | * as the original. |
2994 | */ |
2995 | { |
2996 | int ipiv = hpivco[1]; |
2997 | double dv = dwork1[ipiv]; |
2998 | dwork1[ipiv] = (fabs(dv) > tolerance) ? dv : 0.0; |
2999 | } |
3000 | |
3001 | knext = mcstrt[1]; |
3002 | for (i = 1; i <= ndo; ++i) { |
3003 | int k1 = knext; /* == mcstrt[i] */ |
3004 | int ipiv = hpivco[i]; |
3005 | double dv = dwork1[ipiv]; |
3006 | int iel; |
3007 | //#define UNROLL3 2 |
3008 | #ifndef UNROLL3 |
3009 | #if CLP_OSL==2||CLP_OSL==3 |
3010 | #define UNROLL3 2 |
3011 | #else |
3012 | #define UNROLL3 1 |
3013 | #endif |
3014 | #endif |
3015 | knext = mcstrt[i + 1]; |
3016 | |
3017 | #if UNROLL3<2 |
3018 | for (iel = knext ; iel < k1; ++iel) { |
3019 | int irow = hrowi[iel]; |
3020 | dv += SHIFT_REF(dwork1, irow) * dluval[iel]; |
3021 | } |
3022 | #else |
3023 | iel = knext; |
3024 | if (((k1-knext)&1)!=0) { |
3025 | int irow = hrowi[iel]; |
3026 | dv += SHIFT_REF(dwork1, irow) * dluval[iel]; |
3027 | iel++; |
3028 | } |
3029 | for ( ; iel < k1; iel+=2) { |
3030 | int irow0 = hrowi[iel]; |
3031 | double dval0 = dluval[iel]; |
3032 | int irow1 = hrowi[iel+1]; |
3033 | double dval1 = dluval[iel+1]; |
3034 | dv += SHIFT_REF(dwork1, irow0) * dval0; |
3035 | dv += SHIFT_REF(dwork1, irow1) * dval1; |
3036 | } |
3037 | #endif |
3038 | /* (1) if dwork[ipiv] == 0.0, then this may add a non-zero. |
3039 | * (2) if dwork[ipiv] != 0.0, then this may cancel out a non-zero. |
3040 | */ |
3041 | dwork1[ipiv] = (fabs(dv) > tolerance) ? dv : 0.0; |
3042 | } |
3043 | } |
3044 | } /* c_ekkftjl */ |
3045 | /* this assumes it is ok to reference back[loop_limit] */ |
3046 | /* another 3 seconds from a ~570 second run can be trimmed |
3047 | * by using two routines, one with scan==true and the other false, |
3048 | * since that eliminates the branch instructions involving them |
3049 | * entirely. This was how the code was originally written. |
3050 | * However, I'm still hoping that eventually we can use |
3051 | * C++ templates to do that for us automatically. |
3052 | */ |
3053 | static void |
3054 | c_ekkftjup_scan_aux(const EKKfactinfo * COIN_RESTRICT2 fact, |
3055 | double * COIN_RESTRICT dwork1, double * COIN_RESTRICT dworko , |
3056 | int loop_limit, int *ip, int ** mptp) |
3057 | { |
3058 | const double * COIN_RESTRICT dluval = fact->xeeadr+1; |
3059 | const int * COIN_RESTRICT hrowi = fact->xeradr+1; |
3060 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
3061 | const int * COIN_RESTRICT hpivro = fact->krpadr; |
3062 | const int * COIN_RESTRICT back=fact->back; |
3063 | double tolerance = fact->zeroTolerance; |
3064 | int ipiv = *ip; |
3065 | double dv = dwork1[ipiv]; |
3066 | |
3067 | int * mptX = *mptp; |
3068 | assert (mptX); |
3069 | while (ipiv != loop_limit) { |
3070 | int next_ipiv = back[ipiv]; |
3071 | |
3072 | dwork1[ipiv] = 0.0; |
3073 | #ifndef UNROLL4 |
3074 | #define UNROLL4 2 |
3075 | #endif |
3076 | /* invariant: dv == dwork1[ipiv] */ |
3077 | |
3078 | /* in the case of world.mps with dual, this condition is true |
3079 | * only 20-60% of the time. */ |
3080 | if (fabs(dv) > tolerance) { |
3081 | const int kx = mcstrt[ipiv]; |
3082 | const int nel = hrowi[kx-1]; |
3083 | const double dpiv = dluval[kx-1]; |
3084 | #if UNROLL4>1 |
3085 | const int * hrowi2=hrowi+kx; |
3086 | const int * hrowi2end=hrowi2+nel; |
3087 | const double * dluval2=dluval+kx; |
3088 | #else |
3089 | int iel; |
3090 | #endif |
3091 | |
3092 | dv*=dpiv; |
3093 | |
3094 | /* |
3095 | * The following loop is the unrolled version of this: |
3096 | * |
3097 | * for (iel = kx+1; iel <= kx + nel; iel++) { |
3098 | * SHIFT_REF(dwork1, hrowi[iel]) -= dv * dluval[iel]; |
3099 | * } |
3100 | */ |
3101 | #if UNROLL4<2 |
3102 | iel = kx; |
3103 | if (nel&1) { |
3104 | int irow = hrowi[iel]; |
3105 | double dval=dluval[iel]; |
3106 | SHIFT_REF(dwork1, irow) -= dv*dval; |
3107 | iel++; |
3108 | } |
3109 | for (; iel < kx + nel; iel+=2) { |
3110 | int irow0 = hrowi[iel]; |
3111 | int irow1 = hrowi[iel+1]; |
3112 | double dval0=dluval[iel]; |
3113 | double dval1=dluval[iel+1]; |
3114 | double d0=SHIFT_REF(dwork1, irow0); |
3115 | double d1=SHIFT_REF(dwork1, irow1); |
3116 | |
3117 | d0-=dv*dval0; |
3118 | d1-=dv*dval1; |
3119 | SHIFT_REF(dwork1, irow0)=d0; |
3120 | SHIFT_REF(dwork1, irow1)=d1; |
3121 | } /* end loop */ |
3122 | #else |
3123 | if ((nel&1)!=0) { |
3124 | int irow = *hrowi2; |
3125 | double dval=*dluval2; |
3126 | SHIFT_REF(dwork1, irow) -= dv*dval; |
3127 | hrowi2++; |
3128 | dluval2++; |
3129 | } |
3130 | for (; hrowi2 < hrowi2end; hrowi2 +=2,dluval2 +=2) { |
3131 | int irow0 = hrowi2[0]; |
3132 | int irow1 = hrowi2[1]; |
3133 | double dval0=dluval2[0]; |
3134 | double dval1=dluval2[1]; |
3135 | double d0=SHIFT_REF(dwork1, irow0); |
3136 | double d1=SHIFT_REF(dwork1, irow1); |
3137 | |
3138 | d0-=dv*dval0; |
3139 | d1-=dv*dval1; |
3140 | SHIFT_REF(dwork1, irow0)=d0; |
3141 | SHIFT_REF(dwork1, irow1)=d1; |
3142 | } |
3143 | #endif |
3144 | /* put this down here so that dv is less likely to cause a stall */ |
3145 | if (fabs(dv) >= tolerance) { |
3146 | int iput=hpivro[ipiv]; |
3147 | dworko[iput]=dv; |
3148 | *mptX++=iput-1; |
3149 | } |
3150 | } |
3151 | |
3152 | dv = dwork1[next_ipiv]; |
3153 | ipiv=next_ipiv; |
3154 | } /* endwhile */ |
3155 | |
3156 | *mptp = mptX; |
3157 | *ip = ipiv; |
3158 | } |
3159 | static void c_ekkftjup_aux3(const EKKfactinfo * COIN_RESTRICT2 fact, |
3160 | double * COIN_RESTRICT dwork1, double * COIN_RESTRICT dworko, |
3161 | const int * COIN_RESTRICT back, |
3162 | const int * COIN_RESTRICT hpivro, |
3163 | int *ipivp, int loop_limit, |
3164 | int **mptXp) |
3165 | |
3166 | { |
3167 | double tolerance = fact->zeroTolerance; |
3168 | int ipiv = *ipivp; |
3169 | if (ipiv!=loop_limit) { |
3170 | int *mptX = *mptXp; |
3171 | |
3172 | double dv = dwork1[ipiv]; |
3173 | |
3174 | do { |
3175 | int next_ipiv = back[ipiv]; |
3176 | double next_dv=dwork1[next_ipiv]; |
3177 | |
3178 | dwork1[ipiv]=0.0; |
3179 | |
3180 | if (fabs(dv)>=tolerance) { |
3181 | int iput=hpivro[ipiv]; |
3182 | dworko[iput]=dv; |
3183 | *mptX++=iput-1; |
3184 | } |
3185 | |
3186 | ipiv = next_ipiv; |
3187 | dv = next_dv; |
3188 | } while (ipiv!=loop_limit); |
3189 | |
3190 | *mptXp = mptX; |
3191 | *ipivp = ipiv; |
3192 | } |
3193 | } |
3194 | static void c_ekkftju_dense(const double *dluval, |
3195 | const int * COIN_RESTRICT hrowi, |
3196 | const int * COIN_RESTRICT mcstrt, |
3197 | const int * COIN_RESTRICT back, |
3198 | double * COIN_RESTRICT dwork1, |
3199 | int * start, int last, |
3200 | int offset , double *densew) |
3201 | { |
3202 | int ipiv=*start; |
3203 | |
3204 | while (ipiv>last ) { |
3205 | const int ipiv1=ipiv; |
3206 | double dv1=dwork1[ipiv1]; |
3207 | ipiv=back[ipiv]; |
3208 | if (fabs(dv1) > 1.0e-14) { |
3209 | const int kx1 = mcstrt[ipiv1]; |
3210 | const int nel1 = hrowi[kx1-1]; |
3211 | const double dpiv1 = dluval[kx1-1]; |
3212 | |
3213 | int iel,k; |
3214 | const int n1=offset+ipiv1; /* number in dense part */ |
3215 | |
3216 | const int nsparse1=nel1-n1; |
3217 | const int k1=kx1+nsparse1; |
3218 | const double *dlu1=&dluval[k1]; |
3219 | |
3220 | int ipiv2=back[ipiv1]; |
3221 | const int nskip=ipiv1-ipiv2; |
3222 | |
3223 | dv1*=dpiv1; |
3224 | |
3225 | dwork1[ipiv1]=dv1; |
3226 | |
3227 | for (k = n1 - (nskip-1) -1; k >=0 ; k--) { |
3228 | const double dval = dv1*dlu1[k]; |
3229 | double dv2=densew[k]-dval; |
3230 | ipiv=back[ipiv]; |
3231 | if (fabs(dv2) > 1.0e-14) { |
3232 | const int kx2 = mcstrt[ipiv2]; |
3233 | const int nel2 = hrowi[kx2-1]; |
3234 | const double dpiv2 = dluval[kx2-1]; |
3235 | |
3236 | /* number in dense part is k */ |
3237 | const int nsparse2=nel2-k; |
3238 | |
3239 | const int k2=kx2+nsparse2; |
3240 | const double *dlu2=&dluval[k2]; |
3241 | |
3242 | dv2*=dpiv2; |
3243 | densew[k]=dv2; /* was dwork1[ipiv2]=dv2; */ |
3244 | |
3245 | k--; |
3246 | |
3247 | /* |
3248 | * The following loop is the unrolled version of: |
3249 | * |
3250 | * for (; k >= 0; k--) { |
3251 | * densew[k]-=dv1*dlu1[k]+dv2*dlu2[k]; |
3252 | * } |
3253 | */ |
3254 | if ((k&1)==0) { |
3255 | densew[k]-=dv1*dlu1[k]+dv2*dlu2[k]; |
3256 | k--; |
3257 | } |
3258 | for (; k >=0 ; k-=2) { |
3259 | double da,db; |
3260 | da=densew[k]; |
3261 | db=densew[k-1]; |
3262 | da-=dv1*dlu1[k]; |
3263 | db-=dv1*dlu1[k-1]; |
3264 | da-=dv2*dlu2[k]; |
3265 | db-=dv2*dlu2[k-1]; |
3266 | densew[k]=da; |
3267 | densew[k-1]=db; |
3268 | } |
3269 | /* end loop */ |
3270 | |
3271 | /* |
3272 | * The following loop is the unrolled version of: |
3273 | * |
3274 | * for (iel=kx2+nsparse2-1; iel >= kx2; iel--) { |
3275 | * SHIFT_REF(dwork1, hrowi[iel]) -= dv2*dluval[iel]; |
3276 | * } |
3277 | */ |
3278 | iel=kx2+nsparse2-1; |
3279 | if ((nsparse2&1)!=0) { |
3280 | int irow0 = hrowi[iel]; |
3281 | double dval=dluval[iel]; |
3282 | SHIFT_REF(dwork1,irow0) -= dv2*dval; |
3283 | iel--; |
3284 | } |
3285 | for (; iel >=kx2 ; iel-=2) { |
3286 | double dval0 = dluval[iel]; |
3287 | double dval1 = dluval[iel-1]; |
3288 | int irow0 = hrowi[iel]; |
3289 | int irow1 = hrowi[iel-1]; |
3290 | double d0 = SHIFT_REF(dwork1, irow0); |
3291 | double d1 = SHIFT_REF(dwork1, irow1); |
3292 | |
3293 | d0-=dv2*dval0; |
3294 | d1-=dv2*dval1; |
3295 | SHIFT_REF(dwork1, irow0) = d0; |
3296 | SHIFT_REF(dwork1, irow1) = d1; |
3297 | } |
3298 | /* end loop */ |
3299 | |
3300 | } else { |
3301 | densew[k]=0.0; |
3302 | /* skip if next deleted */ |
3303 | k-=ipiv2-ipiv-1; |
3304 | ipiv2=ipiv; |
3305 | if (ipiv<last) { |
3306 | k--; |
3307 | for (; k >=0 ; k--) { |
3308 | double dval; |
3309 | dval=dv1*dlu1[k]; |
3310 | densew[k]=densew[k]-dval; |
3311 | } |
3312 | } |
3313 | } |
3314 | } |
3315 | |
3316 | /* |
3317 | * The following loop is the unrolled version of: |
3318 | * |
3319 | * for (iel=kx1+nsparse1-1; iel >= kx1; iel--) { |
3320 | * SHIFT_REF(dwork1, hrowi[iel]) -= dv1*dluval[iel]; |
3321 | * } |
3322 | */ |
3323 | iel=kx1+nsparse1-1; |
3324 | if ((nsparse1&1)!=0) { |
3325 | int irow0 = hrowi[iel]; |
3326 | double dval=dluval[iel]; |
3327 | SHIFT_REF(dwork1, irow0) -= dv1*dval; |
3328 | iel--; |
3329 | } |
3330 | for (; iel >=kx1 ; iel-=2) { |
3331 | double dval0=dluval[iel]; |
3332 | double dval1=dluval[iel-1]; |
3333 | int irow0 = hrowi[iel]; |
3334 | int irow1 = hrowi[iel-1]; |
3335 | double d0=SHIFT_REF(dwork1, irow0); |
3336 | double d1=SHIFT_REF(dwork1, irow1); |
3337 | |
3338 | d0-=dv1*dval0; |
3339 | d1-=dv1*dval1; |
3340 | SHIFT_REF(dwork1, irow0) = d0; |
3341 | SHIFT_REF(dwork1, irow1) = d1; |
3342 | } |
3343 | /* end loop */ |
3344 | } else { |
3345 | dwork1[ipiv1]=0.0; |
3346 | } /* endif */ |
3347 | } /* endwhile */ |
3348 | *start=ipiv; |
3349 | } |
3350 | |
3351 | /* do not use return value if mpt==0 */ |
3352 | /* using dual, this is usually called via c_ekkftrn_ft, from c_ekksdul |
3353 | * (so mpt is non-null). |
3354 | * it is generally called every iteration, but sometimes several iterations |
3355 | * are skipped (null moves?). |
3356 | * |
3357 | * generally, back[i] == i-1 (initialized in c_ekkshfv towards the end). |
3358 | */ |
3359 | static int c_ekkftjup(const EKKfactinfo * COIN_RESTRICT2 fact, |
3360 | double * COIN_RESTRICT dwork1, int last, |
3361 | double * COIN_RESTRICT dworko , int * COIN_RESTRICT mpt) |
3362 | { |
3363 | const double * COIN_RESTRICT dluval = fact->xeeadr; |
3364 | const int * COIN_RESTRICT hrowi = fact->xeradr; |
3365 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
3366 | const int * COIN_RESTRICT hpivro = fact->krpadr; |
3367 | double tolerance = fact->zeroTolerance; |
3368 | int ndenuc=fact->ndenuc; |
3369 | const int first_dense=fact->first_dense; |
3370 | const int last_dense=fact->last_dense; |
3371 | int i; |
3372 | int * mptX = mpt; |
3373 | |
3374 | const int nrow = fact->nrow; |
3375 | const int * COIN_RESTRICT back=fact->back; |
3376 | int ipiv=back[nrow+1]; |
3377 | |
3378 | if (last_dense>first_dense&&mcstrt[ipiv]>=mcstrt[last_dense]) { |
3379 | c_ekkftjup_scan_aux(fact, |
3380 | dwork1, dworko, last_dense, &ipiv, |
3381 | &mptX); |
3382 | |
3383 | { |
3384 | int j; |
3385 | int n=0; |
3386 | const int firstDense = nrow- ndenuc+1; |
3387 | double *densew = &dwork1[firstDense]; |
3388 | int offset; |
3389 | |
3390 | /* check first dense to see where in triangle it is */ |
3391 | int last=first_dense; |
3392 | const int k1=mcstrt[last]; |
3393 | const int k2=k1+hrowi[k1]; |
3394 | |
3395 | for (j=k2; j>k1; j--) { |
3396 | int irow = UNSHIFT_INDEX(hrowi[j]); |
3397 | if (irow<firstDense) { |
3398 | break; |
3399 | } else { |
3400 | #ifdef DEBUG |
3401 | if (irow!=last-1) { |
3402 | abort(); |
3403 | } |
3404 | #endif |
3405 | last=irow; |
3406 | n++; |
3407 | } |
3408 | } |
3409 | offset=n-first_dense; |
3410 | i=ipiv; |
3411 | /* loop counter i may be modified by this call */ |
3412 | c_ekkftju_dense(&dluval[1],&hrowi[1],mcstrt,back, |
3413 | dwork1, &i, first_dense,offset,densew); |
3414 | |
3415 | c_ekkftjup_aux3(fact,dwork1, dworko, back, hpivro, &ipiv, i, &mptX); |
3416 | } |
3417 | } |
3418 | |
3419 | c_ekkftjup_scan_aux(fact, |
3420 | dwork1, dworko, last, &ipiv, |
3421 | &mptX); |
3422 | |
3423 | if (ipiv!=0) { |
3424 | double dv = dwork1[ipiv]; |
3425 | |
3426 | do { |
3427 | int next_ipiv = back[ipiv]; |
3428 | double next_dv=dwork1[next_ipiv]; |
3429 | |
3430 | dwork1[ipiv]=0.0; |
3431 | |
3432 | if (fabs(dv)>=tolerance) { |
3433 | int iput=hpivro[ipiv]; |
3434 | dworko[iput]=-dv; |
3435 | *mptX++=iput-1; |
3436 | } |
3437 | |
3438 | ipiv = next_ipiv; |
3439 | dv = next_dv; |
3440 | } while (ipiv!=0); |
3441 | |
3442 | } |
3443 | return static_cast<int>(mptX-mpt); |
3444 | } |
3445 | /* this assumes it is ok to reference back[loop_limit] */ |
3446 | /* another 3 seconds from a ~570 second run can be trimmed |
3447 | * by using two routines, one with scan==true and the other false, |
3448 | * since that eliminates the branch instructions involving them |
3449 | * entirely. This was how the code was originally written. |
3450 | * However, I'm still hoping that eventually we can use |
3451 | * C++ templates to do that for us automatically. |
3452 | */ |
3453 | static void |
3454 | c_ekkftjup_scan_aux_pack(const EKKfactinfo * COIN_RESTRICT2 fact, |
3455 | double * COIN_RESTRICT dwork1, double * COIN_RESTRICT dworko , |
3456 | int loop_limit, int *ip, int ** mptp) |
3457 | { |
3458 | double tolerance = fact->zeroTolerance; |
3459 | const double *dluval = fact->xeeadr+1; |
3460 | const int *hrowi = fact->xeradr+1; |
3461 | const int *mcstrt = fact->xcsadr; |
3462 | const int *hpivro = fact->krpadr; |
3463 | const int * back=fact->back; |
3464 | int ipiv = *ip; |
3465 | double dv = dwork1[ipiv]; |
3466 | |
3467 | int * mptX = *mptp; |
3468 | #if 0 |
3469 | int inSlacks=0; |
3470 | int lastSlack; |
3471 | if (fact->numberSlacks!=0) |
3472 | lastSlack=fact->lastSlack; |
3473 | else |
3474 | lastSlack=0; |
3475 | if (c_ekk_IsSet(fact->bitArray,ipiv)) { |
3476 | printf("already in slacks - ipiv %d\n" ,ipiv); |
3477 | inSlacks=1; |
3478 | return; |
3479 | } |
3480 | #endif |
3481 | assert (mptX); |
3482 | while (ipiv != loop_limit) { |
3483 | int next_ipiv = back[ipiv]; |
3484 | #if 0 |
3485 | if (ipiv==lastSlack) { |
3486 | printf("now in slacks - ipiv %d\n" ,ipiv); |
3487 | inSlacks=1; |
3488 | break; |
3489 | } |
3490 | if (inSlacks) { |
3491 | assert (c_ekk_IsSet(fact->bitArray,ipiv)); |
3492 | assert (dluval[mcstrt[ipiv]-1]==-1.0); |
3493 | assert (hrowi[mcstrt[ipiv]-1]==0); |
3494 | } |
3495 | #endif |
3496 | dwork1[ipiv] = 0.0; |
3497 | /* invariant: dv == dwork1[ipiv] */ |
3498 | |
3499 | /* in the case of world.mps with dual, this condition is true |
3500 | * only 20-60% of the time. */ |
3501 | if (fabs(dv) > tolerance) { |
3502 | const int kx = mcstrt[ipiv]; |
3503 | const int nel = hrowi[kx-1]; |
3504 | const double dpiv = dluval[kx-1]; |
3505 | #ifndef UNROLL5 |
3506 | #define UNROLL5 2 |
3507 | #endif |
3508 | #if UNROLL5>1 |
3509 | const int * hrowi2=hrowi+kx; |
3510 | const int * hrowi2end=hrowi2+nel; |
3511 | const double * dluval2=dluval+kx; |
3512 | #else |
3513 | int iel; |
3514 | #endif |
3515 | |
3516 | dv*=dpiv; |
3517 | |
3518 | /* |
3519 | * The following loop is the unrolled version of this: |
3520 | * |
3521 | * for (iel = kx+1; iel <= kx + nel; iel++) { |
3522 | * SHIFT_REF(dwork1, hrowi[iel]) -= dv * dluval[iel]; |
3523 | * } |
3524 | */ |
3525 | #if UNROLL5<2 |
3526 | iel = kx; |
3527 | if (nel&1) { |
3528 | int irow = hrowi[iel]; |
3529 | double dval=dluval[iel]; |
3530 | SHIFT_REF(dwork1, irow) -= dv*dval; |
3531 | iel++; |
3532 | } |
3533 | for (; iel < kx + nel; iel+=2) { |
3534 | int irow0 = hrowi[iel]; |
3535 | int irow1 = hrowi[iel+1]; |
3536 | double dval0=dluval[iel]; |
3537 | double dval1=dluval[iel+1]; |
3538 | double d0=SHIFT_REF(dwork1, irow0); |
3539 | double d1=SHIFT_REF(dwork1, irow1); |
3540 | |
3541 | d0-=dv*dval0; |
3542 | d1-=dv*dval1; |
3543 | SHIFT_REF(dwork1, irow0)=d0; |
3544 | SHIFT_REF(dwork1, irow1)=d1; |
3545 | } /* end loop */ |
3546 | #else |
3547 | if ((nel&1)!=0) { |
3548 | int irow = *hrowi2; |
3549 | double dval=*dluval2; |
3550 | SHIFT_REF(dwork1, irow) -= dv*dval; |
3551 | hrowi2++; |
3552 | dluval2++; |
3553 | } |
3554 | for (; hrowi2 < hrowi2end; hrowi2 +=2,dluval2 +=2) { |
3555 | int irow0 = hrowi2[0]; |
3556 | int irow1 = hrowi2[1]; |
3557 | double dval0=dluval2[0]; |
3558 | double dval1=dluval2[1]; |
3559 | double d0=SHIFT_REF(dwork1, irow0); |
3560 | double d1=SHIFT_REF(dwork1, irow1); |
3561 | |
3562 | d0-=dv*dval0; |
3563 | d1-=dv*dval1; |
3564 | SHIFT_REF(dwork1, irow0)=d0; |
3565 | SHIFT_REF(dwork1, irow1)=d1; |
3566 | } |
3567 | #endif |
3568 | /* put this down here so that dv is less likely to cause a stall */ |
3569 | if (fabs(dv) >= tolerance) { |
3570 | int iput=hpivro[ipiv]; |
3571 | *dworko++=dv; |
3572 | *mptX++=iput-1; |
3573 | } |
3574 | } |
3575 | |
3576 | dv = dwork1[next_ipiv]; |
3577 | ipiv=next_ipiv; |
3578 | } /* endwhile */ |
3579 | |
3580 | *mptp = mptX; |
3581 | *ip = ipiv; |
3582 | } |
3583 | static void c_ekkftjup_aux3_pack(const EKKfactinfo * COIN_RESTRICT2 fact, |
3584 | double * COIN_RESTRICT dwork1, double * COIN_RESTRICT dworko, |
3585 | const int * COIN_RESTRICT back, |
3586 | const int * COIN_RESTRICT hpivro, |
3587 | int *ipivp, int loop_limit, |
3588 | int **mptXp) |
3589 | |
3590 | { |
3591 | double tolerance = fact->zeroTolerance; |
3592 | int ipiv = *ipivp; |
3593 | if (ipiv!=loop_limit) { |
3594 | int *mptX = *mptXp; |
3595 | |
3596 | double dv = dwork1[ipiv]; |
3597 | do { |
3598 | int next_ipiv = back[ipiv]; |
3599 | double next_dv=dwork1[next_ipiv]; |
3600 | |
3601 | dwork1[ipiv]=0.0; |
3602 | |
3603 | if (fabs(dv)>=tolerance) { |
3604 | int iput=hpivro[ipiv]; |
3605 | *dworko++=dv; |
3606 | *mptX++=iput-1; |
3607 | } |
3608 | |
3609 | ipiv = next_ipiv; |
3610 | dv = next_dv; |
3611 | } while (ipiv!=loop_limit); |
3612 | |
3613 | *mptXp = mptX; |
3614 | |
3615 | *ipivp = ipiv; |
3616 | } |
3617 | } |
3618 | |
3619 | /* do not use return value if mpt==0 */ |
3620 | /* using dual, this is usually called via c_ekkftrn_ft, from c_ekksdul |
3621 | * (so mpt is non-null). |
3622 | * it is generally called every iteration, but sometimes several iterations |
3623 | * are skipped (null moves?). |
3624 | * |
3625 | * generally, back[i] == i-1 (initialized in c_ekkshfv towards the end). |
3626 | */ |
3627 | static int c_ekkftjup_pack(const EKKfactinfo * COIN_RESTRICT2 fact, |
3628 | double * COIN_RESTRICT dwork1, int last, |
3629 | double * COIN_RESTRICT dworko , int * COIN_RESTRICT mpt) |
3630 | { |
3631 | const double * COIN_RESTRICT dluval = fact->xeeadr; |
3632 | const int * COIN_RESTRICT hrowi = fact->xeradr; |
3633 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
3634 | const int * COIN_RESTRICT hpivro = fact->krpadr; |
3635 | double tolerance = fact->zeroTolerance; |
3636 | int ndenuc=fact->ndenuc; |
3637 | const int first_dense=fact->first_dense; |
3638 | const int last_dense=fact->last_dense; |
3639 | int * mptX = mpt; int * mptY = mpt; |
3640 | |
3641 | const int nrow = fact->nrow; |
3642 | const int * COIN_RESTRICT back=fact->back; |
3643 | int ipiv=back[nrow+1]; |
3644 | assert (mpt); |
3645 | |
3646 | if (last_dense>first_dense&&mcstrt[ipiv]>=mcstrt[last_dense]) { |
3647 | c_ekkftjup_scan_aux_pack(fact, |
3648 | dwork1, dworko, last_dense, &ipiv, |
3649 | &mptX ); |
3650 | /* adjust */ |
3651 | dworko+= (mptX-mpt); |
3652 | mpt=mptX; |
3653 | { |
3654 | int j; |
3655 | int n=0; |
3656 | const int firstDense = nrow- ndenuc+1; |
3657 | double *densew = &dwork1[firstDense]; |
3658 | int offset; |
3659 | |
3660 | /* check first dense to see where in triangle it is */ |
3661 | int last=first_dense; |
3662 | const int k1=mcstrt[last]; |
3663 | const int k2=k1+hrowi[k1]; |
3664 | |
3665 | for (j=k2; j>k1; j--) { |
3666 | int irow = UNSHIFT_INDEX(hrowi[j]); |
3667 | if (irow<firstDense) { |
3668 | break; |
3669 | } else { |
3670 | #ifdef DEBUG |
3671 | if (irow!=last-1) { |
3672 | abort(); |
3673 | } |
3674 | #endif |
3675 | last=irow; |
3676 | n++; |
3677 | } |
3678 | } |
3679 | offset=n-first_dense; |
3680 | int ipiv2=ipiv; |
3681 | /* loop counter i may be modified by this call */ |
3682 | c_ekkftju_dense(&dluval[1],&hrowi[1],mcstrt,back, |
3683 | dwork1, &ipiv2, first_dense,offset,densew); |
3684 | |
3685 | c_ekkftjup_aux3_pack(fact,dwork1, dworko, back, hpivro, &ipiv, ipiv2,&mptX); |
3686 | /* adjust dworko */ |
3687 | dworko += (mptX-mpt); |
3688 | mpt=mptX; |
3689 | } |
3690 | } |
3691 | |
3692 | c_ekkftjup_scan_aux_pack(fact, |
3693 | dwork1, dworko, last, &ipiv, |
3694 | &mptX ); |
3695 | /* adjust dworko */ |
3696 | dworko += (mptX-mpt); |
3697 | while (ipiv!=0) { |
3698 | double dv = dwork1[ipiv]; |
3699 | int next_ipiv = back[ipiv]; |
3700 | |
3701 | dwork1[ipiv]=0.0; |
3702 | |
3703 | if (fabs(dv)>=tolerance) { |
3704 | int iput=hpivro[ipiv]; |
3705 | *dworko++=-dv; |
3706 | *mptX++=iput-1; |
3707 | } |
3708 | |
3709 | ipiv = next_ipiv; |
3710 | } |
3711 | |
3712 | return static_cast<int>(mptX-mptY); |
3713 | } |
3714 | static int c_ekkftju_sparse_a(const EKKfactinfo * COIN_RESTRICT2 fact, |
3715 | int * COIN_RESTRICT mpt, |
3716 | int nincol,int * COIN_RESTRICT spare) |
3717 | { |
3718 | const int * COIN_RESTRICT hrowi = fact->xeradr+1; |
3719 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
3720 | const int nrow = fact->nrow; |
3721 | char * COIN_RESTRICT nonzero=fact->nonzero; |
3722 | |
3723 | int k,nStack,kx,nel; |
3724 | int nList=0; |
3725 | int iPivot; |
3726 | /*int kkk=nincol;*/ |
3727 | int * COIN_RESTRICT list = spare; |
3728 | int * COIN_RESTRICT stack = spare+nrow; |
3729 | int * COIN_RESTRICT next = stack+nrow; |
3730 | for (k=0;k<nincol;k++) { |
3731 | nStack=1; |
3732 | iPivot=mpt[k]; |
3733 | stack[0]=iPivot; |
3734 | next[0]=0; |
3735 | while (nStack) { |
3736 | int kPivot,j; |
3737 | /* take off stack */ |
3738 | kPivot=stack[--nStack]; |
3739 | if (nonzero[kPivot]!=1) { |
3740 | kx = mcstrt[kPivot]; |
3741 | nel = hrowi[kx-1]; |
3742 | j=next[nStack]; |
3743 | if (j==nel) { |
3744 | /* finished so mark */ |
3745 | list[nList++]=kPivot; |
3746 | nonzero[kPivot]=1; |
3747 | } else { |
3748 | kPivot=hrowi[kx+j]; |
3749 | /* put back on stack */ |
3750 | next[nStack++] ++; |
3751 | if (!nonzero[kPivot]) { |
3752 | /* and new one */ |
3753 | stack[nStack]=kPivot; |
3754 | nonzero[kPivot]=2; |
3755 | next[nStack++]=0; |
3756 | /*kkk++;*/ |
3757 | } |
3758 | } |
3759 | } |
3760 | } |
3761 | } |
3762 | return (nList); |
3763 | } |
3764 | static int c_ekkftju_sparse_b(const EKKfactinfo * COIN_RESTRICT2 fact, |
3765 | double * COIN_RESTRICT dwork1, |
3766 | double * COIN_RESTRICT dworko , int * COIN_RESTRICT mpt, |
3767 | int nList,int * COIN_RESTRICT spare) |
3768 | { |
3769 | |
3770 | const double * COIN_RESTRICT dluval = fact->xeeadr+1; |
3771 | const int * COIN_RESTRICT hrowi = fact->xeradr+1; |
3772 | const int * COIN_RESTRICT mcstrt = fact->xcsadr; |
3773 | const int * COIN_RESTRICT hpivro = fact->krpadr; |
3774 | double tolerance = fact->zeroTolerance; |
3775 | char * COIN_RESTRICT nonzero=fact->nonzero; |
3776 | int i,k,kx,nel; |
3777 | int iPivot; |
3778 | /*int kkk=nincol;*/ |
3779 | int * COIN_RESTRICT list = spare; |
3780 | i=nList-1; |
3781 | nList=0; |
3782 | for (;i>=0;i--) { |
3783 | double dpiv; |
3784 | double dv; |
3785 | iPivot = list[i]; |
3786 | /*printf("pivot %d %d\n",i,iPivot);*/ |
3787 | dv=dwork1[iPivot]; |
3788 | kx = mcstrt[iPivot]; |
3789 | nel = hrowi[kx-1]; |
3790 | dwork1[iPivot]=0.0; |
3791 | dpiv = dluval[kx-1]; |
3792 | dv*=dpiv; |
3793 | nonzero[iPivot]=0; |
3794 | iPivot=hpivro[iPivot]; |
3795 | if (fabs(dv)>=tolerance) { |
3796 | *dworko++=dv; |
3797 | mpt[nList++]=iPivot-1; |
3798 | for (k = kx; k < kx+nel; k++) { |
3799 | double dval; |
3800 | double dd; |
3801 | int irow = hrowi[k]; |
3802 | dval=dluval[k]; |
3803 | dd=dwork1[irow]; |
3804 | dd-=dv*dval; |
3805 | dwork1[irow]=dd; |
3806 | } |
3807 | } |
3808 | } |
3809 | return (nList); |
3810 | } |
3811 | /* dwork1 = (B^-1)dwork1; |
3812 | * I think dpermu[1..nrow+1] is zeroed on exit (?) |
3813 | * I don't think it is expected to have any particular value on entry (?) |
3814 | */ |
3815 | int c_ekkftrn(const EKKfactinfo * COIN_RESTRICT2 fact, |
3816 | double * COIN_RESTRICT dwork1, |
3817 | double * COIN_RESTRICT dpermu, int * COIN_RESTRICT mpt,int numberNonZero) |
3818 | { |
3819 | const int * COIN_RESTRICT mpermu = fact->mpermu; |
3820 | int lastNonZero; |
3821 | int firstNonZero = c_ekkshfpi_list2(mpermu+1, dwork1+1, dpermu, mpt, |
3822 | numberNonZero,&lastNonZero); |
3823 | if (fact->nnentl&&lastNonZero>=fact->firstLRow) { |
3824 | /* dpermu = (L^-1)dpermu */ |
3825 | c_ekkftj4p(fact, dpermu, firstNonZero); |
3826 | } |
3827 | |
3828 | |
3829 | int lastSlack; |
3830 | |
3831 | /* dpermu = (R^-1) dpermu */ |
3832 | c_ekkftjl(fact, dpermu); |
3833 | |
3834 | assert (fact->numberSlacks!=0||!fact->lastSlack); |
3835 | lastSlack=fact->lastSlack; |
3836 | |
3837 | /* dwork1 = (U^-1)dpermu; dpermu zeroed (?) */ |
3838 | return c_ekkftjup(fact, |
3839 | dpermu, lastSlack, dwork1, mpt); |
3840 | |
3841 | } /* c_ekkftrn */ |
3842 | |
3843 | int c_ekkftrn_ft(EKKfactinfo * COIN_RESTRICT2 fact, |
3844 | double * COIN_RESTRICT dwork1_ft, int * COIN_RESTRICT mpt_ft, int *nincolp_ft) |
3845 | { |
3846 | double * COIN_RESTRICT dpermu_ft = fact->kadrpm; |
3847 | int * COIN_RESTRICT spare = reinterpret_cast<int *>(fact->kp1adr); |
3848 | int nincol = *nincolp_ft; |
3849 | |
3850 | int nuspik; |
3851 | double * COIN_RESTRICT dluvalPut = fact->xeeadr+fact->nnentu+1; |
3852 | int * COIN_RESTRICT hrowiPut = fact->xeradr+fact->nnentu+1; |
3853 | |
3854 | const int nrow = fact->nrow; |
3855 | /* mpermu contains the permutation */ |
3856 | const int * COIN_RESTRICT mpermu=fact->mpermu; |
3857 | |
3858 | int lastSlack; |
3859 | |
3860 | int kdnspt = fact->nnetas - fact->nnentl; |
3861 | bool isRoom = (fact->nnentu + (nrow << 1) < (kdnspt - 2) |
3862 | + fact->R_etas_start[fact->nR_etas + 1]); |
3863 | |
3864 | /* say F-T will be sorted */ |
3865 | fact->sortedEta=1; |
3866 | |
3867 | assert (fact->numberSlacks!=0||!fact->lastSlack); |
3868 | lastSlack=fact->lastSlack; |
3869 | #ifdef CLP_REUSE_ETAS |
3870 | bool skipStuff = (fact->reintro>=0); |
3871 | |
3872 | int save_nR_etas=fact->nR_etas; |
3873 | int * save_hpivcoR=fact->hpivcoR; |
3874 | int * save_R_etas_start=fact->R_etas_start; |
3875 | if (skipStuff) { |
3876 | // just move |
3877 | int * putSeq = fact->xrsadr+2*fact->nrowmx+2; |
3878 | int * position = putSeq+fact->maxinv; |
3879 | int * putStart = position+fact->maxinv; |
3880 | memset(dwork1_ft,0,nincol*sizeof(double)); |
3881 | int iPiv=fact->reintro; |
3882 | int start=putStart[iPiv]&0x7fffffff; |
3883 | int end=putStart[iPiv+1]&0x7fffffff; |
3884 | double * COIN_RESTRICT dluval = fact->xeeadr; |
3885 | int * COIN_RESTRICT hrowi = fact->xeradr; |
3886 | double dValue; |
3887 | if (fact->reintro<fact->nrow) { |
3888 | iPiv++; |
3889 | dValue=1.0/dluval[start++]; |
3890 | } else { |
3891 | iPiv=hrowi[--end]; |
3892 | dValue=dluval[end]; |
3893 | start++; |
3894 | int ndoSkip=0; |
3895 | for (int i=fact->nrow;i<fact->reintro;i++) { |
3896 | if ((putStart[i]&0x80000000)==0) |
3897 | ndoSkip++; |
3898 | } |
3899 | fact->nR_etas-=ndoSkip; |
3900 | fact->hpivcoR+=ndoSkip; |
3901 | fact->R_etas_start+=ndoSkip; |
3902 | } |
3903 | dpermu_ft[iPiv]=dValue; |
3904 | if (fact->if_sparse_update>0 && DENSE_THRESHOLD<nrow) { |
3905 | nincol=0; |
3906 | if (dValue) |
3907 | mpt_ft[nincol++]=iPiv; |
3908 | for (int i=start;i<end;i++) { |
3909 | int iRow=hrowi[i]; |
3910 | dpermu_ft[iRow]=dluval[i]; |
3911 | mpt_ft[nincol++]=iRow; |
3912 | } |
3913 | } else { |
3914 | for (int i=start;i<end;i++) { |
3915 | int iRow=hrowi[i]; |
3916 | dpermu_ft[iRow]=dluval[i]; |
3917 | } |
3918 | } |
3919 | } |
3920 | #else |
3921 | bool skipStuff = false; |
3922 | #endif |
3923 | if (fact->if_sparse_update>0 && DENSE_THRESHOLD<nrow) { |
3924 | if (!skipStuff) { |
3925 | /* iterating so c_ekkgtcl will have list */ |
3926 | /* in order for this to make sense, nonzero[1..nrow] must already be zeroed */ |
3927 | c_ekkshfpi_list3(mpermu+1, dwork1_ft, dpermu_ft, mpt_ft, nincol); |
3928 | |
3929 | /* it may be the case that the basis was entirely upper-triangular */ |
3930 | if (fact->nnentl) { |
3931 | nincol = |
3932 | c_ekkftj4_sparse(fact, |
3933 | dpermu_ft, mpt_ft, |
3934 | nincol,spare); |
3935 | } |
3936 | } |
3937 | /* DO ROW ETAS IN L */ |
3938 | if (isRoom) { |
3939 | ++fact->nnentu; |
3940 | nincol= |
3941 | c_ekkftjl_sparse3(fact, |
3942 | dpermu_ft, |
3943 | mpt_ft, hrowiPut, |
3944 | dluvalPut,nincol); |
3945 | nuspik = nincol; |
3946 | /* temporary */ |
3947 | /* say not sorted */ |
3948 | fact->sortedEta=0; |
3949 | } else { |
3950 | /* no room */ |
3951 | nuspik=-3; |
3952 | nincol= |
3953 | c_ekkftjl_sparse2(fact, |
3954 | dpermu_ft, |
3955 | mpt_ft, nincol); |
3956 | } |
3957 | /* DO U */ |
3958 | if (DENSE_THRESHOLD>nrow-fact->numberSlacks) { |
3959 | nincol = c_ekkftjup_pack(fact, |
3960 | dpermu_ft,lastSlack, dwork1_ft, |
3961 | mpt_ft); |
3962 | } else { |
3963 | nincol= c_ekkftju_sparse_a(fact, |
3964 | mpt_ft, |
3965 | nincol, spare); |
3966 | nincol = c_ekkftju_sparse_b(fact, |
3967 | dpermu_ft, |
3968 | dwork1_ft , mpt_ft, |
3969 | nincol, spare); |
3970 | } |
3971 | } else { |
3972 | if (!skipStuff) { |
3973 | int lastNonZero; |
3974 | int firstNonZero = c_ekkshfpi_list(mpermu+1, dwork1_ft, dpermu_ft, |
3975 | mpt_ft, nincol,&lastNonZero); |
3976 | if (fact->nnentl&&lastNonZero>=fact->firstLRow) { |
3977 | /* dpermu_ft = (L^-1)dpermu_ft */ |
3978 | c_ekkftj4p(fact, dpermu_ft, firstNonZero); |
3979 | } |
3980 | } |
3981 | |
3982 | /* dpermu_ft = (R^-1) dpermu_ft */ |
3983 | c_ekkftjl(fact, dpermu_ft); |
3984 | |
3985 | if (isRoom) { |
3986 | |
3987 | /* fake start to allow room for pivot */ |
3988 | /* dluval[fact->nnentu...] = non-zeros of dpermu_ft; |
3989 | * hrowi[fact->nnentu..] = indices of these non-zeros; |
3990 | * near-zeros in dluval flattened |
3991 | */ |
3992 | ++fact->nnentu; |
3993 | nincol= c_ekkscmv(fact,fact->nrow, dpermu_ft, hrowiPut, |
3994 | dluvalPut); |
3995 | |
3996 | /* |
3997 | * note that this is not the value of nincol determined by c_ekkftjup. |
3998 | * For Forrest-Tomlin update we want vector before U |
3999 | * this vector will replace one in U |
4000 | */ |
4001 | nuspik = nincol; |
4002 | } else { |
4003 | /* no room */ |
4004 | nuspik = -3; |
4005 | } |
4006 | |
4007 | /* dwork1_ft = (U^-1)dpermu_ft; dpermu_ft zeroed (?) */ |
4008 | nincol = c_ekkftjup_pack(fact, |
4009 | dpermu_ft, lastSlack, dwork1_ft, mpt_ft); |
4010 | } |
4011 | #ifdef CLP_REUSE_ETAS |
4012 | fact->nR_etas=save_nR_etas; |
4013 | fact->hpivcoR=save_hpivcoR; |
4014 | fact->R_etas_start=save_R_etas_start; |
4015 | #endif |
4016 | |
4017 | *nincolp_ft = nincol; |
4018 | return (nuspik); |
4019 | } /* c_ekkftrn */ |
4020 | void c_ekkftrn2(EKKfactinfo * COIN_RESTRICT2 fact, double * COIN_RESTRICT dwork1, |
4021 | double * COIN_RESTRICT dpermu1,int * COIN_RESTRICT mpt1, int *nincolp, |
4022 | double * COIN_RESTRICT dwork1_ft, int * COIN_RESTRICT mpt_ft, int *nincolp_ft) |
4023 | { |
4024 | double * COIN_RESTRICT dluvalPut = fact->xeeadr+fact->nnentu+1; |
4025 | int * COIN_RESTRICT hrowiPut = fact->xeradr+fact->nnentu+1; |
4026 | |
4027 | const int nrow = fact->nrow; |
4028 | /* mpermu contains the permutation */ |
4029 | const int * COIN_RESTRICT mpermu=fact->mpermu; |
4030 | |
4031 | int lastSlack; |
4032 | assert (fact->numberSlacks!=0||!fact->lastSlack); |
4033 | lastSlack=fact->lastSlack; |
4034 | |
4035 | int nincol = *nincolp_ft; |
4036 | |
4037 | /* using dwork1 instead double *dpermu_ft = fact->kadrpm; */ |
4038 | int * spare = reinterpret_cast<int *>(fact->kp1adr); |
4039 | |
4040 | int kdnspt = fact->nnetas - fact->nnentl; |
4041 | bool isRoom = (fact->nnentu + (nrow << 1) < (kdnspt - 2) |
4042 | + fact->R_etas_start[fact->nR_etas + 1]); |
4043 | /* say F-T will be sorted */ |
4044 | fact->sortedEta=1; |
4045 | int lastNonZero; |
4046 | int firstNonZero = c_ekkshfpi_list2(mpermu+1, dwork1+1, dpermu1, |
4047 | mpt1, *nincolp,&lastNonZero); |
4048 | if (fact->nnentl&&lastNonZero>=fact->firstLRow) { |
4049 | /* dpermu1 = (L^-1)dpermu1 */ |
4050 | c_ekkftj4p(fact, dpermu1, firstNonZero); |
4051 | } |
4052 | |
4053 | #ifdef CLP_REUSE_ETAS |
4054 | bool skipStuff = (fact->reintro>=0); |
4055 | int save_nR_etas=fact->nR_etas; |
4056 | int * save_hpivcoR=fact->hpivcoR; |
4057 | int * save_R_etas_start=fact->R_etas_start; |
4058 | if (skipStuff) { |
4059 | // just move |
4060 | int * putSeq = fact->xrsadr+2*fact->nrowmx+2; |
4061 | int * position = putSeq+fact->maxinv; |
4062 | int * putStart = position+fact->maxinv; |
4063 | memset(dwork1_ft,0,nincol*sizeof(double)); |
4064 | int iPiv=fact->reintro; |
4065 | int start=putStart[iPiv]&0x7fffffff; |
4066 | int end=putStart[iPiv+1]&0x7fffffff; |
4067 | double * COIN_RESTRICT dluval = fact->xeeadr; |
4068 | int * COIN_RESTRICT hrowi = fact->xeradr; |
4069 | double dValue; |
4070 | if (fact->reintro<fact->nrow) { |
4071 | iPiv++; |
4072 | dValue=1.0/dluval[start++]; |
4073 | } else { |
4074 | iPiv=hrowi[--end]; |
4075 | dValue=dluval[end]; |
4076 | start++; |
4077 | int ndoSkip=0; |
4078 | for (int i=fact->nrow;i<fact->reintro;i++) { |
4079 | if ((putStart[i]&0x80000000)==0) |
4080 | ndoSkip++; |
4081 | } |
4082 | fact->nR_etas-=ndoSkip; |
4083 | fact->hpivcoR+=ndoSkip; |
4084 | fact->R_etas_start+=ndoSkip; |
4085 | } |
4086 | dwork1[iPiv]=dValue; |
4087 | if (fact->if_sparse_update>0 && DENSE_THRESHOLD<nrow) { |
4088 | nincol=0; |
4089 | if (dValue) |
4090 | mpt_ft[nincol++]=iPiv; |
4091 | for (int i=start;i<end;i++) { |
4092 | int iRow=hrowi[i]; |
4093 | dwork1[iRow]=dluval[i]; |
4094 | mpt_ft[nincol++]=iRow; |
4095 | } |
4096 | } else { |
4097 | for (int i=start;i<end;i++) { |
4098 | int iRow=hrowi[i]; |
4099 | dwork1[iRow]=dluval[i]; |
4100 | } |
4101 | } |
4102 | } |
4103 | #else |
4104 | bool skipStuff = false; |
4105 | #endif |
4106 | if (fact->if_sparse_update>0 && DENSE_THRESHOLD<nrow) { |
4107 | if (!skipStuff) { |
4108 | /* iterating so c_ekkgtcl will have list */ |
4109 | /* in order for this to make sense, nonzero[1..nrow] must already be zeroed */ |
4110 | c_ekkshfpi_list3(mpermu+1, dwork1_ft, dwork1, mpt_ft, nincol); |
4111 | |
4112 | /* it may be the case that the basis was entirely upper-triangular */ |
4113 | if (fact->nnentl) { |
4114 | nincol = |
4115 | c_ekkftj4_sparse(fact, |
4116 | dwork1, mpt_ft, |
4117 | nincol,spare); |
4118 | } |
4119 | } |
4120 | /* DO ROW ETAS IN L */ |
4121 | if (isRoom) { |
4122 | ++fact->nnentu; |
4123 | nincol= |
4124 | c_ekkftjl_sparse3(fact, |
4125 | dwork1, |
4126 | mpt_ft, hrowiPut, |
4127 | dluvalPut, |
4128 | nincol); |
4129 | fact->nuspike = nincol; |
4130 | /* say not sorted */ |
4131 | fact->sortedEta=0; |
4132 | } else { |
4133 | /* no room */ |
4134 | fact->nuspike=-3; |
4135 | nincol= |
4136 | c_ekkftjl_sparse2(fact, |
4137 | dwork1, |
4138 | mpt_ft, nincol); |
4139 | } |
4140 | } else { |
4141 | if (!skipStuff) { |
4142 | int lastNonZero; |
4143 | int firstNonZero = c_ekkshfpi_list(mpermu+1, dwork1_ft, dwork1, |
4144 | mpt_ft, nincol,&lastNonZero); |
4145 | if (fact->nnentl&&lastNonZero>=fact->firstLRow) { |
4146 | /* dpermu_ft = (L^-1)dpermu_ft */ |
4147 | c_ekkftj4p(fact, dwork1, firstNonZero); |
4148 | } |
4149 | } |
4150 | c_ekkftjl(fact, dwork1); |
4151 | |
4152 | if (isRoom) { |
4153 | |
4154 | /* fake start to allow room for pivot */ |
4155 | /* dluval[fact->nnentu...] = non-zeros of dpermu_ft; |
4156 | * hrowi[fact->nnentu..] = indices of these non-zeros; |
4157 | * near-zeros in dluval flattened |
4158 | */ |
4159 | ++fact->nnentu; |
4160 | nincol= c_ekkscmv(fact,fact->nrow, dwork1, |
4161 | hrowiPut, |
4162 | dluvalPut); |
4163 | |
4164 | /* |
4165 | * note that this is not the value of nincol determined by c_ekkftjup. |
4166 | * For Forrest-Tomlin update we want vector before U |
4167 | * this vector will replace one in U |
4168 | */ |
4169 | fact->nuspike = nincol; |
4170 | } else { |
4171 | /* no room */ |
4172 | fact->nuspike = -3; |
4173 | } |
4174 | } |
4175 | #ifdef CLP_REUSE_ETAS |
4176 | fact->nR_etas=save_nR_etas; |
4177 | fact->hpivcoR=save_hpivcoR; |
4178 | fact->R_etas_start=save_R_etas_start; |
4179 | #endif |
4180 | |
4181 | |
4182 | /* dpermu1 = (R^-1) dpermu1 */ |
4183 | c_ekkftjl(fact, dpermu1); |
4184 | |
4185 | /* DO U */ |
4186 | if (fact->if_sparse_update<=0 || DENSE_THRESHOLD>nrow-fact->numberSlacks) { |
4187 | nincol = c_ekkftjup_pack(fact, |
4188 | dwork1,lastSlack, dwork1_ft, mpt_ft); |
4189 | } else { |
4190 | nincol= c_ekkftju_sparse_a(fact, |
4191 | mpt_ft, |
4192 | nincol, spare); |
4193 | nincol = c_ekkftju_sparse_b(fact, |
4194 | dwork1, |
4195 | dwork1_ft , mpt_ft, |
4196 | nincol, spare); |
4197 | } |
4198 | *nincolp_ft = nincol; |
4199 | /* dwork1 = (U^-1)dpermu1; dpermu1 zeroed (?) */ |
4200 | *nincolp = c_ekkftjup(fact, |
4201 | dpermu1,lastSlack, dwork1, mpt1); |
4202 | |
4203 | } |
4204 | |