1// [Blend2D]
2// 2D Vector Graphics Powered by a JIT Compiler.
3//
4// [License]
5// Zlib - See LICENSE.md file in the package.
6
7#include "./blapi-build_p.h"
8#include "./blarrayops_p.h"
9#include "./blmath_p.h"
10#include "./blsupport_p.h"
11
12// ============================================================================
13// [CubicRoots]
14// ============================================================================
15
16// Ax^3 + Bx^2 + Cx + D = 0.
17//
18// Roots3And4.c: Graphics Gems, original author Jochen Schwarze (schwarze@isa.de).
19// See also the wiki article at http://en.wikipedia.org/wiki/Cubic_function for
20// other equations.
21size_t blCubicRoots(double* dst, const double* poly, double tMin, double tMax) noexcept {
22 constexpr double k1Div3 = 1.0 / 3.0;
23 constexpr double k1Div9 = 1.0 / 9.0;
24 constexpr double k2Div27 = 2.0 / 27.0;
25
26 size_t nRoots = 0;
27 double norm = poly[0];
28 double a = poly[1];
29 double b = poly[2];
30 double c = poly[3];
31
32 if (norm == 0.0)
33 return blQuadRoots(dst, a, b, c, tMin, tMax);
34
35 // Convert to a normalized form `x^3 + Ax^2 + Bx + C == 0`.
36 a /= norm;
37 b /= norm;
38 c /= norm;
39
40 // Substitute x = y - A/3 to eliminate quadric term `x^3 + px + q = 0`.
41 double sa = a * a;
42 double p = -k1Div9 * sa + k1Div3 * b;
43 double q = (k2Div27 * sa - k1Div3 * b) * 0.5 * a + c;
44
45 // Use Cardano's formula.
46 double p3 = p * p * p;
47 double d = q * q + p3;
48
49 // Resubstitution constant.
50 double sub = -k1Div3 * a;
51
52 if (isNearZero(d)) {
53 // One triple solution.
54 if (isNearZero(q)) {
55 dst[0] = sub;
56 return size_t(sub >= tMin && sub <= tMax);
57 }
58
59 // One single and one double solution.
60 double u = blCbrt(-q);
61 nRoots = 2;
62
63 dst[0] = sub + 2.0 * u;
64 dst[1] = sub - u;
65
66 // Sort.
67 if (dst[0] > dst[1])
68 std::swap(dst[0], dst[1]);
69 }
70 else if (d < 0.0) {
71 // Three real solutions.
72 double phi = k1Div3 * blAcos(-q / blSqrt(-p3));
73 double t = 2.0 * blSqrt(-p);
74
75 nRoots = 3;
76 dst[0] = sub + t * blCos(phi);
77 dst[1] = sub - t * blCos(phi + BL_MATH_PI_DIV_3);
78 dst[2] = sub - t * blCos(phi - BL_MATH_PI_DIV_3);
79
80 // Sort.
81 if (dst[0] > dst[1]) std::swap(dst[0], dst[1]);
82 if (dst[1] > dst[2]) std::swap(dst[1], dst[2]);
83 if (dst[0] > dst[1]) std::swap(dst[0], dst[1]);
84 }
85 else {
86 // One real solution.
87 double sqrt_d = blSqrt(d);
88 double u = blCbrt(sqrt_d - q);
89 double v = -blCbrt(sqrt_d + q);
90
91 nRoots = 1;
92 dst[0] = sub + u + v;
93 }
94
95 size_t n = 0;
96 for (size_t i = 0; i < nRoots; i++)
97 if (dst[i] >= tMin && dst[i] <= tMax)
98 dst[n++] = dst[i];
99 return n;
100}
101
102// ============================================================================
103// [PolyRoots]
104// ============================================================================
105
106// The code adapted from:
107//
108// rpoly.cpp -- Jenkins-Traub real polynomial root finder.
109// (C) 2002, C. Bond. All rights reserved.
110//
111// Translation of TOMS493 from FORTRAN to C. This implementation of Jenkins-Traub
112// partially adapts the original code to a C environment by restruction many of
113// the 'goto' controls to better fit a block structured form. It also eliminates
114// the global memory allocation in favor of stack memory allocation.
115
116#define JT_BASE 2.0
117#define JT_ETA 2.22e-16
118#define JT_INF 3.4e38
119#define JT_SMALL 1.2e-38
120
121class BLJenkinsTraubSolver {
122public:
123 BLMemBufferTmp<2048> mem;
124 double* temp;
125 double* pt;
126 double* p;
127 double* qp;
128 double* k;
129 double* qk;
130 double* svk;
131 double* zeror;
132 double* zeroi;
133
134 double sr, si, u, v, a, b, c, d, a1, a2;
135 double a3, a6, a7, e, f, g, h, szr, szi, lzr, lzi;
136 double are, mre;
137
138 int degree, n, nn, nmi, zerok;
139 int itercnt;
140
141 BLJenkinsTraubSolver(const double* polynomial, int degree);
142 bool init(int degree);
143
144 void quad(double a, double b1, double c, double* sr, double* si, double* lr, double* li);
145 void fxshfr(int l2, int* nz);
146 void quadit(double* uu, double* vv, int* nz);
147 void realit(double sss, int* nz, int* iflag);
148 void calcsc(int* type);
149 void nextk(int* type);
150 void newest(int type, double* uu, double* vv);
151 void quadsd(int n, double* u, double* v, double* p, double* q, double* a, double* b);
152
153 // Called by fPolyRoots_t().
154 int solve();
155};
156
157BLJenkinsTraubSolver::BLJenkinsTraubSolver(const double* poly, int degree) {
158 // Alloc memory and initialize pointers.
159 if (!init(degree))
160 return;
161
162 // Copy the polynomial.
163 for (int i = 0; i <= degree; i++)
164 p[i] = poly[i];
165}
166
167bool BLJenkinsTraubSolver::init(int degree) {
168 temp = reinterpret_cast<double*>(mem.alloc(unsigned(degree + 1) * 9u * sizeof(double)));
169
170 if (temp == nullptr)
171 return false;
172
173 pt = temp + (degree + 1);
174 p = pt + (degree + 1);
175 qp = p + (degree + 1);
176 k = qp + (degree + 1);
177 qk = k + (degree + 1);
178 svk = qk + (degree + 1);
179 zeror = svk + (degree + 1);
180 zeroi = zeror + (degree + 1);
181
182 this->n = degree;
183 this->degree = degree;
184
185 return true;
186}
187
188// Computes up to L2 fixed shift k-polynomials, testing for convergence in the
189// linear or quadratic case. Initiates one of the variable shift iterations and
190// returns with the number of zeros found.
191void BLJenkinsTraubSolver::fxshfr(int l2, int* nz) {
192 double svu, svv, ui, vi, s;
193 double betas, betav, oss, ovv, ss, vv, ts, tv;
194 double ots, otv, tvv, tss;
195 int type, i, j, iflag, vpass, spass, vtry, stry;
196
197 *nz = 0;
198 betav = 0.25;
199 betas = 0.25;
200 oss = sr;
201 ovv = v;
202
203 // Evaluate polynomial by synthetic division.
204 quadsd(n,&u,&v,p,qp,&a,&b);
205 calcsc(&type);
206
207 for (j = 0; j < l2; j++) {
208 // Calculate next k polynomial and estimate v.
209 nextk(&type);
210 calcsc(&type);
211 newest(type,&ui,&vi);
212 vv = vi;
213
214 // Estimate s.
215 ss = k[n-1] == 0.0 ? 0.0 : -p[n] / k[n - 1];
216 tv = 1.0;
217 ts = 1.0;
218 if (j == 0 || type == 3)
219 goto _70;
220
221 // Compute relative measures of convergence of s and v sequences.
222 if (vv != 0.0) tv = blAbs((vv - ovv) / vv);
223 if (ss != 0.0) ts = blAbs((ss - oss) / ss);
224
225 // If decreasing, multiply two most recent convergence measures.
226 tvv = tv < otv ? tv * otv : 1.0;
227 tss = ts < ots ? ts * ots : 1.0;
228
229 // Compare with convergence criteria.
230 vpass = (tvv < betav);
231 spass = (tss < betas);
232 if (!(spass || vpass))
233 goto _70;
234
235 // At least one sequence has passed the convergence test. Store variables
236 // before iterating.
237 svu = u;
238 svv = v;
239 for (i = 0; i < n; i++)
240 svk[i] = k[i];
241 s = ss;
242
243 // Choose iteration according to the fastest converging sequence.
244 vtry = 0;
245 stry = 0;
246 if (spass && (!vpass || tss < tvv))
247 goto _40;
248
249_20:
250 quadit(&ui, &vi, nz);
251 if (*nz > 0)
252 return;
253
254 // Quadratic iteration has failed. Flag that it has been tried and decrease
255 // the convergence criterion.
256 vtry = 1;
257 betav *= 0.25;
258
259 // Try linear iteration if it has not been tried and the S sequence is converging.
260 if (stry || !spass) goto _50;
261 for (i = 0; i < n; i++)
262 k[i] = svk[i];
263
264_40:
265 realit(s, nz, &iflag);
266 if (*nz > 0)
267 return;
268
269 // Linear iteration has failed. Flag that it has been tried and decrease
270 // the convergence criterion.
271 stry = 1;
272 betas *=0.25;
273 if (iflag == 0)
274 goto _50;
275
276 // If linear iteration signals an almost double real zero attempt quadratic
277 // iteration.
278 ui = -(s+s);
279 vi = s*s;
280 goto _20;
281
282 // Restore variables.
283_50:
284 u = svu;
285 v = svv;
286 for (i = 0; i < n; i++)
287 k[i] = svk[i];
288
289 // Try quadratic iteration if it has not been tried and the V sequence is
290 // converging.
291 if (vpass && !vtry)
292 goto _20;
293
294 // Recompute QP and scalar values to continue the second stage.
295 quadsd(n, &u, &v, p, qp, &a, &b);
296 calcsc(&type);
297
298_70:
299 ovv = vv;
300 oss = ss;
301 otv = tv;
302 ots = ts;
303 }
304}
305
306// Variable-shift k-polynomial iteration for a quadratic factor converges only
307// if the zeros are equimodular or nearly so.
308//
309// uu, vv - coefficients of starting quadratic.
310// nz - number of zeros found.
311void BLJenkinsTraubSolver::quadit(double* uu, double* vv, int* nz) {
312 double ui, vi;
313 double mp, omp, ee, relstp, t, zm;
314 int type, i, j, tried;
315
316 *nz = 0;
317 tried = 0;
318 u = *uu;
319 v = *vv;
320 j = 0;
321
322 // Main loop.
323 for (;;) {
324 itercnt++;
325 quad(1.0, u, v, &szr, &szi, &lzr, &lzi);
326
327 // Return if roots of the quadratic are real and not close to multiple or
328 // nearly equal and of opposite sign.
329 if (blAbs(blAbs(szr) - blAbs(lzr)) > 0.01 * blAbs(lzr))
330 return;
331
332 // Evaluate polynomial by quadratic synthetic division.
333 quadsd(n, &u, &v, p, qp, &a, &b);
334 mp = blAbs(a - szr * b) + blAbs(szi * b);
335
336 // Compute a rigorous bound on the rounding error in evaluating p.
337 zm = blSqrt(blAbs(v));
338 ee = 2.0 * blAbs(qp[0]);
339 t = -szr*b;
340
341 for (i = 1; i < n; i++)
342 ee = ee * zm + blAbs(qp[i]);
343
344 ee = ee * zm + blAbs(a+t);
345 ee *= (5.0 * mre + 4.0 * are);
346 ee = ee - (5.0 * mre + 2.0 * are) * (blAbs(a + t) + blAbs(b) * zm);
347 ee = ee + 2.0*are * blAbs(t);
348
349 // Iteration has converged sufficiently if the polynomial value is less than
350 // 20 times this bound.
351 if (mp <= 20.0 * ee) {
352 *nz = 2;
353 return;
354 }
355
356 // Stop iteration after 20 steps.
357 if (++j > 20)
358 return;
359
360 if (!(j < 2 || relstp > 0.01 || mp < omp || tried)) {
361 // A cluster appears to be stalling the convergence. Five fixed shift steps
362 // are taken with a u,v close to the cluster.
363 if (relstp < JT_ETA)
364 relstp = JT_ETA;
365
366 relstp = blSqrt(relstp);
367 u = u - u * relstp;
368 v = v + v * relstp;
369 quadsd(n, &u, &v, p, qp, &a, &b);
370
371 for (i = 0; i < 5; i++) {
372 calcsc(&type);
373 nextk(&type);
374 }
375
376 tried = 1;
377 j = 0;
378 }
379
380 omp = mp;
381
382 // Calculate next k polynomial and new u and v.
383 calcsc(&type);
384 nextk(&type);
385 calcsc(&type);
386 newest(type,&ui,&vi);
387
388 // If vi is zero the iteration is not converging.
389 if (vi == 0.0)
390 return;
391
392 relstp = blAbs((vi - v) / vi);
393 u = ui;
394 v = vi;
395 }
396}
397
398// Variable-shift H polynomial iteration for a real zero.
399//
400// sss - starting iterate
401// nz - number of zeros found
402// iflag - flag to indicate a pair of zeros near real axis.
403void BLJenkinsTraubSolver::realit(double sss, int* nz, int* iflag) {
404 double pv, kv, t, s;
405 double ms, mp, omp, ee;
406 int i, j;
407
408 *nz = 0;
409 *iflag = 0;
410
411 s = sss;
412 j = 0;
413
414 for (;;) {
415 itercnt++;
416 pv = p[0];
417
418 // Evaluate p at s.
419 qp[0] = pv;
420
421 for (i = 1; i <= n; i++) {
422 pv = pv*s + p[i];
423 qp[i] = pv;
424 }
425 mp = blAbs(pv);
426
427 // Compute a rigorous bound on the error in evaluating p.
428 ms = blAbs(s);
429 ee = (mre / (are + mre)) * blAbs(qp[0]);
430
431 for (i = 1; i <= n; i++)
432 ee = ee * ms + blAbs(qp[i]);
433
434 // Iteration has converged sufficiently if the polynomial value is less
435 // than 20 times this bound.
436 if (mp <= 20.0 * ((are+mre)*ee-mre*mp)) {
437 *nz = 1;
438 szr = s;
439 szi = 0.0;
440 return;
441 }
442 j++;
443
444 // Stop iteration after 10 steps.
445 if (j > 10) return;
446 if (j < 2) goto _50;
447 if (blAbs(t) > 0.001 * blAbs(s-t) || mp < omp) goto _50;
448
449 // A cluster of zeros near the real axis has been encountered. Return with
450 // iflag set to initiate a quadratic iteration.
451 *iflag = 1;
452 sss = s;
453 return;
454
455_50:
456 omp = mp;
457
458 // Compute t, the next polynomial, and the new iterate.
459 kv = k[0];
460 qk[0] = kv;
461
462 for (i = 1; i < n; i++) {
463 kv = kv*s + k[i];
464 qk[i] = kv;
465 }
466
467 // HVE n -> n-1
468 if (blAbs(kv) <= blAbs(k[n-1])*10.0*JT_ETA) {
469 // Use the unscaled form.
470 k[0] = 0.0;
471 for (i = 1; i < n; i++)
472 k[i] = qk[i-1];
473 }
474 else {
475 // Use the scaled form of the recurrence if the value of k at s is nonzero.
476 t = -pv / kv;
477 k[0] = qp[0];
478 for (i = 1; i < n; i++)
479 k[i] = t*qk[i-1] + qp[i];
480 }
481
482 kv = k[0];
483 for (i = 1; i < n; i++)
484 kv = kv*s + k[i];
485
486 t = 0.0;
487 if (blAbs(kv) > blAbs(k[n-1] * 10.0 * JT_ETA))
488 t = -pv/kv;
489
490 s += t;
491 }
492}
493
494// This routine calculates scalar quantities used to compute the next k
495// polynomial and new estimates of the quadratic coefficients.
496//
497// type - integer variable set here indicating how the calculations are
498// normalized to avoid overflow.
499void BLJenkinsTraubSolver::calcsc(int* type) {
500 // Synthetic division of k by the quadratic 1, u, v.
501 quadsd(n - 1, &u, &v, k, qk, &c, &d);
502
503 if (blAbs(c) > blAbs(k[n-1] * 100.0 * JT_ETA)) goto _10;
504 if (blAbs(d) > blAbs(k[n-2] * 100.0 * JT_ETA)) goto _10;
505
506 // Type=3 indicates the quadratic is almost a factor of k.
507 *type = 3;
508 return;
509
510_10:
511 if (blAbs(d) < blAbs(c)) {
512 // Type=1 indicates that all formulas are divided by c.
513 *type = 1;
514 e = a / c;
515 f = d / c;
516 g = u * e;
517 h = v * b;
518
519 a3 = a * e + b * (h / c + g);
520 a1 = b - a * (d / c);
521 a7 = a + g * d + h * f;
522 return;
523 }
524 else {
525 // Type=2 indicates that all formulas are divided by d.
526 *type = 2;
527 e = a / d;
528 f = c / d;
529 g = u * b;
530 h = v * b;
531
532 a3 = (a + g) * e + h * (b / d);
533 a1 = b * f - a;
534 a7 = (f + u) * a + h;
535 }
536}
537
538// Computes the next k polynomials using scalars computed in calcsc.
539void BLJenkinsTraubSolver::nextk(int* type) {
540 double x;
541 int i;
542
543 if (*type == 3) {
544 // Use unscaled form of the recurrence if type is 3.
545 k[0] = 0.0;
546 k[1] = 0.0;
547
548 for (i = 2; i < n; i++)
549 k[i] = qk[i-2];
550 }
551 else {
552 x = a;
553 if (*type == 1) x = b;
554
555 if (blAbs(a1) <= blAbs(x) * 10.0 * JT_ETA) {
556 // If a1 is nearly zero then use a special form of the recurrence.
557 k[0] = 0.0;
558 k[1] = -a7 * qp[0];
559
560 for (i = 2; i < n; i++)
561 k[i] = a3 * qk[i-2] - a7 * qp[i-1];
562 }
563 else {
564 // Use scaled form of the recurrence.
565 a7 /= a1;
566 a3 /= a1;
567 k[0] = qp[0];
568 k[1] = qp[1] - a7*qp[0];
569
570 for (i = 2; i < n; i++)
571 k[i] = a3 * qk[i-2] - a7 * qp[i-1] + qp[i];
572 }
573 }
574}
575
576// Compute new estimates of the quadratic coefficients using the scalars
577// computed in calcsc.
578void BLJenkinsTraubSolver::newest(int type, double* uu, double* vv) {
579 // Use formulas appropriate to setting of type.
580 if (type == 3) {
581 // If type=3 the quadratic is zeroed.
582 *uu = 0.0;
583 *vv = 0.0;
584 return;
585 }
586
587 double a4, a5;
588 if (type == 2) {
589 a4 = (a + g) * f + h;
590 a5 = (f + u) * c + v * d;
591 }
592 else {
593 a4 = a + u * b + h * f;
594 a5 = c + d * (u + v * f);
595 }
596
597 // Evaluate new quadratic coefficients.
598 double b1 = -k[n-1] / p[n];
599 double b2 = -(k[n-2] + b1 * p[n-1]) / p[n];
600 double c1 = v * b2 * a1;
601 double c2 = b1 * a7;
602 double c3 = b1 * b1 * a3;
603 double c4 = c1 - c2 - c3;
604
605 double t = a5 + b1 * a4 - c4;
606 if (t == 0.0) {
607 *uu = 0.0;
608 *vv = 0.0;
609 }
610 else {
611 *uu = u - (u * (c3 + c2) + v * (b1 * a1 + b2 * a7)) / t;
612 *vv = v * (1.0 + c4 / t);
613 }
614}
615
616// Divides p by the quadratic 1,u,v placing the quotient in q and the remainder in a, b.
617void BLJenkinsTraubSolver::quadsd(int nn, double* u, double* v, double* p, double* q, double* a, double* b) {
618 double c;
619 int i;
620
621 *b = p[0];
622 q[0] = *b;
623 *a = p[1] - (*b)*(*u);
624 q[1] = *a;
625
626 for (i = 2; i <= nn; i++) {
627 c = p[i] - (*a)*(*u) - (*b)*(*v);
628 q[i] = c;
629 *b = *a;
630 *a = c;
631 }
632}
633
634// Calculate the zeros of the quadratic a*z^2 + b1*z + c. The quadratic formula,
635// modified to avoid overflow, is used to find the larger zero if the zeros are
636// real and both are complex. The smaller real zero is found directly from the
637// product of the zeros c/a.
638void BLJenkinsTraubSolver::quad(double a, double b1, double c, double* sr, double* si, double* lr, double* li) {
639 double b, d, e;
640
641 if (a == 0.0) {
642 // Less than two roots.
643 if (b1 != 0.0)
644 *sr = -c/b1;
645 else
646 *sr = 0.0;
647
648 *lr = 0.0;
649 *si = 0.0;
650 *li = 0.0;
651 return;
652 }
653
654 if (c == 0.0) {
655 // one real root, one zero root.
656 *sr = 0.0;
657 *lr = -b1 / a;
658 *si = 0.0;
659 *li = 0.0;
660 return;
661 }
662
663 // Compute discriminant avoiding overflow.
664 b = b1 / 2.0;
665 if (blAbs(b) < blAbs(c)) {
666 e = c >= 0.0 ? a : -a;
667 e = b * (b / blAbs(c)) - e;
668 d = blSqrt(blAbs(e)) * blSqrt(blAbs(c));
669 }
670 else {
671 e = 1.0 - (a/b)*(c/b);
672 d = blSqrt(blAbs(e)) * blAbs(b);
673 }
674
675 if (e < 0.0) {
676 // Complex conjugate zeros.
677 *sr = -b / a;
678 *lr = *sr;
679 *si = blAbs(d/a);
680 *li = -(*si);
681 }
682 else {
683 // Real zeros.
684 if (b >= 0.0)
685 d = -d;
686 *lr = (d - b)/a;
687 *sr = 0.0;
688
689 if (*lr != 0.0)
690 *sr = (c / *lr) / a;
691 *si = 0.0;
692 *li = 0.0;
693 }
694}
695
696int BLJenkinsTraubSolver::solve() {
697 double t, aa, bb, cc, factor;
698 double lo, max, min, xx, yy, cosr, sinr, xxx, x, sc, bnd;
699 double xm, ff, df, dx;
700 int cnt, nz, i, j, jj, l, nm1, zerok;
701
702 // Algorithm fails of the leading coefficient is zero.
703 BL_ASSERT(p[0] != 0.0);
704 BL_ASSERT(n > 0);
705
706 are = JT_ETA;
707 mre = JT_ETA;
708 lo = JT_SMALL / JT_ETA;
709
710 // Initialization of constants for shift rotation.
711 xx = BL_SQRT_0p5; // sqrt(0.5).
712 yy = -xx; //-sqrt(0.5).
713 sinr = 0.99756405025982424761; // sin(94 * PI / 180).
714 cosr = -0.06975647374412530078; // cos(94 * PI / 180).
715
716 // Start the algorithm for one zero.
717_40:
718 itercnt = 0;
719
720 if (n == 1) {
721 zeror[degree-1] = -p[1] / p[0];
722 zeroi[degree-1] = 0.0;
723 n -= 1;
724 goto _99;
725 }
726
727 // Calculate the final zero or pair of zeros.
728 if (n == 2) {
729 quad(p[0], p[1], p[2], &zeror[degree-2], &zeroi[degree-2], &zeror[degree-1], &zeroi[degree-1]);
730 n -= 2;
731 goto _99;
732 }
733
734 // Find largest and smallest moduli of coefficients.
735 min = JT_INF;
736 max = 0.0;
737
738 for (i = 0; i <= n; i++) {
739 x = blAbs(p[i]);
740
741 if (x > max)
742 max = x;
743 if (x != 0.0 && x < min)
744 min = x;
745 }
746
747 // Scale if there are large or very small coefficients. Computes a scale
748 // factor to multiply the coefficients of the polynomial. The scaling is
749 // done to avoid overflow and to avoid undetected underflow interfering
750 // with the convergence criterion. The factor is a power of the JT_BASE.
751 sc = lo / min;
752 if (sc > 1.0 && max > JT_INF / sc)
753 goto _110;
754
755 if (sc <= 1.0) {
756 if (max < 10.0)
757 goto _110;
758
759 if (sc == 0.0)
760 sc = JT_SMALL;
761 }
762
763 // 1.44269504088896340736 == 1 / log(JT_BASE)
764 l = blRoundToInt(1.44269504088896340736 * log(sc));
765
766 // Scale polynomial.
767 factor = blPow(JT_BASE, l);
768 if (factor != 1.0) {
769 for (i = 0;i <= n; i++)
770 p[i] = factor * p[i];
771 }
772
773_110:
774 // Compute lower bound on moduli of roots.
775 for (i = 0; i <= n; i++)
776 pt[i] = (blAbs(p[i]));
777 pt[n] = - pt[n];
778
779 // Compute upper estimate of bound.
780 x = exp((log(-pt[n]) -log(pt[0])) / (double)n);
781
782 // If Newton step at the origin is better, use it.
783 if (pt[n - 1] != 0.0) {
784 xm = -pt[n] / pt[n - 1];
785 if (xm < x) x = xm;
786 }
787
788 // Chop the interval (0,x) until ff <= 0.
789 for (;;) {
790 xm = x * 0.1;
791 ff = pt[0];
792
793 for (i = 1; i <= n; i++)
794 ff = ff*xm + pt[i];
795
796 if (ff <= 0.0)
797 break;
798 x = xm;
799 }
800 dx = x;
801
802 // Do Newton interation until x converges to two decimal places.
803 while (blAbs(dx/x) > 0.005) {
804 ff = pt[0];
805 df = ff;
806
807 for (i = 1; i < n; i++) {
808 ff = ff*x + pt[i];
809 df = df*x + ff;
810 }
811
812 ff = ff*x + pt[n];
813 dx = ff/df;
814 x -= dx;
815 itercnt++;
816 }
817 bnd = x;
818
819 // Compute the derivative as the initial k polynomial and do 5 steps with
820 // no shift.
821 nm1 = n - 1;
822
823 for (i=1;i<n;i++)
824 k[i] = (double)(n-i)*p[i]/(double)n;
825 k[0] = p[0];
826
827 aa = p[n];
828 bb = p[n-1];
829 zerok = (k[n-1] == 0);
830
831 for (jj = 0; jj < 5; jj++) {
832 itercnt++;
833 cc = k[n-1];
834
835 if (!zerok) {
836 // Use a scaled form of recurrence if value of k at 0 is nonzero.
837 t = -aa/cc;
838 for (i=0;i<nm1;i++) {
839 j = n-i-1;
840 k[j] = t*k[j-1]+p[j];
841 }
842 k[0] = p[0];
843 zerok = (blAbs(k[n-1]) <= blAbs(bb) * JT_ETA * 10.0);
844 }
845 else {
846 // Use unscaled form of recurrence.
847 for (i = 0; i < nm1; i++) {
848 j = n-i-1;
849 k[j] = k[j-1];
850 }
851 k[0] = 0.0;
852 zerok = (k[n-1] == 0.0);
853 }
854 }
855
856 // Save k for restarts with new shifts.
857 for (i=0;i<n;i++)
858 temp[i] = k[i];
859
860 // Loop to select the quadratic corresponding to each new shift.
861 for (cnt = 0; cnt < 20; cnt++) {
862 // Quadratic corresponds to a double shift to a non-real point and its
863 // complex conjugate. The point has modulus bnd and amplitude rotated
864 // by 94 degrees from the previous shift.
865 xxx = cosr * xx - sinr * yy;
866 yy = sinr * xx + cosr * yy;
867 xx = xxx;
868 sr = bnd * xx;
869 si = bnd * yy;
870 u = -2.0 * sr;
871 v = bnd;
872 fxshfr(20 * (cnt + 1), &nz);
873
874 if (nz != 0) {
875 // The second stage jumps directly to one of the third stage iterations
876 // and returns here if successful. Deflate the polynomial, store the
877 // zero or zeros and return to the main algorithm.
878 j = degree - n;
879 zeror[j] = szr;
880 zeroi[j] = szi;
881 n -= nz;
882
883 for (i = 0; i <= n; i++) {
884 p[i] = qp[i];
885 }
886
887 if (nz != 1) {
888 zeror[j+1] = lzr;
889 zeroi[j+1] = lzi;
890 }
891 goto _40;
892 }
893
894 // If the iteration is unsuccessful, another quadratic is chosen after
895 // restoring k.
896 for (i = 0; i < n; i++) {
897 k[i] = temp[i];
898 }
899 }
900
901 // Return with failure if no convergence after 20 shifts.
902_99:
903 return degree - n;
904}
905
906// Inject root into an array.
907static BL_INLINE size_t injectRoot(double* arr, size_t n, double value) noexcept {
908 size_t i, j;
909
910 for (i = 0; i < n; i++) {
911 if (arr[i] < value)
912 continue;
913 if (arr[i] == value)
914 return n;
915 break;
916 }
917
918 for (j = n; j != i; j++)
919 arr[j] = arr[j - 1];
920
921 arr[i] = value;
922 return n + 1;
923}
924
925size_t blPolyRoots(double* dst, const double* poly, int degree, double tMin, double tMax) noexcept {
926 size_t i;
927 size_t zeros = 0;
928
929 // Decrease degree of polynomial if the highest degree coefficient is zero.
930 if (degree <= 0)
931 return 0;
932
933 while (poly[0] == 0.0) {
934 poly++;
935 if (--degree <= 3)
936 break;
937 }
938
939 // Remove the zeros at the origin, if any.
940 if (degree <= 0)
941 return 0;
942
943 while (poly[degree] == 0.0) {
944 zeros++;
945 if (--degree <= 3)
946 break;
947 }
948
949 // Use an analytic method if the degree was decreased to 3.
950 if (degree <= 3) {
951 size_t roots;
952
953 if (degree == 1) {
954 double x = -poly[1] / poly[0];
955 dst[0] = x;
956 return size_t(x >= tMin && x <= tMax);
957 }
958 else if (degree == 2) {
959 roots = blQuadRoots(dst, poly, tMin, tMax);
960 }
961 else {
962 roots = blCubicRoots(dst, poly, tMin, tMax);
963 }
964
965 if (zeros != 0 && tMin <= 0.0 && tMax >= 0.0)
966 return injectRoot(dst, roots, 0.0);
967 else
968 return roots;
969 }
970
971 // Limit the maximum polynomial degree.
972 if (degree > 1024)
973 return 0;
974
975 BLJenkinsTraubSolver solver(poly, degree);
976 size_t roots = unsigned(solver.solve());
977
978 if (zeros)
979 dst[roots++] = 0.0;
980
981 size_t nInterestingRoots = 0;
982 for (i = 0; i < roots; i++) {
983 if (isNearZero(solver.zeroi[i])) {
984 double r = solver.zeror[i];
985 if (r >= tMin && r <= tMax)
986 dst[nInterestingRoots++] = r;
987 }
988 }
989 roots = nInterestingRoots;
990
991 if (roots > 1)
992 blQuickSort<double>(dst, roots);
993
994 return roots;
995}
996
997// ============================================================================
998// [BLMath{Roots} - Unit Tests]
999// ============================================================================
1000
1001#ifdef BL_TEST
1002UNIT(blend2d_math) {
1003 INFO("blFloor()");
1004 {
1005 EXPECT(blFloor(-1.5f) ==-2.0f);
1006 EXPECT(blFloor(-1.5 ) ==-2.0 );
1007 EXPECT(blFloor(-0.9f) ==-1.0f);
1008 EXPECT(blFloor(-0.9 ) ==-1.0 );
1009 EXPECT(blFloor(-0.5f) ==-1.0f);
1010 EXPECT(blFloor(-0.5 ) ==-1.0 );
1011 EXPECT(blFloor(-0.1f) ==-1.0f);
1012 EXPECT(blFloor(-0.1 ) ==-1.0 );
1013 EXPECT(blFloor( 0.0f) == 0.0f);
1014 EXPECT(blFloor( 0.0 ) == 0.0 );
1015 EXPECT(blFloor( 0.1f) == 0.0f);
1016 EXPECT(blFloor( 0.1 ) == 0.0 );
1017 EXPECT(blFloor( 0.5f) == 0.0f);
1018 EXPECT(blFloor( 0.5 ) == 0.0 );
1019 EXPECT(blFloor( 0.9f) == 0.0f);
1020 EXPECT(blFloor( 0.9 ) == 0.0 );
1021 EXPECT(blFloor( 1.5f) == 1.0f);
1022 EXPECT(blFloor( 1.5 ) == 1.0 );
1023 EXPECT(blFloor(-4503599627370496.0) == -4503599627370496.0);
1024 EXPECT(blFloor( 4503599627370496.0) == 4503599627370496.0);
1025 }
1026
1027 INFO("blCeil()");
1028 {
1029 EXPECT(blCeil(-1.5f) ==-1.0f);
1030 EXPECT(blCeil(-1.5 ) ==-1.0 );
1031 EXPECT(blCeil(-0.9f) == 0.0f);
1032 EXPECT(blCeil(-0.9 ) == 0.0 );
1033 EXPECT(blCeil(-0.5f) == 0.0f);
1034 EXPECT(blCeil(-0.5 ) == 0.0 );
1035 EXPECT(blCeil(-0.1f) == 0.0f);
1036 EXPECT(blCeil(-0.1 ) == 0.0 );
1037 EXPECT(blCeil( 0.0f) == 0.0f);
1038 EXPECT(blCeil( 0.0 ) == 0.0 );
1039 EXPECT(blCeil( 0.1f) == 1.0f);
1040 EXPECT(blCeil( 0.1 ) == 1.0 );
1041 EXPECT(blCeil( 0.5f) == 1.0f);
1042 EXPECT(blCeil( 0.5 ) == 1.0 );
1043 EXPECT(blCeil( 0.9f) == 1.0f);
1044 EXPECT(blCeil( 0.9 ) == 1.0 );
1045 EXPECT(blCeil( 1.5f) == 2.0f);
1046 EXPECT(blCeil( 1.5 ) == 2.0 );
1047 EXPECT(blCeil(-4503599627370496.0) == -4503599627370496.0);
1048 EXPECT(blCeil( 4503599627370496.0) == 4503599627370496.0);
1049 }
1050
1051 INFO("blTrunc()");
1052 {
1053 EXPECT(blTrunc(-1.5f) ==-1.0f);
1054 EXPECT(blTrunc(-1.5 ) ==-1.0 );
1055 EXPECT(blTrunc(-0.9f) == 0.0f);
1056 EXPECT(blTrunc(-0.9 ) == 0.0 );
1057 EXPECT(blTrunc(-0.5f) == 0.0f);
1058 EXPECT(blTrunc(-0.5 ) == 0.0 );
1059 EXPECT(blTrunc(-0.1f) == 0.0f);
1060 EXPECT(blTrunc(-0.1 ) == 0.0 );
1061 EXPECT(blTrunc( 0.0f) == 0.0f);
1062 EXPECT(blTrunc( 0.0 ) == 0.0 );
1063 EXPECT(blTrunc( 0.1f) == 0.0f);
1064 EXPECT(blTrunc( 0.1 ) == 0.0 );
1065 EXPECT(blTrunc( 0.5f) == 0.0f);
1066 EXPECT(blTrunc( 0.5 ) == 0.0 );
1067 EXPECT(blTrunc( 0.9f) == 0.0f);
1068 EXPECT(blTrunc( 0.9 ) == 0.0 );
1069 EXPECT(blTrunc( 1.5f) == 1.0f);
1070 EXPECT(blTrunc( 1.5 ) == 1.0 );
1071 EXPECT(blTrunc(-4503599627370496.0) == -4503599627370496.0);
1072 EXPECT(blTrunc( 4503599627370496.0) == 4503599627370496.0);
1073 }
1074
1075 INFO("blRound()");
1076 {
1077 EXPECT(blRound(-1.5f) ==-1.0f);
1078 EXPECT(blRound(-1.5 ) ==-1.0 );
1079 EXPECT(blRound(-0.9f) ==-1.0f);
1080 EXPECT(blRound(-0.9 ) ==-1.0 );
1081 EXPECT(blRound(-0.5f) == 0.0f);
1082 EXPECT(blRound(-0.5 ) == 0.0 );
1083 EXPECT(blRound(-0.1f) == 0.0f);
1084 EXPECT(blRound(-0.1 ) == 0.0 );
1085 EXPECT(blRound( 0.0f) == 0.0f);
1086 EXPECT(blRound( 0.0 ) == 0.0 );
1087 EXPECT(blRound( 0.1f) == 0.0f);
1088 EXPECT(blRound( 0.1 ) == 0.0 );
1089 EXPECT(blRound( 0.5f) == 1.0f);
1090 EXPECT(blRound( 0.5 ) == 1.0 );
1091 EXPECT(blRound( 0.9f) == 1.0f);
1092 EXPECT(blRound( 0.9 ) == 1.0 );
1093 EXPECT(blRound( 1.5f) == 2.0f);
1094 EXPECT(blRound( 1.5 ) == 2.0 );
1095 EXPECT(blRound(-4503599627370496.0) == -4503599627370496.0);
1096 EXPECT(blRound( 4503599627370496.0) == 4503599627370496.0);
1097 }
1098
1099 INFO("blFloorToInt()");
1100 {
1101 EXPECT(blFloorToInt(-1.5f) ==-2);
1102 EXPECT(blFloorToInt(-1.5 ) ==-2);
1103 EXPECT(blFloorToInt(-0.9f) ==-1);
1104 EXPECT(blFloorToInt(-0.9 ) ==-1);
1105 EXPECT(blFloorToInt(-0.5f) ==-1);
1106 EXPECT(blFloorToInt(-0.5 ) ==-1);
1107 EXPECT(blFloorToInt(-0.1f) ==-1);
1108 EXPECT(blFloorToInt(-0.1 ) ==-1);
1109 EXPECT(blFloorToInt( 0.0f) == 0);
1110 EXPECT(blFloorToInt( 0.0 ) == 0);
1111 EXPECT(blFloorToInt( 0.1f) == 0);
1112 EXPECT(blFloorToInt( 0.1 ) == 0);
1113 EXPECT(blFloorToInt( 0.5f) == 0);
1114 EXPECT(blFloorToInt( 0.5 ) == 0);
1115 EXPECT(blFloorToInt( 0.9f) == 0);
1116 EXPECT(blFloorToInt( 0.9 ) == 0);
1117 EXPECT(blFloorToInt( 1.5f) == 1);
1118 EXPECT(blFloorToInt( 1.5 ) == 1);
1119 }
1120
1121 INFO("blCeilToInt()");
1122 {
1123 EXPECT(blCeilToInt(-1.5f) ==-1);
1124 EXPECT(blCeilToInt(-1.5 ) ==-1);
1125 EXPECT(blCeilToInt(-0.9f) == 0);
1126 EXPECT(blCeilToInt(-0.9 ) == 0);
1127 EXPECT(blCeilToInt(-0.5f) == 0);
1128 EXPECT(blCeilToInt(-0.5 ) == 0);
1129 EXPECT(blCeilToInt(-0.1f) == 0);
1130 EXPECT(blCeilToInt(-0.1 ) == 0);
1131 EXPECT(blCeilToInt( 0.0f) == 0);
1132 EXPECT(blCeilToInt( 0.0 ) == 0);
1133 EXPECT(blCeilToInt( 0.1f) == 1);
1134 EXPECT(blCeilToInt( 0.1 ) == 1);
1135 EXPECT(blCeilToInt( 0.5f) == 1);
1136 EXPECT(blCeilToInt( 0.5 ) == 1);
1137 EXPECT(blCeilToInt( 0.9f) == 1);
1138 EXPECT(blCeilToInt( 0.9 ) == 1);
1139 EXPECT(blCeilToInt( 1.5f) == 2);
1140 EXPECT(blCeilToInt( 1.5 ) == 2);
1141 }
1142
1143 INFO("blTruncToInt()");
1144 {
1145 EXPECT(blTruncToInt(-1.5f) ==-1);
1146 EXPECT(blTruncToInt(-1.5 ) ==-1);
1147 EXPECT(blTruncToInt(-0.9f) == 0);
1148 EXPECT(blTruncToInt(-0.9 ) == 0);
1149 EXPECT(blTruncToInt(-0.5f) == 0);
1150 EXPECT(blTruncToInt(-0.5 ) == 0);
1151 EXPECT(blTruncToInt(-0.1f) == 0);
1152 EXPECT(blTruncToInt(-0.1 ) == 0);
1153 EXPECT(blTruncToInt( 0.0f) == 0);
1154 EXPECT(blTruncToInt( 0.0 ) == 0);
1155 EXPECT(blTruncToInt( 0.1f) == 0);
1156 EXPECT(blTruncToInt( 0.1 ) == 0);
1157 EXPECT(blTruncToInt( 0.5f) == 0);
1158 EXPECT(blTruncToInt( 0.5 ) == 0);
1159 EXPECT(blTruncToInt( 0.9f) == 0);
1160 EXPECT(blTruncToInt( 0.9 ) == 0);
1161 EXPECT(blTruncToInt( 1.5f) == 1);
1162 EXPECT(blTruncToInt( 1.5 ) == 1);
1163 }
1164
1165 INFO("blRoundToInt()");
1166 {
1167 EXPECT(blRoundToInt(-1.5f) ==-1);
1168 EXPECT(blRoundToInt(-1.5 ) ==-1);
1169 EXPECT(blRoundToInt(-0.9f) ==-1);
1170 EXPECT(blRoundToInt(-0.9 ) ==-1);
1171 EXPECT(blRoundToInt(-0.5f) == 0);
1172 EXPECT(blRoundToInt(-0.5 ) == 0);
1173 EXPECT(blRoundToInt(-0.1f) == 0);
1174 EXPECT(blRoundToInt(-0.1 ) == 0);
1175 EXPECT(blRoundToInt( 0.0f) == 0);
1176 EXPECT(blRoundToInt( 0.0 ) == 0);
1177 EXPECT(blRoundToInt( 0.1f) == 0);
1178 EXPECT(blRoundToInt( 0.1 ) == 0);
1179 EXPECT(blRoundToInt( 0.5f) == 1);
1180 EXPECT(blRoundToInt( 0.5 ) == 1);
1181 EXPECT(blRoundToInt( 0.9f) == 1);
1182 EXPECT(blRoundToInt( 0.9 ) == 1);
1183 EXPECT(blRoundToInt( 1.5f) == 2);
1184 EXPECT(blRoundToInt( 1.5 ) == 2);
1185 }
1186
1187 INFO("blFrac()");
1188 {
1189 EXPECT(blFrac( 0.00f) == 0.00f);
1190 EXPECT(blFrac( 0.00 ) == 0.00 );
1191 EXPECT(blFrac( 1.00f) == 0.00f);
1192 EXPECT(blFrac( 1.00 ) == 0.00 );
1193 EXPECT(blFrac( 1.25f) == 0.25f);
1194 EXPECT(blFrac( 1.25 ) == 0.25 );
1195 EXPECT(blFrac( 1.75f) == 0.75f);
1196 EXPECT(blFrac( 1.75 ) == 0.75 );
1197 EXPECT(blFrac(-1.00f) == 0.00f);
1198 EXPECT(blFrac(-1.00 ) == 0.00 );
1199 EXPECT(blFrac(-1.25f) == 0.75f);
1200 EXPECT(blFrac(-1.25 ) == 0.75 );
1201 EXPECT(blFrac(-1.75f) == 0.25f);
1202 EXPECT(blFrac(-1.75 ) == 0.25 );
1203 }
1204
1205 INFO("blIsBetween0And1()");
1206 {
1207 EXPECT(blIsBetween0And1( 0.0f ) == true);
1208 EXPECT(blIsBetween0And1( 0.0 ) == true);
1209 EXPECT(blIsBetween0And1( 0.5f ) == true);
1210 EXPECT(blIsBetween0And1( 0.5 ) == true);
1211 EXPECT(blIsBetween0And1( 1.0f ) == true);
1212 EXPECT(blIsBetween0And1( 1.0 ) == true);
1213 EXPECT(blIsBetween0And1(-0.0f ) == true);
1214 EXPECT(blIsBetween0And1(-0.0 ) == true);
1215 EXPECT(blIsBetween0And1(-1.0f ) == false);
1216 EXPECT(blIsBetween0And1(-1.0 ) == false);
1217 EXPECT(blIsBetween0And1( 1.001f) == false);
1218 EXPECT(blIsBetween0And1( 1.001 ) == false);
1219 }
1220
1221 INFO("blQuadRoots");
1222 {
1223 size_t count;
1224 double roots[2];
1225
1226 // x^2 + 4x + 4 == 0
1227 count = blQuadRoots(roots, 1.0, 4.0, 4.0, blMinValue<double>(), blMaxValue<double>());
1228
1229 EXPECT(count == 1);
1230 EXPECT(roots[0] == -2.0);
1231
1232 // -4x^2 + 8x + 12 == 0
1233 count = blQuadRoots(roots, -4.0, 8.0, 12.0, blMinValue<double>(), blMaxValue<double>());
1234
1235 EXPECT(count == 2);
1236 EXPECT(roots[0] == -1.0);
1237 EXPECT(roots[1] == 3.0);
1238 }
1239}
1240#endif
1241