1#include "mupdf/fitz.h"
2
3#include <assert.h>
4#include <errno.h>
5#include <float.h>
6
7#ifndef INFINITY
8#define INFINITY (DBL_MAX+DBL_MAX)
9#endif
10#ifndef NAN
11#define NAN (INFINITY-INFINITY)
12#endif
13
14/*
15 We use "Algorithm D" from "Contributions to a Proposed Standard for Binary
16 Floating-Point Arithmetic" by Jerome Coonen (1984).
17
18 The implementation uses a self-made floating point type, 'strtof_fp_t', with
19 a 32-bit significand. The steps of the algorithm are
20
21 INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp.
22 OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp.
23
24 1) Convert the integer d1 ... d9 to an strtof_fp_t x.
25 2) Lookup the strtof_fp_t power = 10 ^ |dexp|.
26 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'.
27 4) Round x to a float using rounding mode 'to even'.
28
29 Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer.
30 In the case |dexp| <= 13 the cached power is exact and the algorithm returns
31 the exactly rounded result (with rounding mode 'to even').
32 There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'.
33
34 For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp.
35 This is small enough to ensure that binary to decimal to binary conversion
36 is the identity if the decimal format uses 9 correctly rounded significant digits.
37*/
38typedef struct strtof_fp_t
39{
40 uint32_t f;
41 int e;
42} strtof_fp_t;
43
44/* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */
45
46static strtof_fp_t
47strtof_multiply(strtof_fp_t x, strtof_fp_t y)
48{
49 uint64_t tmp;
50 strtof_fp_t res;
51
52 assert(x.f & y.f & 0x80000000);
53
54 res.e = x.e + y.e + 32;
55 tmp = (uint64_t) x.f * y.f;
56 /* Normalize. */
57 if ((tmp < ((uint64_t) 1 << 63)))
58 {
59 tmp <<= 1;
60 --res.e;
61 }
62
63 res.f = tmp >> 32;
64
65 /* Set the last bit of the significand to 1 if the result is
66 inexact. */
67 if (tmp & 0xffffffff)
68 res.f |= 1;
69 return res;
70}
71
72static strtof_fp_t
73divide(strtof_fp_t x, strtof_fp_t y)
74{
75 uint64_t product, quotient;
76 uint32_t remainder;
77 strtof_fp_t res;
78
79 res.e = x.e - y.e - 32;
80 product = (uint64_t) x.f << 32;
81 quotient = product / y.f;
82 remainder = product % y.f;
83 /* 2^31 <= quotient <= 2^33 - 2. */
84 if (quotient <= 0xffffffff)
85 res.f = quotient;
86 else
87 {
88 ++res.e;
89 /* If quotient % 2 != 0 we have remainder != 0. */
90 res.f = quotient >> 1;
91 }
92 if (remainder)
93 res.f |= 1;
94 return res;
95}
96
97/* From 10^0 to 10^54. Generated with GNU MPFR. */
98static const uint32_t strtof_powers_ten[55] = {
99 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000,
100 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740,
101 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f,
102 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f,
103 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7,
104 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96,
105 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9,
106 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e,
107 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367,
108 0xa70c3c41
109};
110static const int strtof_powers_ten_e[55] = {
111 -31, -28, -25, -22, -18, -15, -12, -8, -5, -2,
112 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65,
113 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121,
114 125, 128, 131, 135, 138, 141, 145, 148
115};
116
117static strtof_fp_t
118strtof_cached_power(int i)
119{
120 strtof_fp_t result;
121 assert (i >= 0 && i <= 54);
122 result.f = strtof_powers_ten[i];
123 result.e = strtof_powers_ten_e[i];
124 return result;
125}
126
127/* Find number of leading zero bits in an uint32_t. Derived from the
128 "Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html. */
129static unsigned char clz_table[256] = {
130 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
131# define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
132 sixteen_times (3) sixteen_times (2) sixteen_times (2)
133 sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
134 /* Zero for the rest. */
135};
136static unsigned
137leading_zeros (uint32_t x)
138{
139 unsigned tmp1, tmp2;
140
141 tmp1 = x >> 16;
142 if (tmp1)
143 {
144 tmp2 = tmp1 >> 8;
145 if (tmp2)
146 return clz_table[tmp2];
147 else
148 return 8 + clz_table[tmp1];
149 }
150 else
151 {
152 tmp1 = x >> 8;
153 if (tmp1)
154 return 16 + clz_table[tmp1];
155 else
156 return 24 + clz_table[x];
157 }
158}
159
160static strtof_fp_t
161uint32_to_diy (uint32_t x)
162{
163 strtof_fp_t result = {x, 0};
164 unsigned shift = leading_zeros(x);
165
166 result.f <<= shift;
167 result.e -= shift;
168 return result;
169}
170
171#define SP_SIGNIFICAND_SIZE 23
172#define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE)
173#define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS)
174#define SP_EXPONENT_MASK 0x7f800000
175#define SP_SIGNIFICAND_MASK 0x7fffff
176#define SP_HIDDEN_BIT 0x800000 /* 2^23 */
177
178/* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'.
179 See "Implementing IEEE 754-2008 Rounding" in the
180 "Handbook of Floating-Point Arithmetik".
181*/
182static float
183diy_to_float(strtof_fp_t x, int negative)
184{
185 uint32_t result;
186 union
187 {
188 float f;
189 uint32_t n;
190 } tmp;
191
192 assert(x.f & 0x80000000);
193
194 /* We have 2^32 - 2^7 = 0xffffff80. */
195 if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80))
196 {
197 /* Overflow. Set result to infinity. */
198 errno = ERANGE;
199 result = 0xff << SP_SIGNIFICAND_SIZE;
200 }
201 /* We have 2^32 - 2^8 = 0xffffff00. */
202 else if (x.e > -158)
203 {
204 /* x is greater or equal to FLT_MAX. So we get a normalized number. */
205 result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE;
206 result |= (x.f >> 8) & SP_SIGNIFICAND_MASK;
207
208 if (x.f & 0x80)
209 {
210 /* Round-bit is set. */
211 if (x.f & 0x7f)
212 /* Sticky-bit is set. */
213 ++result;
214 else if (x.f & 0x100)
215 /* Significand is odd. */
216 ++result;
217 }
218 }
219 else if (x.e == -158 && x.f >= 0xffffff00)
220 {
221 /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than
222 FLT_MIN but still rounds to it. */
223 result = 1U << SP_SIGNIFICAND_SIZE;
224 }
225 else if (x.e > -181)
226 {
227 /* Non-zero Denormal. */
228 int shift = -149 - x.e; /* 9 <= shift <= 31. */
229
230 result = x.f >> shift;
231
232 if (x.f & (1U << (shift - 1)))
233 /* Round-bit is set. */
234 {
235 if (x.f & ((1U << (shift - 1)) - 1))
236 /* Sticky-bit is set. */
237 ++result;
238 else if (x.f & 1U << shift)
239 /* Significand is odd. */
240 ++result;
241 }
242 }
243 else if (x.e == -181 && x.f > 0x80000000)
244 {
245 /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest
246 denormal. Can't handle this in the previous case as shifting a
247 uint32_t 32 bits to the right is undefined behaviour. */
248 result = 1;
249 }
250 else
251 {
252 /* Underflow. */
253 errno = ERANGE;
254 result = 0;
255 }
256
257 if (negative)
258 result |= 0x80000000;
259
260 tmp.n = result;
261 return tmp.f;
262}
263
264static float
265scale_integer_to_float(uint32_t M, int N, int negative)
266{
267 strtof_fp_t result, x, power;
268
269 if (M == 0)
270 return negative ? -0.f : 0.f;
271 if (N > 38)
272 {
273 /* Overflow. */
274 errno = ERANGE;
275 return negative ? -INFINITY : INFINITY;
276 }
277 if (N < -54)
278 {
279 /* Underflow. */
280 errno = ERANGE;
281 return negative ? -0.f : 0.f;
282 }
283 /* If N is in the range {-13, ..., 13} the conversion is exact.
284 Try to scale N into this region. */
285 while (N > 13 && M <= 0xffffffff / 10)
286 {
287 M *= 10;
288 --N;
289 }
290
291 while (N < -13 && M % 10 == 0)
292 {
293 M /= 10;
294 ++N;
295 }
296
297 x = uint32_to_diy (M);
298 if (N >= 0)
299 {
300 power = strtof_cached_power(N);
301 result = strtof_multiply(x, power);
302 }
303 else
304 {
305 power = strtof_cached_power(-N);
306 result = divide(x, power);
307 }
308
309 return diy_to_float(result, negative);
310}
311
312/* Return non-zero if *s starts with string (must be uppercase), ignoring case,
313 and increment *s by its length. */
314static int
315starts_with(const char **s, const char *string)
316{
317 const char *x = *s, *y = string;
318 while (*x && *y && (*x == *y || *x == *y + 32))
319 ++x, ++y;
320 if (*y == 0)
321 {
322 /* Match. */
323 *s = x;
324 return 1;
325 }
326 else
327 return 0;
328}
329#define SET_TAILPTR(tailptr, s) \
330 do \
331 if (tailptr) \
332 *tailptr = (char *) s; \
333 while (0)
334
335/*
336 Locale-independent decimal to binary
337 conversion. On overflow return (-)INFINITY and set errno to ERANGE. On
338 underflow return 0 and set errno to ERANGE. Special inputs (case
339 insensitive): "NAN", "INF" or "INFINITY".
340*/
341float
342fz_strtof(const char *string, char **tailptr)
343{
344 /* FIXME: error (1/2 + 1/256) ulp */
345 const char *s;
346 uint32_t M = 0;
347 int N = 0;
348 /* If decimal_digits gets 9 we truncate all following digits. */
349 int decimal_digits = 0;
350 int negative = 0;
351 const char *number_start = 0;
352
353 /* Skip leading whitespace (isspace in "C" locale). */
354 s = string;
355 while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v')
356 ++s;
357
358 /* Parse sign. */
359 if (*s == '+')
360 ++s;
361 if (*s == '-')
362 {
363 negative = 1;
364 ++s;
365 }
366 number_start = s;
367 /* Parse digits before decimal point. */
368 while (*s >= '0' && *s <= '9')
369 {
370 if (decimal_digits)
371 {
372 if (decimal_digits < 9)
373 {
374 ++decimal_digits;
375 M = M * 10 + *s - '0';
376 }
377 /* Really arcane strings might overflow N. */
378 else if (N < 1000)
379 ++N;
380 }
381 else if (*s > '0')
382 {
383 M = *s - '0';
384 ++decimal_digits;
385 }
386 ++s;
387 }
388
389 /* Parse decimal point. */
390 if (*s == '.')
391 ++s;
392
393 /* Parse digits after decimal point. */
394 while (*s >= '0' && *s <= '9')
395 {
396 if (decimal_digits < 9)
397 {
398 if (decimal_digits || *s > '0')
399 {
400 ++decimal_digits;
401 M = M * 10 + *s - '0';
402 }
403 --N;
404 }
405 ++s;
406 }
407 if ((s == number_start + 1 && *number_start == '.') || number_start == s)
408 {
409 /* No Number. Check for INF and NAN strings. */
410 s = number_start;
411 if (starts_with(&s, "INFINITY") || starts_with(&s, "INF"))
412 {
413 errno = ERANGE;
414 SET_TAILPTR(tailptr, s);
415 return negative ? -INFINITY : +INFINITY;
416 }
417 else if (starts_with(&s, "NAN"))
418 {
419 SET_TAILPTR(tailptr, s);
420 return (float)NAN;
421 }
422 else
423 {
424 SET_TAILPTR(tailptr, string);
425 return 0.f;
426 }
427 }
428
429 /* Parse exponent. */
430 if (*s == 'e' || *s == 'E')
431 {
432 int exp_negative = 0;
433 int exp = 0;
434 const char *int_start;
435 const char *exp_start = s;
436
437 ++s;
438 if (*s == '+')
439 ++s;
440 else if (*s == '-')
441 {
442 ++s;
443 exp_negative = 1;
444 }
445 int_start = s;
446 /* Parse integer. */
447 while (*s >= '0' && *s <= '9')
448 {
449 /* Make sure exp does not get overflowed. */
450 if (exp < 100)
451 exp = exp * 10 + *s - '0';
452 ++s;
453 }
454 if (exp_negative)
455 exp = -exp;
456 if (s == int_start)
457 /* No Number. */
458 s = exp_start;
459 else
460 N += exp;
461 }
462
463 SET_TAILPTR(tailptr, s);
464 return scale_integer_to_float(M, N, negative);
465}
466