1 | #include "mupdf/fitz.h" |
2 | |
3 | #include <assert.h> |
4 | #include <errno.h> |
5 | #include <float.h> |
6 | |
/* Fallback for environments whose headers do not define INFINITY:
	DBL_MAX+DBL_MAX is intended to overflow to positive infinity. */
#ifndef INFINITY
#define INFINITY (DBL_MAX+DBL_MAX)
#endif
/* Fallback for environments whose headers do not define NAN; built from
	INFINITY above as infinity minus infinity. */
#ifndef NAN
#define NAN (INFINITY-INFINITY)
#endif
13 | |
14 | /* |
15 | We use "Algorithm D" from "Contributions to a Proposed Standard for Binary |
16 | Floating-Point Arithmetic" by Jerome Coonen (1984). |
17 | |
18 | The implementation uses a self-made floating point type, 'strtof_fp_t', with |
19 | a 32-bit significand. The steps of the algorithm are |
20 | |
21 | INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp. |
22 | OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp. |
23 | |
24 | 1) Convert the integer d1 ... d9 to an strtof_fp_t x. |
25 | 2) Lookup the strtof_fp_t power = 10 ^ |dexp|. |
26 | 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'. |
27 | 4) Round x to a float using rounding mode 'to even'. |
28 | |
29 | Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer. |
30 | In the case |dexp| <= 13 the cached power is exact and the algorithm returns |
31 | the exactly rounded result (with rounding mode 'to even'). |
32 | There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'. |
33 | |
34 | For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp. |
35 | This is small enough to ensure that binary to decimal to binary conversion |
36 | is the identity if the decimal format uses 9 correctly rounded significant digits. |
37 | */ |
/* Minimal binary floating point value: f * 2^e.
	The arithmetic helpers in this file expect 'normalized' values, i.e.
	values whose most significant bit of f (0x80000000) is set. */
typedef struct strtof_fp_t
{
	uint32_t f; /* 32-bit significand. */
	int e; /* Binary exponent applied to f. */
} strtof_fp_t;
43 | |
44 | /* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */ |
45 | |
46 | static strtof_fp_t |
47 | strtof_multiply(strtof_fp_t x, strtof_fp_t y) |
48 | { |
49 | uint64_t tmp; |
50 | strtof_fp_t res; |
51 | |
52 | assert(x.f & y.f & 0x80000000); |
53 | |
54 | res.e = x.e + y.e + 32; |
55 | tmp = (uint64_t) x.f * y.f; |
56 | /* Normalize. */ |
57 | if ((tmp < ((uint64_t) 1 << 63))) |
58 | { |
59 | tmp <<= 1; |
60 | --res.e; |
61 | } |
62 | |
63 | res.f = tmp >> 32; |
64 | |
65 | /* Set the last bit of the significand to 1 if the result is |
66 | inexact. */ |
67 | if (tmp & 0xffffffff) |
68 | res.f |= 1; |
69 | return res; |
70 | } |
71 | |
72 | static strtof_fp_t |
73 | divide(strtof_fp_t x, strtof_fp_t y) |
74 | { |
75 | uint64_t product, quotient; |
76 | uint32_t remainder; |
77 | strtof_fp_t res; |
78 | |
79 | res.e = x.e - y.e - 32; |
80 | product = (uint64_t) x.f << 32; |
81 | quotient = product / y.f; |
82 | remainder = product % y.f; |
83 | /* 2^31 <= quotient <= 2^33 - 2. */ |
84 | if (quotient <= 0xffffffff) |
85 | res.f = quotient; |
86 | else |
87 | { |
88 | ++res.e; |
89 | /* If quotient % 2 != 0 we have remainder != 0. */ |
90 | res.f = quotient >> 1; |
91 | } |
92 | if (remainder) |
93 | res.f |= 1; |
94 | return res; |
95 | } |
96 | |
/* From 10^0 to 10^54. Generated with GNU MPFR.
	strtof_powers_ten[i] holds the normalized 32-bit significand and
	strtof_powers_ten_e[i] the matching binary exponent, so that
	10^i is represented by strtof_powers_ten[i] * 2^strtof_powers_ten_e[i]
	(for example 10^1 = 0xa0000000 * 2^-28). */
static const uint32_t strtof_powers_ten[55] = {
	0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000,
	0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740,
	0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f,
	0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f,
	0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7,
	0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96,
	0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9,
	0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e,
	0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367,
	0xa70c3c41
};
/* Binary exponents belonging to the significands in strtof_powers_ten. */
static const int strtof_powers_ten_e[55] = {
	-31, -28, -25, -22, -18, -15, -12, -8, -5, -2,
	2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65,
	68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121,
	125, 128, 131, 135, 138, 141, 145, 148
};
116 | |
117 | static strtof_fp_t |
118 | strtof_cached_power(int i) |
119 | { |
120 | strtof_fp_t result; |
121 | assert (i >= 0 && i <= 54); |
122 | result.f = strtof_powers_ten[i]; |
123 | result.e = strtof_powers_ten_e[i]; |
124 | return result; |
125 | } |
126 | |
/* Find number of leading zero bits in an uint32_t. Derived from the
	"Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html.

	clz_table[b] is the number of leading zero bits of the 8-bit value b
	(8 for b == 0). The table is read-only lookup data, hence const. */
static const unsigned char clz_table[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
#define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	sixteen_times (3) sixteen_times (2) sixteen_times (2)
	sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
#undef sixteen_times
	/* Zero for the rest. */
};

/* Return the number of leading zero bits in x (32 for x == 0). */
static unsigned
leading_zeros (uint32_t x)
{
	/* Locate the most significant non-zero byte, look up its own count of
		leading zeros and add 8 for every all-zero byte above it. */
	if (x >> 24)
		return clz_table[x >> 24];
	if (x >> 16)
		return 8 + clz_table[x >> 16];
	if (x >> 8)
		return 16 + clz_table[x >> 8];
	return 24 + clz_table[x];
}
159 | |
160 | static strtof_fp_t |
161 | uint32_to_diy (uint32_t x) |
162 | { |
163 | strtof_fp_t result = {x, 0}; |
164 | unsigned shift = leading_zeros(x); |
165 | |
166 | result.f <<= shift; |
167 | result.e -= shift; |
168 | return result; |
169 | } |
170 | |
/* Bit-layout constants for the IEEE-754 single precision format that
	diy_to_float() assembles. Of these, only SP_SIGNIFICAND_SIZE and
	SP_SIGNIFICAND_MASK are referenced by the code visible in this file. */
#define SP_SIGNIFICAND_SIZE 23 /* Number of stored significand bits; the exponent field sits above them. */
#define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE)
#define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS)
#define SP_EXPONENT_MASK 0x7f800000
#define SP_SIGNIFICAND_MASK 0x7fffff /* Selects the low SP_SIGNIFICAND_SIZE bits. */
#define SP_HIDDEN_BIT 0x800000 /* 2^23 */
177 | |
/* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'.
	See "Implementing IEEE 754-2008 Rounding" in the
	"Handbook of Floating-Point Arithmetic".

	x is the magnitude x.f * 2^x.e and must be normalized (checked with an
	assert); a non-zero 'negative' sets the sign bit of the result.
	Sets errno to ERANGE when the result overflows to infinity or
	underflows to zero.
*/
static float
diy_to_float(strtof_fp_t x, int negative)
{
	/* Bit pattern of the float being assembled. */
	uint32_t result;
	/* Used to reinterpret the assembled bit pattern as a float. */
	union
	{
		float f;
		uint32_t n;
	} tmp;

	assert(x.f & 0x80000000);

	/* We have 2^32 - 2^7 = 0xffffff80. Significands at or above this value
		round up to 2^32, which at exponent 96 is no longer representable. */
	if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80))
	{
		/* Overflow. Set result to infinity. */
		errno = ERANGE;
		result = 0xff << SP_SIGNIFICAND_SIZE;
	}
	else if (x.e > -158)
	{
		/* x is greater or equal to FLT_MIN. So we get a normalized number. */
		result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE;
		/* Keep the 23 bits below the leading (hidden) bit. */
		result |= (x.f >> 8) & SP_SIGNIFICAND_MASK;

		if (x.f & 0x80)
		{
			/* Round-bit is set. */
			if (x.f & 0x7f)
				/* Sticky-bit is set. */
				++result;
			else if (x.f & 0x100)
				/* Significand is odd. */
				++result;
			/* A carry out of the significand bits increments the
				exponent field, which is the intended result. */
		}
	}
	/* We have 2^32 - 2^8 = 0xffffff00. */
	else if (x.e == -158 && x.f >= 0xffffff00)
	{
		/* x is in the range [2^32 - 2^8, 2^32) * 2^-158, so it is smaller
			than FLT_MIN but still rounds to it. */
		result = 1U << SP_SIGNIFICAND_SIZE;
	}
	else if (x.e > -181)
	{
		/* Non-zero Denormal. */
		int shift = -149 - x.e; /* 9 <= shift <= 31. */

		result = x.f >> shift;

		if (x.f & (1U << (shift - 1)))
			/* Round-bit is set. */
		{
			if (x.f & ((1U << (shift - 1)) - 1))
				/* Sticky-bit is set. */
				++result;
			else if (x.f & 1U << shift)
				/* Significand is odd. */
				++result;
		}
	}
	else if (x.e == -181 && x.f > 0x80000000)
	{
		/* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest
			denormal. Can't handle this in the previous case as shifting a
			uint32_t 32 bits to the right is undefined behaviour.
			x.f == 0x80000000 is exactly half way and falls through to
			the underflow case below (zero is the even neighbour). */
		result = 1;
	}
	else
	{
		/* Underflow. */
		errno = ERANGE;
		result = 0;
	}

	if (negative)
		result |= 0x80000000;

	tmp.n = result;
	return tmp.f;
}
263 | |
264 | static float |
265 | scale_integer_to_float(uint32_t M, int N, int negative) |
266 | { |
267 | strtof_fp_t result, x, power; |
268 | |
269 | if (M == 0) |
270 | return negative ? -0.f : 0.f; |
271 | if (N > 38) |
272 | { |
273 | /* Overflow. */ |
274 | errno = ERANGE; |
275 | return negative ? -INFINITY : INFINITY; |
276 | } |
277 | if (N < -54) |
278 | { |
279 | /* Underflow. */ |
280 | errno = ERANGE; |
281 | return negative ? -0.f : 0.f; |
282 | } |
283 | /* If N is in the range {-13, ..., 13} the conversion is exact. |
284 | Try to scale N into this region. */ |
285 | while (N > 13 && M <= 0xffffffff / 10) |
286 | { |
287 | M *= 10; |
288 | --N; |
289 | } |
290 | |
291 | while (N < -13 && M % 10 == 0) |
292 | { |
293 | M /= 10; |
294 | ++N; |
295 | } |
296 | |
297 | x = uint32_to_diy (M); |
298 | if (N >= 0) |
299 | { |
300 | power = strtof_cached_power(N); |
301 | result = strtof_multiply(x, power); |
302 | } |
303 | else |
304 | { |
305 | power = strtof_cached_power(-N); |
306 | result = divide(x, power); |
307 | } |
308 | |
309 | return diy_to_float(result, negative); |
310 | } |
311 | |
/* Case-insensitive prefix test. 'string' must be given in uppercase; a
	character of *s matches if it equals the expected character or the
	expected character plus 32 (its lowercase form for ASCII letters).
	On a match *s is advanced past the prefix and non-zero is returned;
	otherwise *s is left untouched and 0 is returned. */
static int
starts_with(const char **s, const char *string)
{
	const char *cursor = *s;
	const char *expected;

	for (expected = string; *expected != 0; ++expected, ++cursor)
	{
		/* Input ended before the whole prefix was seen. */
		if (*cursor == 0)
			return 0;
		if (*cursor != *expected && *cursor != *expected + 32)
			return 0;
	}

	/* Match. */
	*s = cursor;
	return 1;
}
/* Store s in *tailptr unless tailptr is a null pointer. The cast drops the
	const qualifier of s because the tail pointer is handed back as 'char *'.
	Arguments are parenthesized so that expressions can be passed safely;
	note that tailptr is evaluated twice. */
#define SET_TAILPTR(tailptr, s) \
	do { \
		if (tailptr) \
			*(tailptr) = (char *) (s); \
	} while (0)
334 | |
/*
	Locale-independent decimal to binary conversion of the text at 'string'.

	Accepted input: optional leading whitespace, an optional sign, decimal
	digits with an optional '.', and an optional exponent ('e' or 'E', an
	optional sign and at least one digit). Special inputs (case
	insensitive): "NAN", "INF" or "INFINITY".

	Only the first 9 significant digits contribute to the value; any further
	digits are truncated and merely adjust the decimal exponent.

	On overflow return (-)INFINITY and set errno to ERANGE. On underflow
	return (-)0 and set errno to ERANGE. The inputs "INF" and "INFINITY"
	also set errno to ERANGE.

	If tailptr is not a null pointer, *tailptr is set to the first character
	after the parsed number, or to 'string' itself (and 0 is returned) if no
	number could be parsed.
*/
float
fz_strtof(const char *string, char **tailptr)
{
	/* FIXME: error (1/2 + 1/256) ulp */
	/* Saturation value for the explicit exponent. It only has to be far
		larger than any digit count N can reach, so that a saturated
		exponent always dominates N (assumes the input is shorter than
		2^40 characters). */
	const int64_t exp_limit = (int64_t) 1 << 40;
	const char *s;
	uint32_t M = 0;
	/* Decimal exponent belonging to M. It is counted in 64 bits without
		clamping: it changes by at most one per input character, so it
		cannot overflow, and an exact count is needed because an explicit
		exponent of opposite sign may cancel most of it. */
	int64_t N = 0;
	/* If decimal_digits gets 9 we truncate all following digits. */
	int decimal_digits = 0;
	int negative = 0;
	const char *number_start = 0;

	/* Skip leading whitespace (isspace in "C" locale). */
	s = string;
	while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v')
		++s;

	/* Parse sign. NOTE: the two tests are independent, so a '+' directly
		followed by a '-' is accepted ("+-1" parses as -1). This is kept
		as is for backward compatibility. */
	if (*s == '+')
		++s;
	if (*s == '-')
	{
		negative = 1;
		++s;
	}
	number_start = s;
	/* Parse digits before decimal point. */
	while (*s >= '0' && *s <= '9')
	{
		if (decimal_digits)
		{
			if (decimal_digits < 9)
			{
				++decimal_digits;
				M = M * 10 + *s - '0';
			}
			else
				/* Truncated digit: it still scales the value by ten. */
				++N;
		}
		else if (*s > '0')
		{
			/* First significant digit; leading zeros are skipped. */
			M = *s - '0';
			++decimal_digits;
		}
		++s;
	}

	/* Parse decimal point. */
	if (*s == '.')
		++s;

	/* Parse digits after decimal point. */
	while (*s >= '0' && *s <= '9')
	{
		if (decimal_digits < 9)
		{
			if (decimal_digits || *s > '0')
			{
				++decimal_digits;
				M = M * 10 + *s - '0';
			}
			/* Every fractional position consumed, including leading
				zeros, lowers the decimal exponent by one. */
			--N;
		}
		++s;
	}
	if ((s == number_start + 1 && *number_start == '.') || number_start == s)
	{
		/* No Number. Check for INF and NAN strings. */
		s = number_start;
		if (starts_with(&s, "INFINITY" ) || starts_with(&s, "INF" ))
		{
			errno = ERANGE;
			SET_TAILPTR(tailptr, s);
			return negative ? -INFINITY : +INFINITY;
		}
		else if (starts_with(&s, "NAN" ))
		{
			SET_TAILPTR(tailptr, s);
			return (float)NAN;
		}
		else
		{
			SET_TAILPTR(tailptr, string);
			return 0.f;
		}
	}

	/* Parse exponent. */
	if (*s == 'e' || *s == 'E')
	{
		int exp_negative = 0;
		int64_t exp = 0;
		const char *int_start;
		const char *exp_start = s;

		++s;
		if (*s == '+')
			++s;
		else if (*s == '-')
		{
			++s;
			exp_negative = 1;
		}
		int_start = s;
		/* Parse integer. */
		while (*s >= '0' && *s <= '9')
		{
			/* Saturate instead of dropping digits: a huge exponent
				must stay huge, otherwise e.g. "e1234" would be read
				as 123 and could be cancelled by a long run of
				fractional zeros. */
			if (exp < exp_limit)
			{
				exp = exp * 10 + (*s - '0');
				if (exp > exp_limit)
					exp = exp_limit;
			}
			++s;
		}
		if (s == int_start)
			/* No Number. The 'e' is not part of the number. */
			s = exp_start;
		else
			N += exp_negative ? -exp : exp;
	}

	/* Every exponent outside [-54, 38] is reported as underflow or overflow
		by scale_integer_to_float(), so the combined exponent can be clamped
		into the range of int without changing the result. */
	if (N > 1000)
		N = 1000;
	else if (N < -1000)
		N = -1000;

	SET_TAILPTR(tailptr, s);
	return scale_integer_to_float(M, (int) N, negative);
}
466 | |