| 1 | #include "mupdf/fitz.h" |
| 2 | |
| 3 | #include <assert.h> |
| 4 | #include <errno.h> |
| 5 | #include <float.h> |
| 6 | |
| 7 | #ifndef INFINITY |
| 8 | #define INFINITY (DBL_MAX+DBL_MAX) |
| 9 | #endif |
| 10 | #ifndef NAN |
| 11 | #define NAN (INFINITY-INFINITY) |
| 12 | #endif |
| 13 | |
| 14 | /* |
| 15 | We use "Algorithm D" from "Contributions to a Proposed Standard for Binary |
| 16 | Floating-Point Arithmetic" by Jerome Coonen (1984). |
| 17 | |
| 18 | The implementation uses a self-made floating point type, 'strtof_fp_t', with |
| 19 | a 32-bit significand. The steps of the algorithm are |
| 20 | |
| 21 | INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp. |
| 22 | OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp. |
| 23 | |
| 24 | 1) Convert the integer d1 ... d9 to an strtof_fp_t x. |
| 25 | 2) Lookup the strtof_fp_t power = 10 ^ |dexp|. |
| 26 | 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'. |
| 27 | 4) Round x to a float using rounding mode 'to even'. |
| 28 | |
| 29 | Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer. |
| 30 | In the case |dexp| <= 13 the cached power is exact and the algorithm returns |
| 31 | the exactly rounded result (with rounding mode 'to even'). |
| 32 | There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'. |
| 33 | |
| 34 | For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp. |
| 35 | This is small enough to ensure that binary to decimal to binary conversion |
| 36 | is the identity if the decimal format uses 9 correctly rounded significant digits. |
| 37 | */ |
/* Software floating point number with the value f * 2^e. The helpers in
	this file call it "normalized" when the most significant bit of f is set. */
typedef struct strtof_fp_t
{
	uint32_t f;	/* 32-bit significand. */
	int e;		/* Binary exponent applied to f. */
} strtof_fp_t;
| 43 | |
| 44 | /* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */ |
| 45 | |
| 46 | static strtof_fp_t |
| 47 | strtof_multiply(strtof_fp_t x, strtof_fp_t y) |
| 48 | { |
| 49 | uint64_t tmp; |
| 50 | strtof_fp_t res; |
| 51 | |
| 52 | assert(x.f & y.f & 0x80000000); |
| 53 | |
| 54 | res.e = x.e + y.e + 32; |
| 55 | tmp = (uint64_t) x.f * y.f; |
| 56 | /* Normalize. */ |
| 57 | if ((tmp < ((uint64_t) 1 << 63))) |
| 58 | { |
| 59 | tmp <<= 1; |
| 60 | --res.e; |
| 61 | } |
| 62 | |
| 63 | res.f = tmp >> 32; |
| 64 | |
| 65 | /* Set the last bit of the significand to 1 if the result is |
| 66 | inexact. */ |
| 67 | if (tmp & 0xffffffff) |
| 68 | res.f |= 1; |
| 69 | return res; |
| 70 | } |
| 71 | |
| 72 | static strtof_fp_t |
| 73 | divide(strtof_fp_t x, strtof_fp_t y) |
| 74 | { |
| 75 | uint64_t product, quotient; |
| 76 | uint32_t remainder; |
| 77 | strtof_fp_t res; |
| 78 | |
| 79 | res.e = x.e - y.e - 32; |
| 80 | product = (uint64_t) x.f << 32; |
| 81 | quotient = product / y.f; |
| 82 | remainder = product % y.f; |
| 83 | /* 2^31 <= quotient <= 2^33 - 2. */ |
| 84 | if (quotient <= 0xffffffff) |
| 85 | res.f = quotient; |
| 86 | else |
| 87 | { |
| 88 | ++res.e; |
| 89 | /* If quotient % 2 != 0 we have remainder != 0. */ |
| 90 | res.f = quotient >> 1; |
| 91 | } |
| 92 | if (remainder) |
| 93 | res.f |= 1; |
| 94 | return res; |
| 95 | } |
| 96 | |
| 97 | /* From 10^0 to 10^54. Generated with GNU MPFR. */ |
| 98 | static const uint32_t strtof_powers_ten[55] = { |
| 99 | 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000, |
| 100 | 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740, |
| 101 | 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f, |
| 102 | 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f, |
| 103 | 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7, |
| 104 | 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96, |
| 105 | 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9, |
| 106 | 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e, |
| 107 | 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367, |
| 108 | 0xa70c3c41 |
| 109 | }; |
| 110 | static const int strtof_powers_ten_e[55] = { |
| 111 | -31, -28, -25, -22, -18, -15, -12, -8, -5, -2, |
| 112 | 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65, |
| 113 | 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121, |
| 114 | 125, 128, 131, 135, 138, 141, 145, 148 |
| 115 | }; |
| 116 | |
| 117 | static strtof_fp_t |
| 118 | strtof_cached_power(int i) |
| 119 | { |
| 120 | strtof_fp_t result; |
| 121 | assert (i >= 0 && i <= 54); |
| 122 | result.f = strtof_powers_ten[i]; |
| 123 | result.e = strtof_powers_ten_e[i]; |
| 124 | return result; |
| 125 | } |
| 126 | |
/* Number of leading zero bits of every byte value; clz_table[0] is 8.
	Derived from the "Bit Twiddling Hacks" at
	graphics.stanford.edu/~seander/bithacks.html. */
static const unsigned char clz_table[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
#define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	sixteen_times (3) sixteen_times (2) sixteen_times (2)
	sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
#undef sixteen_times
	/* Entries 128..255 have the top bit set and stay zero-initialized. */
};

/* Return the number of leading zero bits in the 32-bit value x.
	Returns 32 for x == 0. */
static unsigned
leading_zeros (uint32_t x)
{
	/* Find the most significant non-zero byte, then look up the zeros
		inside it and add the zero bytes above it. */
	if (x >> 24)
		return clz_table[x >> 24];
	if (x >> 16)
		return 8 + clz_table[x >> 16];
	if (x >> 8)
		return 16 + clz_table[x >> 8];
	return 24 + clz_table[x];
}
| 159 | |
| 160 | static strtof_fp_t |
| 161 | uint32_to_diy (uint32_t x) |
| 162 | { |
| 163 | strtof_fp_t result = {x, 0}; |
| 164 | unsigned shift = leading_zeros(x); |
| 165 | |
| 166 | result.f <<= shift; |
| 167 | result.e -= shift; |
| 168 | return result; |
| 169 | } |
| 170 | |
| 171 | #define SP_SIGNIFICAND_SIZE 23 |
| 172 | #define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE) |
| 173 | #define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS) |
| 174 | #define SP_EXPONENT_MASK 0x7f800000 |
| 175 | #define SP_SIGNIFICAND_MASK 0x7fffff |
| 176 | #define SP_HIDDEN_BIT 0x800000 /* 2^23 */ |
| 177 | |
| 178 | /* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'. |
| 179 | See "Implementing IEEE 754-2008 Rounding" in the |
| 180 | "Handbook of Floating-Point Arithmetik". |
| 181 | */ |
| 182 | static float |
| 183 | diy_to_float(strtof_fp_t x, int negative) |
| 184 | { |
| 185 | uint32_t result; |
| 186 | union |
| 187 | { |
| 188 | float f; |
| 189 | uint32_t n; |
| 190 | } tmp; |
| 191 | |
| 192 | assert(x.f & 0x80000000); |
| 193 | |
| 194 | /* We have 2^32 - 2^7 = 0xffffff80. */ |
| 195 | if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80)) |
| 196 | { |
| 197 | /* Overflow. Set result to infinity. */ |
| 198 | errno = ERANGE; |
| 199 | result = 0xff << SP_SIGNIFICAND_SIZE; |
| 200 | } |
| 201 | /* We have 2^32 - 2^8 = 0xffffff00. */ |
| 202 | else if (x.e > -158) |
| 203 | { |
| 204 | /* x is greater or equal to FLT_MAX. So we get a normalized number. */ |
| 205 | result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE; |
| 206 | result |= (x.f >> 8) & SP_SIGNIFICAND_MASK; |
| 207 | |
| 208 | if (x.f & 0x80) |
| 209 | { |
| 210 | /* Round-bit is set. */ |
| 211 | if (x.f & 0x7f) |
| 212 | /* Sticky-bit is set. */ |
| 213 | ++result; |
| 214 | else if (x.f & 0x100) |
| 215 | /* Significand is odd. */ |
| 216 | ++result; |
| 217 | } |
| 218 | } |
| 219 | else if (x.e == -158 && x.f >= 0xffffff00) |
| 220 | { |
| 221 | /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than |
| 222 | FLT_MIN but still rounds to it. */ |
| 223 | result = 1U << SP_SIGNIFICAND_SIZE; |
| 224 | } |
| 225 | else if (x.e > -181) |
| 226 | { |
| 227 | /* Non-zero Denormal. */ |
| 228 | int shift = -149 - x.e; /* 9 <= shift <= 31. */ |
| 229 | |
| 230 | result = x.f >> shift; |
| 231 | |
| 232 | if (x.f & (1U << (shift - 1))) |
| 233 | /* Round-bit is set. */ |
| 234 | { |
| 235 | if (x.f & ((1U << (shift - 1)) - 1)) |
| 236 | /* Sticky-bit is set. */ |
| 237 | ++result; |
| 238 | else if (x.f & 1U << shift) |
| 239 | /* Significand is odd. */ |
| 240 | ++result; |
| 241 | } |
| 242 | } |
| 243 | else if (x.e == -181 && x.f > 0x80000000) |
| 244 | { |
| 245 | /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest |
| 246 | denormal. Can't handle this in the previous case as shifting a |
| 247 | uint32_t 32 bits to the right is undefined behaviour. */ |
| 248 | result = 1; |
| 249 | } |
| 250 | else |
| 251 | { |
| 252 | /* Underflow. */ |
| 253 | errno = ERANGE; |
| 254 | result = 0; |
| 255 | } |
| 256 | |
| 257 | if (negative) |
| 258 | result |= 0x80000000; |
| 259 | |
| 260 | tmp.n = result; |
| 261 | return tmp.f; |
| 262 | } |
| 263 | |
| 264 | static float |
| 265 | scale_integer_to_float(uint32_t M, int N, int negative) |
| 266 | { |
| 267 | strtof_fp_t result, x, power; |
| 268 | |
| 269 | if (M == 0) |
| 270 | return negative ? -0.f : 0.f; |
| 271 | if (N > 38) |
| 272 | { |
| 273 | /* Overflow. */ |
| 274 | errno = ERANGE; |
| 275 | return negative ? -INFINITY : INFINITY; |
| 276 | } |
| 277 | if (N < -54) |
| 278 | { |
| 279 | /* Underflow. */ |
| 280 | errno = ERANGE; |
| 281 | return negative ? -0.f : 0.f; |
| 282 | } |
| 283 | /* If N is in the range {-13, ..., 13} the conversion is exact. |
| 284 | Try to scale N into this region. */ |
| 285 | while (N > 13 && M <= 0xffffffff / 10) |
| 286 | { |
| 287 | M *= 10; |
| 288 | --N; |
| 289 | } |
| 290 | |
| 291 | while (N < -13 && M % 10 == 0) |
| 292 | { |
| 293 | M /= 10; |
| 294 | ++N; |
| 295 | } |
| 296 | |
| 297 | x = uint32_to_diy (M); |
| 298 | if (N >= 0) |
| 299 | { |
| 300 | power = strtof_cached_power(N); |
| 301 | result = strtof_multiply(x, power); |
| 302 | } |
| 303 | else |
| 304 | { |
| 305 | power = strtof_cached_power(-N); |
| 306 | result = divide(x, power); |
| 307 | } |
| 308 | |
| 309 | return diy_to_float(result, negative); |
| 310 | } |
| 311 | |
/* Case-insensitive prefix test. 'string' must consist of uppercase letters.
	If *s begins with 'string', advance *s past the prefix and return 1;
	otherwise leave *s untouched and return 0. */
static int
starts_with(const char **s, const char *string)
{
	const char *in = *s;
	const char *pat;

	for (pat = string; *pat != 0; ++pat, ++in)
	{
		/* Accept the uppercase letter itself or its lowercase form,
			which is 32 code points higher. */
		if (*in == 0 || (*in != *pat && *in != *pat + 32))
			return 0;
	}

	/* Whole pattern consumed: match. */
	*s = in;
	return 1;
}
/* Store the end-of-parse position through tailptr if the caller supplied one. */
#define SET_TAILPTR(tailptr, s) \
	do \
		if (tailptr) \
			*(tailptr) = (char *) (s); \
	while (0)

/*
	Locale-independent decimal to binary conversion.

	Accepts optional leading whitespace, one optional sign, decimal digits
	with an optional '.', and an optional exponent ('e' or 'E', optional
	sign, digits). Only the first 9 significant digits are used; all
	following digits are truncated. Special inputs (case insensitive):
	"NAN", "INF" or "INFINITY".

	string: NUL-terminated input.
	tailptr: if non-NULL, receives a pointer to the first character that was
	not consumed, or 'string' itself if no number could be parsed.

	Returns the converted value, or 0 if no number was found. On overflow
	return (-)INFINITY and set errno to ERANGE. On underflow return (-)0 and
	set errno to ERANGE. Parsing "INF" or "INFINITY" also sets errno to ERANGE.
*/
float
fz_strtof(const char *string, char **tailptr)
{
	/* FIXME: error (1/2 + 1/256) ulp */
	enum
	{
		/* Bound for |N| while counting digits. Reaching it takes a number
			of about 10^9 characters; it only protects N from overflowing. */
		DIGIT_SHIFT_LIMIT = 1000000000,
		/* Exponent digits are accumulated while the value is below this
			limit, so the stored exponent stays below 10^8. An exponent
			that gets cut off is therefore still far larger than any digit
			count of a realistic input and decides over-/underflow alone. */
		EXPONENT_LIMIT = 10000000
	};
	const char *s;
	/* The parsed number is M * 10^N. */
	uint32_t M = 0;
	int N = 0;
	/* If decimal_digits gets 9 we truncate all following digits. */
	int decimal_digits = 0;
	int negative = 0;
	const char *number_start;

	/* Skip leading whitespace (isspace in "C" locale). */
	s = string;
	while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v')
		++s;

	/* Parse sign. At most one sign is accepted, so "+-1" is not a number. */
	if (*s == '+')
		++s;
	else if (*s == '-')
	{
		negative = 1;
		++s;
	}
	number_start = s;

	/* Parse digits before decimal point. */
	while (*s >= '0' && *s <= '9')
	{
		if (decimal_digits)
		{
			if (decimal_digits < 9)
			{
				++decimal_digits;
				M = M * 10 + *s - '0';
			}
			/* Truncated digit: it still scales the value by ten.
				Really arcane strings might overflow N. */
			else if (N < DIGIT_SHIFT_LIMIT)
				++N;
		}
		else if (*s > '0')
		{
			/* First significant digit; leading zeros are skipped. */
			M = *s - '0';
			++decimal_digits;
		}
		++s;
	}

	/* Parse decimal point. */
	if (*s == '.')
		++s;

	/* Parse digits after decimal point. */
	while (*s >= '0' && *s <= '9')
	{
		if (decimal_digits < 9)
		{
			if (decimal_digits || *s > '0')
			{
				++decimal_digits;
				M = M * 10 + *s - '0';
			}
			/* Zeros directly after the point only shift the exponent.
				Really arcane strings might overflow N here as well. */
			if (N > -DIGIT_SHIFT_LIMIT)
				--N;
		}
		++s;
	}

	if ((s == number_start + 1 && *number_start == '.') || number_start == s)
	{
		/* No Number. Check for INF and NAN strings. */
		s = number_start;
		if (starts_with(&s, "INFINITY" ) || starts_with(&s, "INF" ))
		{
			errno = ERANGE;
			SET_TAILPTR(tailptr, s);
			return negative ? -INFINITY : +INFINITY;
		}
		else if (starts_with(&s, "NAN" ))
		{
			SET_TAILPTR(tailptr, s);
			return (float)NAN;
		}
		else
		{
			SET_TAILPTR(tailptr, string);
			return 0.f;
		}
	}

	/* Parse exponent. */
	if (*s == 'e' || *s == 'E')
	{
		int exp_negative = 0;
		int exponent = 0;
		const char *int_start;
		const char *exp_start = s;

		++s;
		if (*s == '+')
			++s;
		else if (*s == '-')
		{
			++s;
			exp_negative = 1;
		}
		int_start = s;
		/* Parse integer. */
		while (*s >= '0' && *s <= '9')
		{
			/* Make sure exponent does not get overflowed. */
			if (exponent < EXPONENT_LIMIT)
				exponent = exponent * 10 + *s - '0';
			++s;
		}
		if (exp_negative)
			exponent = -exponent;
		if (s == int_start)
			/* No Number. The 'e' is not part of the value. */
			s = exp_start;
		else
			N += exponent;
	}

	SET_TAILPTR(tailptr, s);
	return scale_integer_to_float(M, N, negative);
}
| 466 | |