strtof.c source code [MuPDF/source/fitz/strtof.c]

1	#include "mupdf/fitz.h"
2
3	#include <assert.h>
4	#include <errno.h>
5	#include <float.h>
6
/* Fallback definitions, used only when the included headers did not
   already provide INFINITY / NAN. */
#ifndef INFINITY
#define INFINITY (DBL_MAX+DBL_MAX)
#endif
#ifndef NAN
#define NAN (INFINITY-INFINITY)
#endif
13
/*
	We use "Algorithm D" from "Contributions to a Proposed Standard for Binary
	Floating-Point Arithmetic" by Jerome Coonen (1984).

	The implementation uses a self-made floating point type, 'strtof_fp_t', with
	a 32-bit significand. The steps of the algorithm are

	INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp.
	OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp.

	1) Convert the integer d1 ... d9 to an strtof_fp_t x.
	2) Lookup the strtof_fp_t power = 10 ^ |dexp|.
	3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'.
	4) Round x to a float using rounding mode 'to even'.

	Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer.
	In the case |dexp| <= 13 the cached power is exact and the algorithm returns
	the exactly rounded result (with rounding mode 'to even').
	There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'.

	For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp.
	This is small enough to ensure that binary to decimal to binary conversion
	is the identity if the decimal format uses 9 correctly rounded significant digits.
*/
/*
	Software floating point number with the value f * 2^e.
	The helpers in this file treat a value as "normalized" when the top
	bit (0x80000000) of the significand is set.
*/
typedef struct strtof_fp_t
{
	uint32_t f;	/* 32-bit significand */
	int e;		/* binary exponent applied to f */
} strtof_fp_t;
43
44	/ Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. /
45
46	static strtof_fp_t
47	strtof_multiply(strtof_fp_t x, strtof_fp_t y)
48	{
49	uint64_t tmp;
50	strtof_fp_t res;
51
52	assert(x.f & y.f & `0x80000000`);
53
54	res.e = x.e + y.e + `32`;
55	tmp = (uint64_t) x.f * y.f;
56	/ Normalize. /
57	if ((tmp < ((uint64_t) `1` << `63`)))
58	{
59	tmp <<= `1`;
60	--res.e;
61	}
62
63	res.f = tmp >> `32`;
64
65	/ Set the last bit of the significand to 1 if the result is*
66	inexact. /*
67	if (tmp & `0xffffffff`)
68	res.f \|= `1`;
69	return res;
70	}
71
72	static strtof_fp_t
73	divide(strtof_fp_t x, strtof_fp_t y)
74	{
75	uint64_t product, quotient;
76	uint32_t remainder;
77	strtof_fp_t res;
78
79	res.e = x.e - y.e - `32`;
80	product = (uint64_t) x.f << `32`;
81	quotient = product / y.f;
82	remainder = product % y.f;
83	/ 2^31 <= quotient <= 2^33 - 2. /
84	if (quotient <= `0xffffffff`)
85	res.f = quotient;
86	else
87	{
88	++res.e;
89	/ If quotient % 2 != 0 we have remainder != 0. /
90	res.f = quotient >> `1`;
91	}
92	if (remainder)
93	res.f \|= `1`;
94	return res;
95	}
96
/*
	From 10^0 to 10^54. Generated with GNU MPFR.
	Entry i represents 10^i as strtof_powers_ten[i] * 2^strtof_powers_ten_e[i],
	with a normalized (top bit set) 32-bit significand.
*/
static const uint32_t strtof_powers_ten[55] = {
	0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000,
	0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740,
	0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f,
	0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f,
	0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7,
	0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96,
	0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9,
	0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e,
	0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367,
	0xa70c3c41
};
static const int strtof_powers_ten_e[55] = {
	-31, -28, -25, -22, -18, -15, -12, -8, -5, -2,
	2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65,
	68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121,
	125, 128, 131, 135, 138, 141, 145, 148
};
116
117	static strtof_fp_t
118	strtof_cached_power(int i)
119	{
120	strtof_fp_t result;
121	assert (i >= `0` && i <= `54`);
122	result.f = strtof_powers_ten[i];
123	result.e = strtof_powers_ten_e[i];
124	return result;
125	}
126
/*
	Find number of leading zero bits in an uint32_t. Derived from the
	"Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html.

	clz_table[b] is the number of leading zero bits of the byte b
	(8 for b == 0).
*/
#define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
static const unsigned char clz_table[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
	sixteen_times (3) sixteen_times (2) sixteen_times (2)
	sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
	/* Zero for the rest. */
};
#undef sixteen_times

/*
	Return the number of leading zero bits of x (32 for x == 0).
	The highest non-zero byte of x is located and looked up in clz_table.
*/
static unsigned
leading_zeros (uint32_t x)
{
	unsigned tmp1, tmp2;

	tmp1 = x >> 16;
	if (tmp1)
	{
		/* A bit is set in the upper half. */
		tmp2 = tmp1 >> 8;
		if (tmp2)
			return clz_table[tmp2];
		else
			return 8 + clz_table[tmp1];
	}
	else
	{
		/* Only the lower half can contain set bits. */
		tmp1 = x >> 8;
		if (tmp1)
			return 16 + clz_table[tmp1];
		else
			return 24 + clz_table[x];
	}
}
159
160	static strtof_fp_t
161	uint32_to_diy (uint32_t x)
162	{
163	strtof_fp_t result = {x, `0`};
164	unsigned shift = leading_zeros(x);
165
166	result.f <<= shift;
167	result.e -= shift;
168	return result;
169	}
170
/* Constants describing the bit layout of the 32-bit single precision
   result assembled by diy_to_float. */
#define SP_SIGNIFICAND_SIZE 23
#define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE)
#define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS)
#define SP_EXPONENT_MASK 0x7f800000
#define SP_SIGNIFICAND_MASK 0x7fffff
#define SP_HIDDEN_BIT 0x800000 /* 2^23 */
177
178	/ Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'.*
179	See "Implementing IEEE 754-2008 Rounding" in the
180	"Handbook of Floating-Point Arithmetik".
181	*/
182	static float
183	diy_to_float(strtof_fp_t x, int negative)
184	{
185	uint32_t result;
186	union
187	{
188	float f;
189	uint32_t n;
190	} tmp;
191
192	assert(x.f & `0x80000000`);
193
194	/ We have 2^32 - 2^7 = 0xffffff80. /
195	if (x.e > `96` \|\| (x.e == `96` && x.f >= `0xffffff80`))
196	{
197	/ Overflow. Set result to infinity. /
198	errno = ERANGE;
199	result = `0xff` << SP_SIGNIFICAND_SIZE;
200	}
201	/ We have 2^32 - 2^8 = 0xffffff00. /
202	else if (x.e > -`158`)
203	{
204	/ x is greater or equal to FLT_MAX. So we get a normalized number. /
205	result = (uint32_t) (x.e + `158`) << SP_SIGNIFICAND_SIZE;
206	result \|= (x.f >> `8`) & SP_SIGNIFICAND_MASK;
207
208	if (x.f & `0x80`)
209	{
210	/ Round-bit is set. /
211	if (x.f & `0x7f`)
212	/ Sticky-bit is set. /
213	++result;
214	else if (x.f & `0x100`)
215	/ Significand is odd. /
216	++result;
217	}
218	}
219	else if (x.e == -`158` && x.f >= `0xffffff00`)
220	{
221	/ x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than*
222	FLT_MIN but still rounds to it. /*
223	result = `1U` << SP_SIGNIFICAND_SIZE;
224	}
225	else if (x.e > -`181`)
226	{
227	/ Non-zero Denormal. /
228	int shift = -`149` - x.e; / 9 <= shift <= 31. /
229
230	result = x.f >> shift;
231
232	if (x.f & (`1U` << (shift - `1`)))
233	/ Round-bit is set. /
234	{
235	if (x.f & ((`1U` << (shift - `1`)) - `1`))
236	/ Sticky-bit is set. /
237	++result;
238	else if (x.f & `1U` << shift)
239	/ Significand is odd. /
240	++result;
241	}
242	}
243	else if (x.e == -`181` && x.f > `0x80000000`)
244	{
245	/ x is in the range (0.5,1) * 2^-149 so it rounds to the smallest*
246	denormal. Can't handle this in the previous case as shifting a
247	uint32_t 32 bits to the right is undefined behaviour. /*
248	result = `1`;
249	}
250	else
251	{
252	/ Underflow. /
253	errno = ERANGE;
254	result = `0`;
255	}
256
257	if (negative)
258	result \|= `0x80000000`;
259
260	tmp.n = result;
261	return tmp.f;
262	}
263
264	static float
265	scale_integer_to_float(uint32_t M, int N, int negative)
266	{
267	strtof_fp_t result, x, power;
268
269	if (M == `0`)
270	return negative ? -`0.f` : `0.f`;
271	if (N > `38`)
272	{
273	/ Overflow. /
274	errno = ERANGE;
275	return negative ? -INFINITY : INFINITY;
276	}
277	if (N < -`54`)
278	{
279	/ Underflow. /
280	errno = ERANGE;
281	return negative ? -`0.f` : `0.f`;
282	}
283	/ If N is in the range {-13, ..., 13} the conversion is exact.*
284	Try to scale N into this region. /*
285	while (N > `13` && M <= `0xffffffff` / `10`)
286	{
287	M *= `10`;
288	--N;
289	}
290
291	while (N < -`13` && M % `10` == `0`)
292	{
293	M /= `10`;
294	++N;
295	}
296
297	x = uint32_to_diy (M);
298	if (N >= `0`)
299	{
300	power = strtof_cached_power(N);
301	result = strtof_multiply(x, power);
302	}
303	else
304	{
305	power = strtof_cached_power(-N);
306	result = divide(x, power);
307	}
308
309	return diy_to_float(result, negative);
310	}
311
/*
	Return non-zero if *s starts with string (must be uppercase), ignoring case,
	and increment *s by its length.

	s: in/out pointer to the text to examine; it is only advanced when
	   the whole of 'string' matched.
	string: the uppercase word to look for.
*/
static int
starts_with(const char **s, const char *string)
{
	const char *x = *s, *y = string;
	/* '*y + 32' is the lowercase counterpart of an uppercase ASCII letter. */
	while (*x && *y && (*x == *y || *x == *y + 32))
		++x, ++y;
	if (*y == 0)
	{
		/* Match. */
		*s = x;
		return 1;
	}
	else
		return 0;
}
/* Store s (with const cast away) through tailptr, unless tailptr is null. */
#define SET_TAILPTR(tailptr, s) \
	do \
		if (tailptr) \
			*(tailptr) = (char *) (s); \
	while (0)
334
/*
	Locale-independent decimal to binary
	conversion. On overflow return (-)INFINITY and set errno to ERANGE. On
	underflow return 0 and set errno to ERANGE. Special inputs (case
	insensitive): "NAN", "INF" or "INFINITY".

	string: zero-terminated text; leading whitespace is skipped.
	tailptr: if not null, *tailptr receives a pointer to the first
		character after the parsed number, or 'string' itself when no
		number could be parsed (in which case 0 is returned).

	At most one sign character is accepted. Only the first 9 significant
	digits contribute to the result; further digits are truncated.
	Note that the "INF"/"INFINITY" inputs also set errno to ERANGE.
*/
float
fz_strtof(const char *string, char **tailptr)
{
	/* FIXME: error (1/2 + 1/256) ulp */
	const char *s;
	uint32_t M = 0;
	int N = 0;
	/* If decimal_digits gets 9 we truncate all following digits. */
	int decimal_digits = 0;
	int negative = 0;
	const char *number_start = 0;

	/* Skip leading whitespace (isspace in "C" locale). */
	s = string;
	while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v')
		++s;

	/* Parse sign. Only a single sign is allowed, so "+-1" is not a number. */
	if (*s == '+')
		++s;
	else if (*s == '-')
	{
		negative = 1;
		++s;
	}
	number_start = s;
	/* Parse digits before decimal point. */
	while (*s >= '0' && *s <= '9')
	{
		if (decimal_digits)
		{
			if (decimal_digits < 9)
			{
				++decimal_digits;
				M = M * 10 + *s - '0';
			}
			/* Really arcane strings might overflow N. */
			else if (N < 1000)
				++N;
		}
		else if (*s > '0')
		{
			/* First non-zero digit; leading zeros are skipped. */
			M = *s - '0';
			++decimal_digits;
		}
		++s;
	}

	/* Parse decimal point. */
	if (*s == '.')
		++s;

	/* Parse digits after decimal point. */
	while (*s >= '0' && *s <= '9')
	{
		if (decimal_digits < 9)
		{
			if (decimal_digits || *s > '0')
			{
				++decimal_digits;
				M = M * 10 + *s - '0';
			}
			/* Every consumed fractional position (including leading
			   zeros) scales the result down by ten. */
			--N;
		}
		++s;
	}
	if ((s == number_start + 1 && *number_start == '.') || number_start == s)
	{
		/* No Number. Check for INF and NAN strings. */
		s = number_start;
		if (starts_with(&s, "INFINITY") || starts_with(&s, "INF"))
		{
			errno = ERANGE;
			SET_TAILPTR(tailptr, s);
			return negative ? -INFINITY : +INFINITY;
		}
		else if (starts_with(&s, "NAN"))
		{
			SET_TAILPTR(tailptr, s);
			return (float)NAN;
		}
		else
		{
			SET_TAILPTR(tailptr, string);
			return 0.f;
		}
	}

	/* Parse exponent. */
	if (*s == 'e' || *s == 'E')
	{
		int exp_negative = 0;
		int exp = 0;
		const char *int_start;
		const char *exp_start = s;

		++s;
		if (*s == '+')
			++s;
		else if (*s == '-')
		{
			++s;
			exp_negative = 1;
		}
		int_start = s;
		/* Parse integer. */
		while (*s >= '0' && *s <= '9')
		{
			/* Make sure exp does not get overflowed. */
			if (exp < 100)
				exp = exp * 10 + *s - '0';
			++s;
		}
		if (exp_negative)
			exp = -exp;
		if (s == int_start)
			/* No Number: the 'e' is not part of the number. */
			s = exp_start;
		else
			N += exp;
	}

	SET_TAILPTR(tailptr, s);
	return scale_integer_to_float(M, N, negative);
}
466

Browse the source code of MuPDF/source/fitz/strtof.c