d2s.c source code [PostgreSQL/src/common/d2s.c]

1	/---------------------------------------------------------------------------*
2	*
3	* Ryu floating-point output for double precision.
4	*
5	* Portions Copyright (c) 2018-2019, PostgreSQL Global Development Group
6	*
7	* IDENTIFICATION
8	* src/common/d2s.c
9	*
10	* This is a modification of code taken from github.com/ulfjack/ryu under the
11	* terms of the Boost license (not the Apache license). The original copyright
12	* notice follows:
13	*
14	* Copyright 2018 Ulf Adams
15	*
16	* The contents of this file may be used under the terms of the Apache
17	* License, Version 2.0.
18	*
19	* (See accompanying file LICENSE-Apache or copy at
20	* http://www.apache.org/licenses/LICENSE-2.0)
21	*
22	* Alternatively, the contents of this file may be used under the terms of the
23	* Boost Software License, Version 1.0.
24	*
25	* (See accompanying file LICENSE-Boost or copy at
26	* https://www.boost.org/LICENSE_1_0.txt)
27	*
28	* Unless required by applicable law or agreed to in writing, this software is
29	* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
30	* KIND, either express or implied.
31	*
32	*---------------------------------------------------------------------------
33	*/
34
35	/*
36	* Runtime compiler options:
37	*
38	* -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower,
39	* depending on your compiler.
40	*/
41
42	#ifndef FRONTEND
43	#include "postgres.h"
44	#else
45	#include "postgres_fe.h"
46	#endif
47
48	#include "common/shortest_dec.h"
49
50	/*
51	* For consistency, we use 128-bit types if and only if the rest of PG also
52	* does, even though we could use them here without worrying about the
53	* alignment concerns that apply elsewhere.
54	*/
55	#if !defined(HAVE_INT128) && defined(_MSC_VER) \
56	&& !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
57	#define HAS_64_BIT_INTRINSICS
58	#endif
59
60	#include "ryu_common.h"
61	#include "digit_table.h"
62	#include "d2s_full_table.h"
63	#include "d2s_intrinsics.h"
64
65	#define DOUBLE_MANTISSA_BITS 52
66	#define DOUBLE_EXPONENT_BITS 11
67	#define DOUBLE_BIAS 1023
68
69	#define DOUBLE_POW5_INV_BITCOUNT 122
70	#define DOUBLE_POW5_BITCOUNT 121
71
72
73	static inline uint32
74	pow5Factor(uint64 value)
75	{
76	uint32 count = `0`;
77
78	for (;;)
79	{
80	Assert(value != `0`);
81	const uint64 q = div5(value);
82	const uint32 r = (uint32) (value - `5` * q);
83
84	if (r != `0`)
85	break;
86
87	value = q;
88	++count;
89	}
90	return count;
91	}
92
93	/ Returns true if value is divisible by 5^p. /
94	static inline bool
95	multipleOfPowerOf5(const uint64 value, const uint32 p)
96	{
97	/*
98	* I tried a case distinction on p, but there was no performance
99	* difference.
100	*/
101	return pow5Factor(value) >= p;
102	}
103
104	/ Returns true if value is divisible by 2^p. /
105	static inline bool
106	multipleOfPowerOf2(const uint64 value, const uint32 p)
107	{
108	/ return __builtin_ctzll(value) >= p; /
109	return (value & ((UINT64CONST(`1`) << p) - `1`)) == `0`;
110	}
111
112	/*
113	* We need a 64x128-bit multiplication and a subsequent 128-bit shift.
114	*
115	* Multiplication:
116	*
117	* The 64-bit factor is variable and passed in, the 128-bit factor comes
118	* from a lookup table. We know that the 64-bit factor only has 55
119	* significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
120	* factor only has 124 significant bits (i.e., the 4 topmost bits are
121	* zeros).
122	*
123	* Shift:
124	*
125	* In principle, the multiplication result requires 55 + 124 = 179 bits to
126	* represent. However, we then shift this value to the right by j, which is
127	* at least j >= 115, so the result is guaranteed to fit into 179 - 115 =
128	* 64 bits. This means that we only need the topmost 64 significant bits of
129	* the 64x128-bit multiplication.
130	*
131	* There are several ways to do this:
132	*
133	* 1. Best case: the compiler exposes a 128-bit type.
134	* We perform two 64x64-bit multiplications, add the higher 64 bits of the
135	* lower result to the higher result, and shift by j - 64 bits.
136	*
137	* We explicitly cast from 64-bit to 128-bit, so the compiler can tell
138	* that these are only 64-bit inputs, and can map these to the best
139	* possible sequence of assembly instructions. x86-64 machines happen to
140	* have matching assembly instructions for 64x64-bit multiplications and
141	* 128-bit shifts.
142	*
143	* 2. Second best case: the compiler exposes intrinsics for the x86-64
144	* assembly instructions mentioned in 1.
145	*
146	* 3. We only have 64x64 bit instructions that return the lower 64 bits of
147	* the result, i.e., we have to use plain C.
148	*
149	* Our inputs are less than the full width, so we have three options:
150	* a. Ignore this fact and just implement the intrinsics manually.
151	* b. Split both into 31-bit pieces, which guarantees no internal
152	* overflow, but requires extra work upfront (unless we change the
153	* lookup table).
154	* c. Split only the first factor into 31-bit pieces, which also
155	* guarantees no internal overflow, but requires extra work since the
156	* intermediate results are not perfectly aligned.
157	*/
158	#if defined(HAVE_INT128)
159
160	/ Best case: use 128-bit type. /
161	static inline uint64
162	mulShift(const uint64 m, const uint64 *const mul, const int32 j)
163	{
164	const uint128 b0 = ((uint128) m) * mul[`0`];
165	const uint128 b2 = ((uint128) m) * mul[`1`];
166
167	return (uint64) (((b0 >> `64`) + b2) >> (j - `64`));
168	}
169
170	static inline uint64
171	mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
172	uint64 *const vp, uint64 *const vm, const uint32 mmShift)
173	{
174	vp = mulShift(`4` m + `2`, mul, j);
175	vm = mulShift(`4` m - `1` - mmShift, mul, j);
176	return mulShift(`4` * m, mul, j);
177	}
178
179	#elif defined(HAS_64_BIT_INTRINSICS)
180
181	static inline uint64
182	mulShift(const uint64 m, const uint64 *const mul, const int32 j)
183	{
184	/ m is maximum 55 bits /
185	uint64 high1;
186
187	/ 128 /
188	const uint64 low1 = umul128(m, mul[`1`], &high1);
189
190	/ 64 /
191	uint64 high0;
192	uint64 sum;
193
194	/ 64 /
195	umul128(m, mul[`0`], &high0);
196	/ 0 /
197	sum = high0 + low1;
198
199	if (sum < high0)
200	{
201	++high1;
202	/ overflow into high1 /
203	}
204	return shiftright128(sum, high1, j - `64`);
205	}
206
207	static inline uint64
208	mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
209	uint64 *const vp, uint64 *const vm, const uint32 mmShift)
210	{
211	vp = mulShift(`4` m + `2`, mul, j);
212	vm = mulShift(`4` m - `1` - mmShift, mul, j);
213	return mulShift(`4` * m, mul, j);
214	}
215
216	#else /* // !defined(HAVE_INT128) &&
217	* !defined(HAS_64_BIT_INTRINSICS) */
218
219	static inline uint64
220	mulShiftAll(uint64 m, const uint64 *const mul, const int32 j,
221	uint64 *const vp, uint64 *const vm, const uint32 mmShift)
222	{
223	m <<= `1`; / m is maximum 55 bits /
224
225	uint64 tmp;
226	const uint64 lo = umul128(m, mul[`0`], &tmp);
227	uint64 hi;
228	const uint64 mid = tmp + umul128(m, mul[`1`], &hi);
229
230	hi += mid < tmp; / overflow into hi /
231
232	const uint64 lo2 = lo + mul[`0`];
233	const uint64 mid2 = mid + mul[`1`] + (lo2 < lo);
234	const uint64 hi2 = hi + (mid2 < mid);
235
236	*vp = shiftright128(mid2, hi2, j - `64` - `1`);
237
238	if (mmShift == `1`)
239	{
240	const uint64 lo3 = lo - mul[`0`];
241	const uint64 mid3 = mid - mul[`1`] - (lo3 > lo);
242	const uint64 hi3 = hi - (mid3 > mid);
243
244	*vm = shiftright128(mid3, hi3, j - `64` - `1`);
245	}
246	else
247	{
248	const uint64 lo3 = lo + lo;
249	const uint64 mid3 = mid + mid + (lo3 < lo);
250	const uint64 hi3 = hi + hi + (mid3 < mid);
251	const uint64 lo4 = lo3 - mul[`0`];
252	const uint64 mid4 = mid3 - mul[`1`] - (lo4 > lo3);
253	const uint64 hi4 = hi3 - (mid4 > mid3);
254
255	*vm = shiftright128(mid4, hi4, j - `64`);
256	}
257
258	return shiftright128(mid, hi, j - `64` - `1`);
259	}
260
261	#endif /* // HAS_64_BIT_INTRINSICS */
262
263	static inline uint32
264	decimalLength(const uint64 v)
265	{
266	/ This is slightly faster than a loop. /
267	/ The average output length is 16.38 digits, so we check high-to-low. /
268	/ Function precondition: v is not an 18, 19, or 20-digit number. /
269	/ (17 digits are sufficient for round-tripping.) /
270	Assert(v < `100000000000000000L`);
271	if (v >= `10000000000000000L`)
272	{
273	return `17`;
274	}
275	if (v >= `1000000000000000L`)
276	{
277	return `16`;
278	}
279	if (v >= `100000000000000L`)
280	{
281	return `15`;
282	}
283	if (v >= `10000000000000L`)
284	{
285	return `14`;
286	}
287	if (v >= `1000000000000L`)
288	{
289	return `13`;
290	}
291	if (v >= `100000000000L`)
292	{
293	return `12`;
294	}
295	if (v >= `10000000000L`)
296	{
297	return `11`;
298	}
299	if (v >= `1000000000L`)
300	{
301	return `10`;
302	}
303	if (v >= `100000000L`)
304	{
305	return `9`;
306	}
307	if (v >= `10000000L`)
308	{
309	return `8`;
310	}
311	if (v >= `1000000L`)
312	{
313	return `7`;
314	}
315	if (v >= `100000L`)
316	{
317	return `6`;
318	}
319	if (v >= `10000L`)
320	{
321	return `5`;
322	}
323	if (v >= `1000L`)
324	{
325	return `4`;
326	}
327	if (v >= `100L`)
328	{
329	return `3`;
330	}
331	if (v >= `10L`)
332	{
333	return `2`;
334	}
335	return `1`;
336	}
337
338	/ A floating decimal representing m * 10^e. /
339	typedef struct floating_decimal_64
340	{
341	uint64 mantissa;
342	int32 exponent;
343	} floating_decimal_64;
344
345	static inline floating_decimal_64
346	d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent)
347	{
348	int32 e2;
349	uint64 m2;
350
351	if (ieeeExponent == `0`)
352	{
353	/ We subtract 2 so that the bounds computation has 2 additional bits. /
354	e2 = `1` - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - `2`;
355	m2 = ieeeMantissa;
356	}
357	else
358	{
359	e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - `2`;
360	m2 = (UINT64CONST(`1`) << DOUBLE_MANTISSA_BITS) \| ieeeMantissa;
361	}
362
363	#if STRICTLY_SHORTEST
364	const bool even = (m2 & `1`) == `0`;
365	const bool acceptBounds = even;
366	#else
367	const bool acceptBounds = false;
368	#endif
369
370	/ Step 2: Determine the interval of legal decimal representations. /
371	const uint64 mv = `4` * m2;
372
373	/ Implicit bool -> int conversion. True is 1, false is 0. /
374	const uint32 mmShift = ieeeMantissa != `0` \|\| ieeeExponent <= `1`;
375
376	/ We would compute mp and mm like this: /
377	/ uint64 mp = 4 * m2 + 2; /
378	/ uint64 mm = mv - 1 - mmShift; /
379
380	/ Step 3: Convert to a decimal power base using 128-bit arithmetic. /
381	uint64 vr,
382	vp,
383	vm;
384	int32 e10;
385	bool vmIsTrailingZeros = false;
386	bool vrIsTrailingZeros = false;
387
388	if (e2 >= `0`)
389	{
390	/*
391	* I tried special-casing q == 0, but there was no effect on
392	* performance.
393	*
394	* This expr is slightly faster than max(0, log10Pow2(e2) - 1).
395	*/
396	const uint32 q = log10Pow2(e2) - (e2 > `3`);
397	const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - `1`;
398	const int32 i = -e2 + q + k;
399
400	e10 = q;
401
402	vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
403
404	if (q <= `21`)
405	{
406	/*
407	* This should use q <= 22, but I think 21 is also safe. Smaller
408	* values may still be safe, but it's more difficult to reason
409	* about them.
410	*
411	* Only one of mp, mv, and mm can be a multiple of 5, if any.
412	*/
413	const uint32 mvMod5 = (uint32) (mv - `5` * div5(mv));
414
415	if (mvMod5 == `0`)
416	{
417	vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
418	}
419	else if (acceptBounds)
420	{
421	/----*
422	* Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
423	* <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
424	* <=> true && pow5Factor(mm) >= q, since e2 >= q.
425	*----
426	*/
427	vmIsTrailingZeros = multipleOfPowerOf5(mv - `1` - mmShift, q);
428	}
429	else
430	{
431	/ Same as min(e2 + 1, pow5Factor(mp)) >= q. /
432	vp -= multipleOfPowerOf5(mv + `2`, q);
433	}
434	}
435	}
436	else
437	{
438	/*
439	* This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
440	*/
441	const uint32 q = log10Pow5(-e2) - (-e2 > `1`);
442	const int32 i = -e2 - q;
443	const int32 k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
444	const int32 j = q - k;
445
446	e10 = q + e2;
447
448	vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
449
450	if (q <= `1`)
451	{
452	/*
453	* {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
454	* trailing 0 bits.
455	*/
456	/ mv = 4 * m2, so it always has at least two trailing 0 bits. /
457	vrIsTrailingZeros = true;
458	if (acceptBounds)
459	{
460	/*
461	* mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
462	* mmShift == 1.
463	*/
464	vmIsTrailingZeros = mmShift == `1`;
465	}
466	else
467	{
468	/*
469	* mp = mv + 2, so it always has at least one trailing 0 bit.
470	*/
471	--vp;
472	}
473	}
474	else if (q < `63`)
475	{
476	/ TODO(ulfjack):Use a tighter bound here. /
477	/*
478	* We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1
479	*/
480	/ <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 /
481	/ <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) /
482	/ <=> (mv & ((1 << (q - 1)) - 1)) == 0 /
483
484	/*
485	* We also need to make sure that the left shift does not
486	* overflow.
487	*/
488	vrIsTrailingZeros = multipleOfPowerOf2(mv, q - `1`);
489	}
490	}
491
492	/*
493	* Step 4: Find the shortest decimal representation in the interval of
494	* legal representations.
495	*/
496	uint32 removed = `0`;
497	uint8 lastRemovedDigit = `0`;
498	uint64 output;
499
500	/ On average, we remove ~2 digits. /
501	if (vmIsTrailingZeros \|\| vrIsTrailingZeros)
502	{
503	/ General case, which happens rarely (~0.7%). /
504	for (;;)
505	{
506	const uint64 vpDiv10 = div10(vp);
507	const uint64 vmDiv10 = div10(vm);
508
509	if (vpDiv10 <= vmDiv10)
510	break;
511
512	const uint32 vmMod10 = (uint32) (vm - `10` * vmDiv10);
513	const uint64 vrDiv10 = div10(vr);
514	const uint32 vrMod10 = (uint32) (vr - `10` * vrDiv10);
515
516	vmIsTrailingZeros &= vmMod10 == `0`;
517	vrIsTrailingZeros &= lastRemovedDigit == `0`;
518	lastRemovedDigit = (uint8) vrMod10;
519	vr = vrDiv10;
520	vp = vpDiv10;
521	vm = vmDiv10;
522	++removed;
523	}
524
525	if (vmIsTrailingZeros)
526	{
527	for (;;)
528	{
529	const uint64 vmDiv10 = div10(vm);
530	const uint32 vmMod10 = (uint32) (vm - `10` * vmDiv10);
531
532	if (vmMod10 != `0`)
533	break;
534
535	const uint64 vpDiv10 = div10(vp);
536	const uint64 vrDiv10 = div10(vr);
537	const uint32 vrMod10 = (uint32) (vr - `10` * vrDiv10);
538
539	vrIsTrailingZeros &= lastRemovedDigit == `0`;
540	lastRemovedDigit = (uint8) vrMod10;
541	vr = vrDiv10;
542	vp = vpDiv10;
543	vm = vmDiv10;
544	++removed;
545	}
546	}
547
548	if (vrIsTrailingZeros && lastRemovedDigit == `5` && vr % `2` == `0`)
549	{
550	/ Round even if the exact number is .....50..0. /
551	lastRemovedDigit = `4`;
552	}
553
554	/*
555	* We need to take vr + 1 if vr is outside bounds or we need to round
556	* up.
557	*/
558	output = vr + ((vr == vm && (!acceptBounds \|\| !vmIsTrailingZeros)) \|\| lastRemovedDigit >= `5`);
559	}
560	else
561	{
562	/*
563	* Specialized for the common case (~99.3%). Percentages below are
564	* relative to this.
565	*/
566	bool roundUp = false;
567	const uint64 vpDiv100 = div100(vp);
568	const uint64 vmDiv100 = div100(vm);
569
570	if (vpDiv100 > vmDiv100)
571	{
572	/ Optimization:remove two digits at a time(~86.2 %). /
573	const uint64 vrDiv100 = div100(vr);
574	const uint32 vrMod100 = (uint32) (vr - `100` * vrDiv100);
575
576	roundUp = vrMod100 >= `50`;
577	vr = vrDiv100;
578	vp = vpDiv100;
579	vm = vmDiv100;
580	removed += `2`;
581	}
582
583	/----*
584	* Loop iterations below (approximately), without optimization
585	* above:
586	*
587	* 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%,
588	* 6+: 0.02%
589	*
590	* Loop iterations below (approximately), with optimization
591	* above:
592	*
593	* 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
594	*----
595	*/
596	for (;;)
597	{
598	const uint64 vpDiv10 = div10(vp);
599	const uint64 vmDiv10 = div10(vm);
600
601	if (vpDiv10 <= vmDiv10)
602	break;
603
604	const uint64 vrDiv10 = div10(vr);
605	const uint32 vrMod10 = (uint32) (vr - `10` * vrDiv10);
606
607	roundUp = vrMod10 >= `5`;
608	vr = vrDiv10;
609	vp = vpDiv10;
610	vm = vmDiv10;
611	++removed;
612	}
613
614	/*
615	* We need to take vr + 1 if vr is outside bounds or we need to round
616	* up.
617	*/
618	output = vr + (vr == vm \|\| roundUp);
619	}
620
621	const int32 exp = e10 + removed;
622
623	floating_decimal_64 fd;
624
625	fd.exponent = exp;
626	fd.mantissa = output;
627	return fd;
628	}
629
630	static inline int
631	to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result)
632	{
633	/ Step 5: Print the decimal representation. /
634	int index = `0`;
635
636	uint64 output = v.mantissa;
637	int32 exp = v.exponent;
638
639	/----*
640	* On entry, mantissa * 10^exp is the result to be output.
641	* Caller has already done the - sign if needed.
642	*
643	* We want to insert the point somewhere depending on the output length
644	* and exponent, which might mean adding zeros:
645	*
646	* exp \| format
647	* 1+ \| ddddddddd000000
648	* 0 \| ddddddddd
649	* -1 .. -len+1 \| dddddddd.d to d.ddddddddd
650	* -len ... \| 0.ddddddddd to 0.000dddddd
651	*/
652	uint32 i = `0`;
653	int32 nexp = exp + olength;
654
655	if (nexp <= `0`)
656	{
657	/ -nexp is number of 0s to add after '.' /
658	Assert(nexp >= -`3`);
659	/ 0.000ddddd /
660	index = `2` - nexp;
661	/ won't need more than this many 0s /
662	memcpy(result, "0.000000", `8`);
663	}
664	else if (exp < `0`)
665	{
666	/*
667	* dddd.dddd; leave space at the start and move the '.' in after
668	*/
669	index = `1`;
670	}
671	else
672	{
673	/*
674	* We can save some code later by pre-filling with zeros. We know that
675	* there can be no more than 16 output digits in this form, otherwise
676	* we would not choose fixed-point output.
677	*/
678	Assert(exp < `16` && exp + olength <= `16`);
679	memset(result, `'0'`, `16`);
680	}
681
682	/*
683	* We prefer 32-bit operations, even on 64-bit platforms. We have at most
684	* 17 digits, and uint32 can store 9 digits. If output doesn't fit into
685	* uint32, we cut off 8 digits, so the rest will fit into uint32.
686	*/
687	if ((output >> `32`) != `0`)
688	{
689	/ Expensive 64-bit division. /
690	const uint64 q = div1e8(output);
691	uint32 output2 = (uint32) (output - `100000000` * q);
692	const uint32 c = output2 % `10000`;
693
694	output = q;
695	output2 /= `10000`;
696
697	const uint32 d = output2 % `10000`;
698	const uint32 c0 = (c % `100`) << `1`;
699	const uint32 c1 = (c / `100`) << `1`;
700	const uint32 d0 = (d % `100`) << `1`;
701	const uint32 d1 = (d / `100`) << `1`;
702
703	memcpy(result + index + olength - i - `2`, DIGIT_TABLE + c0, `2`);
704	memcpy(result + index + olength - i - `4`, DIGIT_TABLE + c1, `2`);
705	memcpy(result + index + olength - i - `6`, DIGIT_TABLE + d0, `2`);
706	memcpy(result + index + olength - i - `8`, DIGIT_TABLE + d1, `2`);
707	i += `8`;
708	}
709
710	uint32 output2 = (uint32) output;
711
712	while (output2 >= `10000`)
713	{
714	const uint32 c = output2 - `10000` * (output2 / `10000`);
715	const uint32 c0 = (c % `100`) << `1`;
716	const uint32 c1 = (c / `100`) << `1`;
717
718	output2 /= `10000`;
719	memcpy(result + index + olength - i - `2`, DIGIT_TABLE + c0, `2`);
720	memcpy(result + index + olength - i - `4`, DIGIT_TABLE + c1, `2`);
721	i += `4`;
722	}
723	if (output2 >= `100`)
724	{
725	const uint32 c = (output2 % `100`) << `1`;
726
727	output2 /= `100`;
728	memcpy(result + index + olength - i - `2`, DIGIT_TABLE + c, `2`);
729	i += `2`;
730	}
731	if (output2 >= `10`)
732	{
733	const uint32 c = output2 << `1`;
734
735	memcpy(result + index + olength - i - `2`, DIGIT_TABLE + c, `2`);
736	}
737	else
738	{
739	result[index] = (char) (`'0'` + output2);
740	}
741
742	if (index == `1`)
743	{
744	/*
745	* nexp is 1..15 here, representing the number of digits before the
746	* point. A value of 16 is not possible because we switch to
747	* scientific notation when the display exponent reaches 15.
748	*/
749	Assert(nexp < `16`);
750	/ gcc only seems to want to optimize memmove for small 2^n /
751	if (nexp & `8`)
752	{
753	memmove(result + index - `1`, result + index, `8`);
754	index += `8`;
755	}
756	if (nexp & `4`)
757	{
758	memmove(result + index - `1`, result + index, `4`);
759	index += `4`;
760	}
761	if (nexp & `2`)
762	{
763	memmove(result + index - `1`, result + index, `2`);
764	index += `2`;
765	}
766	if (nexp & `1`)
767	{
768	result[index - `1`] = result[index];
769	}
770	result[nexp] = `'.'`;
771	index = olength + `1`;
772	}
773	else if (exp >= `0`)
774	{
775	/ we supplied the trailing zeros earlier, now just set the length. /
776	index = olength + exp;
777	}
778	else
779	{
780	index = olength + (`2` - nexp);
781	}
782
783	return index;
784	}
785
786	static inline int
787	to_chars(floating_decimal_64 v, const bool sign, char *const result)
788	{
789	/ Step 5: Print the decimal representation. /
790	int index = `0`;
791
792	uint64 output = v.mantissa;
793	uint32 olength = decimalLength(output);
794	int32 exp = v.exponent + olength - `1`;
795
796	if (sign)
797	{
798	result[index++] = `'-'`;
799	}
800
801	/*
802	* The thresholds for fixed-point output are chosen to match printf
803	* defaults. Beware that both the code of to_chars_df and the value of
804	* DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
805	*/
806	if (exp >= -`4` && exp < `15`)
807	return to_chars_df(v, olength, result + index) + sign;
808
809	/*
810	* If v.exponent is exactly 0, we might have reached here via the small
811	* integer fast path, in which case v.mantissa might contain trailing
812	* (decimal) zeros. For scientific notation we need to move these zeros
813	* into the exponent. (For fixed point this doesn't matter, which is why
814	* we do this here rather than above.)
815	*
816	* Since we already calculated the display exponent (exp) above based on
817	* the old decimal length, that value does not change here. Instead, we
818	* just reduce the display length for each digit removed.
819	*
820	* If we didn't get here via the fast path, the raw exponent will not
821	* usually be 0, and there will be no trailing zeros, so we pay no more
822	* than one div10/multiply extra cost. We claw back half of that by
823	* checking for divisibility by 2 before dividing by 10.
824	*/
825	if (v.exponent == `0`)
826	{
827	while ((output & `1`) == `0`)
828	{
829	const uint64 q = div10(output);
830	const uint32 r = (uint32) (output - `10` * q);
831
832	if (r != `0`)
833	break;
834	output = q;
835	--olength;
836	}
837	}
838
839	/----*
840	* Print the decimal digits.
841	*
842	* The following code is equivalent to:
843	*
844	* for (uint32 i = 0; i < olength - 1; ++i) {
845	* const uint32 c = output % 10; output /= 10;
846	* result[index + olength - i] = (char) ('0' + c);
847	* }
848	* result[index] = '0' + output % 10;
849	*----
850	*/
851
852	uint32 i = `0`;
853
854	/*
855	* We prefer 32-bit operations, even on 64-bit platforms. We have at most
856	* 17 digits, and uint32 can store 9 digits. If output doesn't fit into
857	* uint32, we cut off 8 digits, so the rest will fit into uint32.
858	*/
859	if ((output >> `32`) != `0`)
860	{
861	/ Expensive 64-bit division. /
862	const uint64 q = div1e8(output);
863	uint32 output2 = (uint32) (output - `100000000` * q);
864
865	output = q;
866
867	const uint32 c = output2 % `10000`;
868
869	output2 /= `10000`;
870
871	const uint32 d = output2 % `10000`;
872	const uint32 c0 = (c % `100`) << `1`;
873	const uint32 c1 = (c / `100`) << `1`;
874	const uint32 d0 = (d % `100`) << `1`;
875	const uint32 d1 = (d / `100`) << `1`;
876
877	memcpy(result + index + olength - i - `1`, DIGIT_TABLE + c0, `2`);
878	memcpy(result + index + olength - i - `3`, DIGIT_TABLE + c1, `2`);
879	memcpy(result + index + olength - i - `5`, DIGIT_TABLE + d0, `2`);
880	memcpy(result + index + olength - i - `7`, DIGIT_TABLE + d1, `2`);
881	i += `8`;
882	}
883
884	uint32 output2 = (uint32) output;
885
886	while (output2 >= `10000`)
887	{
888	const uint32 c = output2 - `10000` * (output2 / `10000`);
889
890	output2 /= `10000`;
891
892	const uint32 c0 = (c % `100`) << `1`;
893	const uint32 c1 = (c / `100`) << `1`;
894
895	memcpy(result + index + olength - i - `1`, DIGIT_TABLE + c0, `2`);
896	memcpy(result + index + olength - i - `3`, DIGIT_TABLE + c1, `2`);
897	i += `4`;
898	}
899	if (output2 >= `100`)
900	{
901	const uint32 c = (output2 % `100`) << `1`;
902
903	output2 /= `100`;
904	memcpy(result + index + olength - i - `1`, DIGIT_TABLE + c, `2`);
905	i += `2`;
906	}
907	if (output2 >= `10`)
908	{
909	const uint32 c = output2 << `1`;
910
911	/*
912	* We can't use memcpy here: the decimal dot goes between these two
913	* digits.
914	*/
915	result[index + olength - i] = DIGIT_TABLE[c + `1`];
916	result[index] = DIGIT_TABLE[c];
917	}
918	else
919	{
920	result[index] = (char) (`'0'` + output2);
921	}
922
923	/ Print decimal point if needed. /
924	if (olength > `1`)
925	{
926	result[index + `1`] = `'.'`;
927	index += olength + `1`;
928	}
929	else
930	{
931	++index;
932	}
933
934	/ Print the exponent. /
935	result[index++] = `'e'`;
936	if (exp < `0`)
937	{
938	result[index++] = `'-'`;
939	exp = -exp;
940	}
941	else
942	result[index++] = `'+'`;
943
944	if (exp >= `100`)
945	{
946	const int32 c = exp % `10`;
947
948	memcpy(result + index, DIGIT_TABLE + `2` * (exp / `10`), `2`);
949	result[index + `2`] = (char) (`'0'` + c);
950	index += `3`;
951	}
952	else
953	{
954	memcpy(result + index, DIGIT_TABLE + `2` * exp, `2`);
955	index += `2`;
956	}
957
958	return index;
959	}
960
961	static inline bool
962	d2d_small_int(const uint64 ieeeMantissa,
963	const uint32 ieeeExponent,
964	floating_decimal_64 *v)
965	{
966	const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
967
968	/*
969	* Avoid using multiple "return false;" here since it tends to provoke the
970	* compiler into inlining multiple copies of d2d, which is undesirable.
971	*/
972
973	if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= `0`)
974	{
975	/----*
976	* Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52:
977	* 1 <= f = m2 / 2^-e2 < 2^53.
978	*
979	* Test if the lower -e2 bits of the significand are 0, i.e. whether
980	* the fraction is 0. We can use ieeeMantissa here, since the implied
981	* 1 bit can never be tested by this; the implied 1 can only be part
982	* of a fraction if e2 < -DOUBLE_MANTISSA_BITS which we already
983	* checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53)
984	*/
985	const uint64 mask = (UINT64CONST(`1`) << -e2) - `1`;
986	const uint64 fraction = ieeeMantissa & mask;
987
988	if (fraction == `0`)
989	{
990	/----*
991	* f is an integer in the range [1, 2^53).
992	* Note: mantissa might contain trailing (decimal) 0's.
993	* Note: since 2^53 < 10^16, there is no need to adjust
994	* decimalLength().
995	*/
996	const uint64 m2 = (UINT64CONST(`1`) << DOUBLE_MANTISSA_BITS) \| ieeeMantissa;
997
998	v->mantissa = m2 >> -e2;
999	v->exponent = `0`;
1000	return true;
1001	}
1002	}
1003
1004	return false;
1005	}
1006
1007	/*
1008	* Store the shortest decimal representation of the given double as an
1009	* UNTERMINATED string in the caller's supplied buffer (which must be at least
1010	* DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes long).
1011	*
1012	* Returns the number of bytes stored.
1013	*/
1014	int
1015	double_to_shortest_decimal_bufn(double f, char *result)
1016	{
1017	/*
1018	* Step 1: Decode the floating-point number, and unify normalized and
1019	* subnormal cases.
1020	*/
1021	const uint64 bits = double_to_bits(f);
1022
1023	/ Decode bits into sign, mantissa, and exponent. /
1024	const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & `1`) != `0`;
1025	const uint64 ieeeMantissa = bits & ((UINT64CONST(`1`) << DOUBLE_MANTISSA_BITS) - `1`);
1026	const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & ((`1u` << DOUBLE_EXPONENT_BITS) - `1`));
1027
1028	/ Case distinction; exit early for the easy cases. /
1029	if (ieeeExponent == ((`1u` << DOUBLE_EXPONENT_BITS) - `1u`) \|\| (ieeeExponent == `0` && ieeeMantissa == `0`))
1030	{
1031	return copy_special_str(result, ieeeSign, (ieeeExponent != `0`), (ieeeMantissa != `0`));
1032	}
1033
1034	floating_decimal_64 v;
1035	const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);
1036
1037	if (!isSmallInt)
1038	{
1039	v = d2d(ieeeMantissa, ieeeExponent);
1040	}
1041
1042	return to_chars(v, ieeeSign, result);
1043	}
1044
1045	/*
1046	* Store the shortest decimal representation of the given double as a
1047	* null-terminated string in the caller's supplied buffer (which must be at
1048	* least DOUBLE_SHORTEST_DECIMAL_LEN bytes long).
1049	*
1050	* Returns the string length.
1051	*/
1052	int
1053	double_to_shortest_decimal_buf(double f, char *result)
1054	{
1055	const int index = double_to_shortest_decimal_bufn(f, result);
1056
1057	/ Terminate the string. /
1058	Assert(index < DOUBLE_SHORTEST_DECIMAL_LEN);
1059	result[index] = `'\0'`;
1060	return index;
1061	}
1062
1063	/*
1064	* Return the shortest decimal representation as a null-terminated palloc'd
1065	* string (outside the backend, uses malloc() instead).
1066	*
1067	* Caller is responsible for freeing the result.
1068	*/
1069	char *
1070	double_to_shortest_decimal(double f)
1071	{
1072	char *const result = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN);
1073
1074	double_to_shortest_decimal_buf(f, result);
1075	return result;
1076	}
1077

Browse the source code of PostgreSQL/src/common/d2s.c