Int128.cc source code [ClickHouse/contrib/orc/c++/src/Int128.cc]

1	/**
2	* Licensed to the Apache Software Foundation (ASF) under one
3	* or more contributor license agreements. See the NOTICE file
4	* distributed with this work for additional information
5	* regarding copyright ownership. The ASF licenses this file
6	* to you under the Apache License, Version 2.0 (the
7	* "License"); you may not use this file except in compliance
8	* with the License. You may obtain a copy of the License at
9	*
10	* http://www.apache.org/licenses/LICENSE-2.0
11	*
12	* Unless required by applicable law or agreed to in writing, software
13	* distributed under the License is distributed on an "AS IS" BASIS,
14	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15	* See the License for the specific language governing permissions and
16	* limitations under the License.
17	*/
18
19	#include "orc/Int128.hh"
20	#include "Adaptor.hh"
21
22	#include <algorithm>
23	#include <iomanip>
24	#include <iostream>
25	#include <sstream>
26
27	namespace orc {
28
29	Int128 Int128::maximumValue() {
30	return Int128 (`0x7fffffffffffffff`, `0xfffffffffffffff`);
31	}
32
33	Int128 Int128::minimumValue() {
34	return Int128 (static_cast<int64_t>(`0x8000000000000000`), `0x0`);
35	}
36
37	Int128::Int128(const std::string& str) {
38	lowbits = `0`;
39	highbits = `0`;
40	size_t length = str.length();
41	if (length > `0`) {
42	bool isNegative = str [`0`] == `'-'`;
43	size_t posn = isNegative ? `1` : `0`;
44	while (posn < length) {
45	size_t group = std::min(static_cast<size_t>(`18`), length - posn);
46	int64_t chunk = std::stoll(str.substr(posn, group));
47	int64_t multiple = `1`;
48	for(size_t i=`0`; i < group; ++i) {
49	multiple *= `10`;
50	}
51	*this *= multiple;
52	*this += chunk;
53	posn += group;
54	}
55	if (isNegative) {
56	negate();
57	}
58	}
59	}
60
61	Int128& Int128::operator=(const* Int128 &right) {
62	const uint64_t INT_MASK = `0xffffffff`;
63	const uint64_t CARRY_BIT = INT_MASK + `1`;
64
65	// Break the left and right numbers into 32 bit chunks
66	// so that we can multiply them without overflow.
67	uint64_t L0 = static_cast<uint64_t>(highbits) >> `32`;
68	uint64_t L1 = static_cast<uint64_t>(highbits) & INT_MASK;
69	uint64_t L2 = lowbits >> `32`;
70	uint64_t L3 = lowbits & INT_MASK;
71	uint64_t R0 = static_cast<uint64_t>(right.highbits) >> `32`;
72	uint64_t R1 = static_cast<uint64_t>(right.highbits) & INT_MASK;
73	uint64_t R2 = right.lowbits >> `32`;
74	uint64_t R3 = right.lowbits & INT_MASK;
75
76	uint64_t product = L3 * R3;
77	lowbits = product & INT_MASK;
78	uint64_t sum = product >> `32`;
79	product = L2 * R3;
80	sum += product;
81	highbits = sum < product ? CARRY_BIT : `0`;
82	product = L3 * R2;
83	sum += product;
84	if (sum < product) {
85	highbits += CARRY_BIT;
86	}
87	lowbits += sum << `32`;
88	highbits += static_cast<int64_t>(sum >> `32`);
89	highbits += L1 * R3 + L2 * R2 + L3 * R1;
90	highbits += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << `32`;
91	return *this;
92	}
93
94	/**
95	* Expands the given value into an array of ints so that we can work on
96	* it. The array will be converted to an absolute value and the wasNegative
97	* flag will be set appropriately. The array will remove leading zeros from
98	* the value.
99	* @param array an array of length 4 to set with the value
100	* @param wasNegative a flag for whether the value was original negative
101	* @result the output length of the array
102	*/
103	int64_t Int128::fillInArray(uint32_t* array, bool &wasNegative) const {
104	uint64_t high;
105	uint64_t low;
106	if (highbits < `0`) {
107	low = ~lowbits + `1`;
108	high = static_cast<uint64_t>(~highbits);
109	if (low == `0`) {
110	high += `1`;
111	}
112	wasNegative = true;
113	} else {
114	low = lowbits;
115	high = static_cast<uint64_t>(highbits);
116	wasNegative = false;
117	}
118	if (high != `0`) {
119	if (high > UINT32_MAX) {
120	array[`0`] = static_cast<uint32_t>(high >> `32`);
121	array[`1`] = static_cast<uint32_t>(high);
122	array[`2`] = static_cast<uint32_t>(low >> `32`);
123	array[`3`] = static_cast<uint32_t>(low);
124	return `4`;
125	} else {
126	array[`0`] = static_cast<uint32_t>(high);
127	array[`1`] = static_cast<uint32_t>(low >> `32`);
128	array[`2`] = static_cast<uint32_t>(low);
129	return `3`;
130	}
131	} else if (low >= UINT32_MAX) {
132	array[`0`] = static_cast<uint32_t>(low >> `32`);
133	array[`1`] = static_cast<uint32_t>(low);
134	return `2`;
135	} else if (low == `0`) {
136	return `0`;
137	} else {
138	array[`0`] = static_cast<uint32_t>(low);
139	return `1`;
140	}
141	}
142
143
144	/**
145	* Find last set bit in a 32 bit integer. Bit 1 is the LSB and bit 32 is
146	* the MSB. We can replace this with bsrq asm instruction on x64.
147	*/
148	int64_t fls(uint32_t x) {
149	int64_t bitpos = `0`;
150	while (x) {
151	x >>= `1`;
152	bitpos += `1`;
153	}
154	return bitpos;
155	}
156
157	/**
158	* Shift the number in the array left by bits positions.
159	* @param array the number to shift, must have length elements
160	* @param length the number of entries in the array
161	* @param bits the number of bits to shift (0 <= bits < 32)
162	*/
163	void shiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
164	if (length > `0` && bits != `0`) {
165	for(int64_t i=`0`; i < length-`1`; ++i) {
166	array[i] = (array[i] << bits) \| (array[i+`1`] >> (`32` - bits));
167	}
168	array[length-`1`] <<= bits;
169	}
170	}
171
172	/**
173	* Shift the number in the array right by bits positions.
174	* @param array the number to shift, must have length elements
175	* @param length the number of entries in the array
176	* @param bits the number of bits to shift (0 <= bits < 32)
177	*/
178	void shiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
179	if (length > `0` && bits != `0`) {
180	for(int64_t i=length-`1`; i > `0`; --i) {
181	array[i] = (array[i] >> bits) \| (array[i-`1`] << (`32` - bits));
182	}
183	array[`0`] >>= bits;
184	}
185	}
186
187	/**
188	* Fix the signs of the result and remainder at the end of the division
189	* based on the signs of the dividend and divisor.
190	*/
191	void fixDivisionSigns(Int128 &result, Int128 &remainder,
192	bool dividendWasNegative, bool divisorWasNegative) {
193	if (dividendWasNegative != divisorWasNegative) {
194	result.negate();
195	}
196	if (dividendWasNegative) {
197	remainder.negate();
198	}
199	}
200
201	/**
202	* Build a Int128 from a list of ints.
203	*/
204	void buildFromArray(Int128& value, uint32_t* array, int64_t length) {
205	switch (length) {
206	case `0`:
207	value = `0`;
208	break;
209	case `1`:
210	value = array[`0`];
211	break;
212	case `2`:
213	value = Int128 (`0`, (static_cast<uint64_t>(array[`0`]) << `32`) + array[`1`]);
214	break;
215	case `3`:
216	value = Int128 (array[`0`],
217	(static_cast<uint64_t>(array[`1`]) << `32`) + array[`2`]);
218	break;
219	case `4`:
220	value = Int128 ((static_cast<int64_t>(array[`0`]) << `32`) + array[`1`],
221	(static_cast<uint64_t>(array[`2`]) << `32`) + array[`3`]);
222	break;
223	case `5`:
224	if (array[`0`] != `0`) {
225	throw std::logic_error ("Can't build Int128 with 5 ints.");
226	}
227	value = Int128 ((static_cast<int64_t>(array[`1`]) << `32`) + array[`2`],
228	(static_cast<uint64_t>(array[`3`]) << `32`) + array[`4`]);
229	break;
230	default:
231	throw std::logic_error ("Unsupported length for building Int128");
232	}
233	}
234
235	/**
236	* Do a division where the divisor fits into a single 32 bit value.
237	*/
238	Int128 singleDivide(uint32_t* dividend, int64_t dividendLength,
239	uint32_t divisor, Int128& remainder,
240	bool dividendWasNegative, bool divisorWasNegative) {
241	uint64_t r = `0`;
242	uint32_t resultArray[`5`];
243	for(int64_t j=`0`; j < dividendLength; j++) {
244	r <<= `32`;
245	r += dividend[j];
246	resultArray[j] = static_cast<uint32_t>(r / divisor);
247	r %= divisor;
248	}
249	Int128 result;
250	buildFromArray(result, resultArray, dividendLength);
251	remainder = static_cast<int64_t>(r);
252	fixDivisionSigns(result, remainder, dividendWasNegative,
253	divisorWasNegative);
254	return result;
255	}
256
257	Int128 Int128::divide(const Int128 &divisor, Int128 &remainder) const {
258	// Split the dividend and divisor into integer pieces so that we can
259	// work on them.
260	uint32_t dividendArray[`5`];
261	uint32_t divisorArray[`4`];
262	bool dividendWasNegative;
263	bool divisorWasNegative;
264	// leave an extra zero before the dividend
265	dividendArray[`0`] = `0`;
266	int64_t dividendLength = fillInArray(dividendArray + `1`, dividendWasNegative)+`1`;
267	int64_t divisorLength = divisor.fillInArray(divisorArray, divisorWasNegative);
268
269	// Handle some of the easy cases.
270	if (dividendLength <= divisorLength) {
271	remainder = *this;
272	return `0`;
273	} else if (divisorLength == `0`) {
274	throw std::range_error ("Division by 0 in Int128");
275	} else if (divisorLength == `1`) {
276	return singleDivide(dividendArray, dividendLength, divisorArray[`0`],
277	remainder, dividendWasNegative, divisorWasNegative);
278	}
279
280	int64_t resultLength = dividendLength - divisorLength;
281	uint32_t resultArray[`4`];
282
283	// Normalize by shifting both by a multiple of 2 so that
284	// the digit guessing is better. The requirement is that
285	// divisorArray[0] is greater than 231.
286	int64_t normalizeBits = `32` - fls(divisorArray[`0`]);
287	shiftArrayLeft(divisorArray, divisorLength, normalizeBits);
288	shiftArrayLeft(dividendArray, dividendLength, normalizeBits);
289
290	// compute each digit in the result
291	for(int64_t j=`0`; j < resultLength; ++j) {
292	// Guess the next digit. At worst it is two too large
293	uint32_t guess = UINT32_MAX;
294	uint64_t highDividend = static_cast<uint64_t>(dividendArray[j]) << `32` \|
295	dividendArray[j+`1`];
296	if (dividendArray[j] != divisorArray[`0`]) {
297	guess = static_cast<uint32_t>(highDividend / divisorArray[`0`]);
298	}
299
300	// catch all of the cases where guess is two too large and most of the
301	// cases where it is one too large
302	uint32_t rhat =
303	static_cast<uint32_t>(highDividend - guess *
304	static_cast<uint64_t>(divisorArray[`0`]));
305	while (static_cast<uint64_t>(divisorArray[`1`]) * guess >
306	(static_cast<uint64_t>(rhat) << `32`) + dividendArray[j+`2`]) {
307	guess -= `1`;
308	rhat += divisorArray[`0`];
309	if (static_cast<uint64_t>(rhat) < divisorArray[`0`]) {
310	break;
311	}
312	}
313
314	// subtract off the guess divisor from the dividend*
315	uint64_t mult = `0`;
316	for(int64_t i=divisorLength-`1`; i >= `0`; --i) {
317	mult += static_cast<uint64_t>(guess) * divisorArray[i];
318	uint32_t prev = dividendArray[j+i+`1`];
319	dividendArray[j+i+`1`] -= static_cast<uint32_t>(mult);
320	mult >>= `32`;
321	if (dividendArray[j+i+`1`] > prev) {
322	mult += `1`;
323	}
324	}
325	uint32_t prev = dividendArray[j];
326	dividendArray[j] -= static_cast<uint32_t>(mult);
327
328	// if guess was too big, we add back divisor
329	if (dividendArray[j] > prev) {
330	guess -= `1`;
331	uint32_t carry = `0`;
332	for(int64_t i=divisorLength-`1`; i >= `0`; --i) {
333	uint64_t sum = static_cast<uint64_t>(divisorArray[i]) +
334	dividendArray[j+i+`1`] + carry;
335	dividendArray[j+i+`1`] = static_cast<uint32_t>(sum);
336	carry = static_cast<uint32_t>(sum >> `32`);
337	}
338	dividendArray[j] += carry;
339	}
340
341	resultArray[j] = guess;
342	}
343
344	// denormalize the remainder
345	shiftArrayRight(dividendArray, dividendLength, normalizeBits);
346
347	// return result and remainder
348	Int128 result;
349	buildFromArray(result, resultArray, resultLength);
350	buildFromArray(remainder, dividendArray, dividendLength);
351	fixDivisionSigns(result, remainder,
352	dividendWasNegative, divisorWasNegative);
353	return result;
354	}
355
356	std::string Int128::toString() const {
357	// 1018 - the largest power of 10 less than 63 bits
358	const Int128 tenTo18(`0xde0b6b3a7640000`);
359	// 1036
360	const Int128 tenTo36(`0xc097ce7bc90715`, `0xb34b9f1000000000`);
361	Int128 remainder;
362	std::stringstream buf;
363	bool needFill = false;
364
365	// get anything above 1036 and print it
366	Int128 top = divide(tenTo36, remainder);
367	if (top != `0`) {
368	buf << top.toLong();
369	remainder.abs();
370	needFill = true;
371	}
372
373	// now get anything above 1018 and print it
374	Int128 tail;
375	top = remainder.divide(tenTo18, tail);
376	if (needFill \|\| top != `0`) {
377	if (needFill) {
378	buf << std::setw(`18`) << std::setfill(`'0'`);
379	} else {
380	needFill = true;
381	tail.abs();
382	}
383	buf << top.toLong();
384	}
385
386	// finally print the tail, which is less than 1018
387	if (needFill) {
388	buf << std::setw(`18`) << std::setfill(`'0'`);
389	}
390	buf << tail.toLong();
391	return buf.str();
392	}
393
394	std::string Int128::toDecimalString(int32_t scale) const {
395	std::string str = toString();
396	if (scale == `0`) {
397	return str;
398	} else if (*this < `0`) {
399	int32_t len = static_cast<int32_t>(str.length());
400	if (len - `1` > scale) {
401	return str.substr(`0`, static_cast<size_t>(len - scale)) + "." +
402	str.substr(static_cast<size_t>(len - scale),
403	static_cast<size_t>(scale));
404	} else if (len - `1` == scale) {
405	return "-0." + str.substr(`1`, std::string::npos);
406	} else {
407	std::string result = "-0.";
408	for(int32_t i=`0`; i < scale - len + `1`; ++i) {
409	result += "0";
410	}
411	return result + str.substr(`1`, std::string::npos);
412	}
413	} else {
414	int32_t len = static_cast<int32_t>(str.length());
415	if (len > scale) {
416	return str.substr(`0`, static_cast<size_t>(len - scale)) + "." +
417	str.substr(static_cast<size_t>(len - scale),
418	static_cast<size_t>(scale));
419	} else if (len == scale) {
420	return "0." + str;
421	} else {
422	std::string result = "0.";
423	for(int32_t i=`0`; i < scale - len; ++i) {
424	result += "0";
425	}
426	return result + str;
427	}
428	}
429	}
430
431	std::string Int128::toHexString() const {
432	std::stringstream buf;
433	buf << std::hex << "0x"
434	<< std::setw(`16`) << std::setfill(`'0'`) << highbits
435	<< std::setw(`16`) << std::setfill(`'0'`) << lowbits;
436	return buf.str();
437	}
438
439	const static int32_t MAX_PRECISION_64 = `18`;
440	const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + `1`] =
441	{`1`,
442	`10`,
443	`100`,
444	`1000`,
445	`10000`,
446	`100000`,
447	`1000000`,
448	`10000000`,
449	`100000000`,
450	`1000000000`,
451	`10000000000`,
452	`100000000000`,
453	`1000000000000`,
454	`10000000000000`,
455	`100000000000000`,
456	`1000000000000000`,
457	`10000000000000000`,
458	`100000000000000000`,
459	`1000000000000000000`};
460
461	Int128 scaleUpInt128ByPowerOfTen(Int128 value,
462	int32_t power,
463	bool &overflow) {
464	overflow = false;
465	Int128 remainder;
466
467	while (power > `0`) {
468	int32_t step = std::min(power, MAX_PRECISION_64);
469	if (value > `0` && Int128::maximumValue().divide(POWERS_OF_TEN[step], remainder) < value) {
470	overflow = true;
471	return Int128::maximumValue();
472	} else if (value < `0` && Int128::minimumValue().divide(POWERS_OF_TEN[step], remainder) > value) {
473	overflow = true;
474	return Int128::minimumValue();
475	}
476
477	value *= POWERS_OF_TEN[step];
478	power -= step;
479	}
480
481	return value;
482	}
483
484	Int128 scaleDownInt128ByPowerOfTen(Int128 value, int32_t power) {
485	Int128 remainder;
486	while (power > `0`) {
487	int32_t step = std::min(std::abs(power), MAX_PRECISION_64);
488	value = value.divide(POWERS_OF_TEN[step], remainder);
489	power -= step;
490	}
491	return value;
492	}
493
494	}
495

Browse the source code of ClickHouse/contrib/orc/c++/src/Int128.cc