decimal.cc source code [arrow/arrow/util/decimal.cc]

1	// Licensed to the Apache Software Foundation (ASF) under one
2	// or more contributor license agreements. See the NOTICE file
3	// distributed with this work for additional information
4	// regarding copyright ownership. The ASF licenses this file
5	// to you under the Apache License, Version 2.0 (the
6	// "License"); you may not use this file except in compliance
7	// with the License. You may obtain a copy of the License at
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing,
12	// software distributed under the License is distributed on an
13	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14	// KIND, either express or implied. See the License for the
15	// specific language governing permissions and limitations
16	// under the License.
17
18	#include <algorithm>
19	#include <array>
20	#include <climits>
21	#include <cstdint>
22	#include <cstdlib>
23	#include <cstring>
24	#include <iomanip>
25	#include <limits>
26	#include <sstream>
27	#include <string>
28
29	#include "arrow/status.h"
30	#include "arrow/util/bit-util.h"
31	#include "arrow/util/decimal.h"
32	#include "arrow/util/int-util.h"
33	#include "arrow/util/logging.h"
34	#include "arrow/util/macros.h"
35
36	namespace arrow {
37
38	using internal::SafeLeftShift;
39	using internal::SafeSignedAdd;
40
41	Decimal128::Decimal128(const std::string& str) : Decimal128 () {
42	Status status(Decimal128::FromString(str, this));
43	DCHECK(status.ok()) << status.message();
44	}
45
46	static const Decimal128 kTenTo36(static_cast<int64_t>(`0xC097CE7BC90715`),
47	`0xB34B9F1000000000`);
48	static const Decimal128 kTenTo18(`0xDE0B6B3A7640000`);
49
50	std::string Decimal128::ToIntegerString() const {
51	Decimal128 remainder;
52	std::stringstream buf;
53	bool need_fill = false;
54
55	// get anything above 10 * 36 and print it*
56	Decimal128 top;
57	DCHECK_OK(Divide(kTenTo36, &top, &remainder));
58
59	if (top != `0`) {
60	buf << static_cast<int64_t>(top);
61	remainder.Abs();
62	need_fill = true;
63	}
64
65	// now get anything above 10 * 18 and print it*
66	Decimal128 tail;
67	auto s = remainder.Divide(kTenTo18, &top, &tail);
68
69	if (need_fill \|\| top != `0`) {
70	if (need_fill) {
71	buf << std::setw(`18`) << std::setfill(`'0'`);
72	} else {
73	need_fill = true;
74	tail.Abs();
75	}
76
77	buf << static_cast<int64_t>(top);
78	}
79
80	// finally print the tail, which is less than 1018
81	if (need_fill) {
82	buf << std::setw(`18`) << std::setfill(`'0'`);
83	}
84	buf << static_cast<int64_t>(tail);
85	return buf.str();
86	}
87
88	Decimal128::operator int64_t() const {
89	DCHECK(high_bits() == `0` \|\| high_bits() == -`1`)
90	<< "Trying to cast an Decimal128 greater than the value range of a "
91	"int64_t. high_bits_ must be equal to 0 or -1, got: "
92	<< high_bits();
93	return static_cast<int64_t>(low_bits());
94	}
95
96	static std::string ToStringNegativeScale(const std::string& str,
97	int32_t adjusted_exponent, bool is_negative) {
98	std::stringstream buf;
99
100	size_t offset = `0`;
101	buf << str [offset++];
102
103	if (is_negative) {
104	buf << str [offset++];
105	}
106
107	buf << `'.'` << str.substr(offset, std::string::npos) << `'E'` << std::showpos
108	<< adjusted_exponent;
109	return buf.str();
110	}
111
112	std::string Decimal128::ToString(int32_t scale) const {
113	const std::string str(ToIntegerString());
114
115	if (scale == `0`) {
116	return str;
117	}
118
119	const bool is_negative = *this < `0`;
120
121	const auto len = static_cast<int32_t>(str.size());
122	const auto is_negative_offset = static_cast<int32_t>(is_negative);
123	const int32_t adjusted_exponent = -scale + (len - `1` - is_negative_offset);
124
125	/// Note that the -6 is taken from the Java BigDecimal documentation.
126	if (scale < `0` \|\| adjusted_exponent < -`6`) {
127	return ToStringNegativeScale(str, adjusted_exponent, is_negative);
128	}
129
130	if (is_negative) {
131	if (len - `1` > scale) {
132	const auto n = static_cast<size_t>(len - scale);
133	return str.substr(`0`, n) + "." + str.substr(n, static_cast<size_t>(scale));
134	}
135
136	if (len - `1` == scale) {
137	return "-0." + str.substr(`1`, std::string::npos);
138	}
139
140	std::string result("-0." + std::string (static_cast<size_t>(scale - len + `1`), `'0'`));
141	return result + str.substr(`1`, std::string::npos);
142	}
143
144	if (len > scale) {
145	const auto n = static_cast<size_t>(len - scale);
146	return str.substr(`0`, n) + "." + str.substr(n, static_cast<size_t>(scale));
147	}
148
149	if (len == scale) {
150	return "0." + str;
151	}
152
153	return "0." + std::string (static_cast<size_t>(scale - len), `'0'`) + str;
154	}
155
156	static constexpr auto kInt64DecimalDigits =
157	static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
158	static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + `1`] = {`1LL`,
159	`10LL`,
160	`100LL`,
161	`1000LL`,
162	`10000LL`,
163	`100000LL`,
164	`1000000LL`,
165	`10000000LL`,
166	`100000000LL`,
167	`1000000000LL`,
168	`10000000000LL`,
169	`100000000000LL`,
170	`1000000000000LL`,
171	`10000000000000LL`,
172	`100000000000000LL`,
173	`1000000000000000LL`,
174	`10000000000000000LL`,
175	`100000000000000000LL`,
176	`1000000000000000000LL`};
177
178	static void StringToInteger(const std::string& str, Decimal128* out) {
179	using std::size_t;
180
181	DCHECK_NE(out, nullptr) << "Decimal128 output variable cannot be nullptr";
182	DCHECK_EQ(*out, `0`)
183	<< "When converting a string to Decimal128 the initial output must be 0";
184
185	const size_t length = str.length();
186
187	DCHECK_GT(length, `0`) << "length of parsed decimal string should be greater than 0";
188
189	for (size_t posn = `0`; posn < length;) {
190	const size_t group = std::min(kInt64DecimalDigits, length - posn);
191	const int64_t chunk = std::stoll(str.substr(posn, group));
192	const int64_t multiple = kPowersOfTen[group];
193
194	out = multiple;
195	*out += chunk;
196
197	posn += group;
198	}
199	}
200
201	namespace {
202
203	struct DecimalComponents {
204	std::string sign;
205	std::string whole_digits;
206	std::string fractional_digits;
207	std::string exponent_sign;
208	std::string exponent_digits;
209	};
210
211	inline bool IsSign(char c) { return c == `'-'` \|\| c == `'+'`; }
212
213	inline bool IsDot(char c) { return c == `'.'`; }
214
215	inline bool IsDigit(char c) { return c >= `'0'` && c <= `'9'`; }
216
217	inline bool StartsExponent(char c) { return c == `'e'` \|\| c == `'E'`; }
218
219	inline size_t ParseDigitsRun(const char* s, size_t start, size_t size, std::string* out) {
220	size_t pos;
221	for (pos = start; pos < size; ++pos) {
222	if (!IsDigit(s[pos])) {
223	break;
224	}
225	}
226	*out = std::string (s + start, pos - start);
227	return pos;
228	}
229
230	bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
231	size_t pos = `0`;
232
233	if (size == `0`) {
234	return false;
235	}
236	// Sign of the number
237	if (IsSign(s[pos])) {
238	out->sign = std::string (s + pos, `1`);
239	++pos;
240	}
241	// First run of digits
242	pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
243	if (pos == size) {
244	return !out->whole_digits.empty();
245	}
246	// Optional dot (if given in fractional form)
247	bool has_dot = IsDot(s[pos]);
248	if (has_dot) {
249	// Second run of digits
250	++pos;
251	pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
252	}
253	if (out->whole_digits.empty() && out->fractional_digits.empty()) {
254	// Need at least some digits (whole or fractional)
255	return false;
256	}
257	if (pos == size) {
258	return true;
259	}
260	// Optional exponent
261	if (StartsExponent(s[pos])) {
262	++pos;
263	if (pos == size) {
264	return false;
265	}
266	// Optional exponent sign
267	if (IsSign(s[pos])) {
268	out->exponent_sign = std::string (s + pos, `1`);
269	++pos;
270	}
271	pos = ParseDigitsRun(s, pos, size, &out->exponent_digits);
272	if (out->exponent_digits.empty()) {
273	// Need some exponent digits
274	return false;
275	}
276	}
277	return pos == size;
278	}
279
280	} // namespace
281
282	Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
283	int32_t* precision, int32_t* scale) {
284	if (s.empty()) {
285	return Status::Invalid("Empty string cannot be converted to decimal");
286	}
287
288	DecimalComponents dec;
289	if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
290	return Status::Invalid("The string '", s, "' is not a valid decimal number");
291	}
292	std::string exponent_value = dec.exponent_sign + dec.exponent_digits;
293
294	// Count number of significant digits (without leading zeros)
295	size_t first_non_zero = dec.whole_digits.find_first_not_of(`'0'`);
296	size_t significant_digits = dec.fractional_digits.size();
297	if (first_non_zero != std::string::npos) {
298	significant_digits += dec.whole_digits.size() - first_non_zero;
299	}
300
301	if (precision != nullptr) {
302	precision = static_cast*<int32_t>(significant_digits);
303	}
304
305	if (scale != nullptr) {
306	if (!exponent_value.empty()) {
307	auto adjusted_exponent = static_cast<int32_t>(std::stol(exponent_value));
308	auto len = static_cast<int32_t>(significant_digits);
309	*scale = -adjusted_exponent + len - `1`;
310	} else {
311	scale = static_cast*<int32_t>(dec.fractional_digits.size());
312	}
313	}
314
315	if (out != nullptr) {
316	*out = `0`;
317	StringToInteger(dec.whole_digits + dec.fractional_digits, out);
318	if (dec.sign == "-") {
319	out->Negate();
320	}
321
322	if (scale != nullptr && *scale < `0`) {
323	const int32_t abs_scale = std::abs(*scale);
324	out = GetScaleMultiplier(abs_scale);
325
326	if (precision != nullptr) {
327	*precision += abs_scale;
328	}
329	*scale = `0`;
330	}
331	}
332
333	return Status::OK();
334	}
335
336	Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
337	int32_t* scale) {
338	return FromString(util::string_view (s), out, precision, scale);
339	}
340
341	Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
342	int32_t* scale) {
343	return FromString(util::string_view (s), out, precision, scale);
344	}
345
346	// Helper function used by Decimal128::FromBigEndian
347	static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
348	// We don't bounds check the length here because this is called by
349	// FromBigEndian that has a Decimal128 as its out parameters and
350	// that function is already checking the length of the bytes and only
351	// passes lengths between zero and eight.
352	uint64_t result = `0`;
353	// Using memcpy instead of special casing for length
354	// and doing the conversion in 16, 32 parts, which could
355	// possibly create unaligned memory access on certain platforms
356	memcpy(reinterpret_cast<uint8_t*>(&result) + `8` - length, bytes, length);
357	return ::arrow::BitUtil::FromBigEndian(result);
358	}
359
360	Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal128* out) {
361	static constexpr int32_t kMinDecimalBytes = `1`;
362	static constexpr int32_t kMaxDecimalBytes = `16`;
363
364	int64_t high, low;
365
366	if (length < kMinDecimalBytes \|\| length > kMaxDecimalBytes) {
367	return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
368	"was ", length, ", but must be between ", kMinDecimalBytes,
369	" and ", kMaxDecimalBytes);
370	}
371
372	// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
373	// sign bit.
374	const bool is_negative = static_cast<int8_t>(bytes[`0`]) < `0`;
375
376	// 1. Extract the high bytes
377	// Stop byte of the high bytes
378	const int32_t high_bits_offset = std::max(`0`, length - `8`);
379	const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
380
381	if (high_bits_offset == `8`) {
382	// Avoid undefined shift by 64 below
383	high = high_bits;
384	} else {
385	high = -`1` * (is_negative && length < kMaxDecimalBytes);
386	// Shift left enough bits to make room for the incoming int64_t
387	high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
388	// Preserve the upper bits by inplace OR-ing the int64_t
389	high \|= high_bits;
390	}
391
392	// 2. Extract the low bytes
393	// Stop byte of the low bytes
394	const int32_t low_bits_offset = std::min(length, `8`);
395	const auto low_bits =
396	UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
397
398	if (low_bits_offset == `8`) {
399	// Avoid undefined shift by 64 below
400	low = low_bits;
401	} else {
402	// Sign extend the low bits if necessary
403	low = -`1` * (is_negative && length < `8`);
404	// Shift left enough bits to make room for the incoming int64_t
405	low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
406	// Preserve the upper bits by inplace OR-ing the int64_t
407	low \|= low_bits;
408	}
409
410	out = Decimal128 (high, static_cast*<uint64_t>(low));
411	return Status::OK();
412	}
413
414	Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
415	Status status;
416
417	switch (dstatus) {
418	case DecimalStatus::kSuccess:
419	status = Status::OK();
420	break;
421
422	case DecimalStatus::kDivideByZero:
423	status = Status::Invalid("Division by 0 in Decimal128");
424	break;
425
426	case DecimalStatus::kOverflow:
427	status = Status::Invalid("Overflow occurred during Decimal128 operation.");
428	break;
429
430	case DecimalStatus::kRescaleDataLoss:
431	status = Status::Invalid("Rescaling decimal value would cause data loss");
432	break;
433	}
434	return status;
435	}
436
437	} // namespace arrow
438

Browse the source code of arrow/arrow/util/decimal.cc