1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <algorithm> |
19 | #include <array> |
20 | #include <climits> |
21 | #include <cstdint> |
22 | #include <cstdlib> |
23 | #include <cstring> |
24 | #include <iomanip> |
25 | #include <limits> |
26 | #include <sstream> |
27 | #include <string> |
28 | |
29 | #include "arrow/status.h" |
30 | #include "arrow/util/bit-util.h" |
31 | #include "arrow/util/decimal.h" |
32 | #include "arrow/util/int-util.h" |
33 | #include "arrow/util/logging.h" |
34 | #include "arrow/util/macros.h" |
35 | |
36 | namespace arrow { |
37 | |
38 | using internal::SafeLeftShift; |
39 | using internal::SafeSignedAdd; |
40 | |
41 | Decimal128::Decimal128(const std::string& str) : Decimal128() { |
42 | Status status(Decimal128::FromString(str, this)); |
43 | DCHECK(status.ok()) << status.message(); |
44 | } |
45 | |
46 | static const Decimal128 kTenTo36(static_cast<int64_t>(0xC097CE7BC90715), |
47 | 0xB34B9F1000000000); |
48 | static const Decimal128 kTenTo18(0xDE0B6B3A7640000); |
49 | |
50 | std::string Decimal128::ToIntegerString() const { |
51 | Decimal128 remainder; |
52 | std::stringstream buf; |
53 | bool need_fill = false; |
54 | |
55 | // get anything above 10 ** 36 and print it |
56 | Decimal128 top; |
57 | DCHECK_OK(Divide(kTenTo36, &top, &remainder)); |
58 | |
59 | if (top != 0) { |
60 | buf << static_cast<int64_t>(top); |
61 | remainder.Abs(); |
62 | need_fill = true; |
63 | } |
64 | |
65 | // now get anything above 10 ** 18 and print it |
66 | Decimal128 tail; |
67 | auto s = remainder.Divide(kTenTo18, &top, &tail); |
68 | |
69 | if (need_fill || top != 0) { |
70 | if (need_fill) { |
71 | buf << std::setw(18) << std::setfill('0'); |
72 | } else { |
73 | need_fill = true; |
74 | tail.Abs(); |
75 | } |
76 | |
77 | buf << static_cast<int64_t>(top); |
78 | } |
79 | |
80 | // finally print the tail, which is less than 10**18 |
81 | if (need_fill) { |
82 | buf << std::setw(18) << std::setfill('0'); |
83 | } |
84 | buf << static_cast<int64_t>(tail); |
85 | return buf.str(); |
86 | } |
87 | |
88 | Decimal128::operator int64_t() const { |
89 | DCHECK(high_bits() == 0 || high_bits() == -1) |
90 | << "Trying to cast an Decimal128 greater than the value range of a " |
91 | "int64_t. high_bits_ must be equal to 0 or -1, got: " |
92 | << high_bits(); |
93 | return static_cast<int64_t>(low_bits()); |
94 | } |
95 | |
96 | static std::string ToStringNegativeScale(const std::string& str, |
97 | int32_t adjusted_exponent, bool is_negative) { |
98 | std::stringstream buf; |
99 | |
100 | size_t offset = 0; |
101 | buf << str[offset++]; |
102 | |
103 | if (is_negative) { |
104 | buf << str[offset++]; |
105 | } |
106 | |
107 | buf << '.' << str.substr(offset, std::string::npos) << 'E' << std::showpos |
108 | << adjusted_exponent; |
109 | return buf.str(); |
110 | } |
111 | |
112 | std::string Decimal128::ToString(int32_t scale) const { |
113 | const std::string str(ToIntegerString()); |
114 | |
115 | if (scale == 0) { |
116 | return str; |
117 | } |
118 | |
119 | const bool is_negative = *this < 0; |
120 | |
121 | const auto len = static_cast<int32_t>(str.size()); |
122 | const auto is_negative_offset = static_cast<int32_t>(is_negative); |
123 | const int32_t adjusted_exponent = -scale + (len - 1 - is_negative_offset); |
124 | |
125 | /// Note that the -6 is taken from the Java BigDecimal documentation. |
126 | if (scale < 0 || adjusted_exponent < -6) { |
127 | return ToStringNegativeScale(str, adjusted_exponent, is_negative); |
128 | } |
129 | |
130 | if (is_negative) { |
131 | if (len - 1 > scale) { |
132 | const auto n = static_cast<size_t>(len - scale); |
133 | return str.substr(0, n) + "." + str.substr(n, static_cast<size_t>(scale)); |
134 | } |
135 | |
136 | if (len - 1 == scale) { |
137 | return "-0." + str.substr(1, std::string::npos); |
138 | } |
139 | |
140 | std::string result("-0." + std::string(static_cast<size_t>(scale - len + 1), '0')); |
141 | return result + str.substr(1, std::string::npos); |
142 | } |
143 | |
144 | if (len > scale) { |
145 | const auto n = static_cast<size_t>(len - scale); |
146 | return str.substr(0, n) + "." + str.substr(n, static_cast<size_t>(scale)); |
147 | } |
148 | |
149 | if (len == scale) { |
150 | return "0." + str; |
151 | } |
152 | |
153 | return "0." + std::string(static_cast<size_t>(scale - len), '0') + str; |
154 | } |
155 | |
156 | static constexpr auto kInt64DecimalDigits = |
157 | static_cast<size_t>(std::numeric_limits<int64_t>::digits10); |
158 | static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + 1] = {1LL, |
159 | 10LL, |
160 | 100LL, |
161 | 1000LL, |
162 | 10000LL, |
163 | 100000LL, |
164 | 1000000LL, |
165 | 10000000LL, |
166 | 100000000LL, |
167 | 1000000000LL, |
168 | 10000000000LL, |
169 | 100000000000LL, |
170 | 1000000000000LL, |
171 | 10000000000000LL, |
172 | 100000000000000LL, |
173 | 1000000000000000LL, |
174 | 10000000000000000LL, |
175 | 100000000000000000LL, |
176 | 1000000000000000000LL}; |
177 | |
178 | static void StringToInteger(const std::string& str, Decimal128* out) { |
179 | using std::size_t; |
180 | |
181 | DCHECK_NE(out, nullptr) << "Decimal128 output variable cannot be nullptr" ; |
182 | DCHECK_EQ(*out, 0) |
183 | << "When converting a string to Decimal128 the initial output must be 0" ; |
184 | |
185 | const size_t length = str.length(); |
186 | |
187 | DCHECK_GT(length, 0) << "length of parsed decimal string should be greater than 0" ; |
188 | |
189 | for (size_t posn = 0; posn < length;) { |
190 | const size_t group = std::min(kInt64DecimalDigits, length - posn); |
191 | const int64_t chunk = std::stoll(str.substr(posn, group)); |
192 | const int64_t multiple = kPowersOfTen[group]; |
193 | |
194 | *out *= multiple; |
195 | *out += chunk; |
196 | |
197 | posn += group; |
198 | } |
199 | } |
200 | |
201 | namespace { |
202 | |
203 | struct DecimalComponents { |
204 | std::string sign; |
205 | std::string whole_digits; |
206 | std::string fractional_digits; |
207 | std::string exponent_sign; |
208 | std::string exponent_digits; |
209 | }; |
210 | |
211 | inline bool IsSign(char c) { return c == '-' || c == '+'; } |
212 | |
213 | inline bool IsDot(char c) { return c == '.'; } |
214 | |
215 | inline bool IsDigit(char c) { return c >= '0' && c <= '9'; } |
216 | |
217 | inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; } |
218 | |
219 | inline size_t ParseDigitsRun(const char* s, size_t start, size_t size, std::string* out) { |
220 | size_t pos; |
221 | for (pos = start; pos < size; ++pos) { |
222 | if (!IsDigit(s[pos])) { |
223 | break; |
224 | } |
225 | } |
226 | *out = std::string(s + start, pos - start); |
227 | return pos; |
228 | } |
229 | |
230 | bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) { |
231 | size_t pos = 0; |
232 | |
233 | if (size == 0) { |
234 | return false; |
235 | } |
236 | // Sign of the number |
237 | if (IsSign(s[pos])) { |
238 | out->sign = std::string(s + pos, 1); |
239 | ++pos; |
240 | } |
241 | // First run of digits |
242 | pos = ParseDigitsRun(s, pos, size, &out->whole_digits); |
243 | if (pos == size) { |
244 | return !out->whole_digits.empty(); |
245 | } |
246 | // Optional dot (if given in fractional form) |
247 | bool has_dot = IsDot(s[pos]); |
248 | if (has_dot) { |
249 | // Second run of digits |
250 | ++pos; |
251 | pos = ParseDigitsRun(s, pos, size, &out->fractional_digits); |
252 | } |
253 | if (out->whole_digits.empty() && out->fractional_digits.empty()) { |
254 | // Need at least some digits (whole or fractional) |
255 | return false; |
256 | } |
257 | if (pos == size) { |
258 | return true; |
259 | } |
260 | // Optional exponent |
261 | if (StartsExponent(s[pos])) { |
262 | ++pos; |
263 | if (pos == size) { |
264 | return false; |
265 | } |
266 | // Optional exponent sign |
267 | if (IsSign(s[pos])) { |
268 | out->exponent_sign = std::string(s + pos, 1); |
269 | ++pos; |
270 | } |
271 | pos = ParseDigitsRun(s, pos, size, &out->exponent_digits); |
272 | if (out->exponent_digits.empty()) { |
273 | // Need some exponent digits |
274 | return false; |
275 | } |
276 | } |
277 | return pos == size; |
278 | } |
279 | |
280 | } // namespace |
281 | |
282 | Status Decimal128::FromString(const util::string_view& s, Decimal128* out, |
283 | int32_t* precision, int32_t* scale) { |
284 | if (s.empty()) { |
285 | return Status::Invalid("Empty string cannot be converted to decimal" ); |
286 | } |
287 | |
288 | DecimalComponents dec; |
289 | if (!ParseDecimalComponents(s.data(), s.size(), &dec)) { |
290 | return Status::Invalid("The string '" , s, "' is not a valid decimal number" ); |
291 | } |
292 | std::string exponent_value = dec.exponent_sign + dec.exponent_digits; |
293 | |
294 | // Count number of significant digits (without leading zeros) |
295 | size_t first_non_zero = dec.whole_digits.find_first_not_of('0'); |
296 | size_t significant_digits = dec.fractional_digits.size(); |
297 | if (first_non_zero != std::string::npos) { |
298 | significant_digits += dec.whole_digits.size() - first_non_zero; |
299 | } |
300 | |
301 | if (precision != nullptr) { |
302 | *precision = static_cast<int32_t>(significant_digits); |
303 | } |
304 | |
305 | if (scale != nullptr) { |
306 | if (!exponent_value.empty()) { |
307 | auto adjusted_exponent = static_cast<int32_t>(std::stol(exponent_value)); |
308 | auto len = static_cast<int32_t>(significant_digits); |
309 | *scale = -adjusted_exponent + len - 1; |
310 | } else { |
311 | *scale = static_cast<int32_t>(dec.fractional_digits.size()); |
312 | } |
313 | } |
314 | |
315 | if (out != nullptr) { |
316 | *out = 0; |
317 | StringToInteger(dec.whole_digits + dec.fractional_digits, out); |
318 | if (dec.sign == "-" ) { |
319 | out->Negate(); |
320 | } |
321 | |
322 | if (scale != nullptr && *scale < 0) { |
323 | const int32_t abs_scale = std::abs(*scale); |
324 | *out *= GetScaleMultiplier(abs_scale); |
325 | |
326 | if (precision != nullptr) { |
327 | *precision += abs_scale; |
328 | } |
329 | *scale = 0; |
330 | } |
331 | } |
332 | |
333 | return Status::OK(); |
334 | } |
335 | |
336 | Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision, |
337 | int32_t* scale) { |
338 | return FromString(util::string_view(s), out, precision, scale); |
339 | } |
340 | |
341 | Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision, |
342 | int32_t* scale) { |
343 | return FromString(util::string_view(s), out, precision, scale); |
344 | } |
345 | |
346 | // Helper function used by Decimal128::FromBigEndian |
347 | static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) { |
348 | // We don't bounds check the length here because this is called by |
349 | // FromBigEndian that has a Decimal128 as its out parameters and |
350 | // that function is already checking the length of the bytes and only |
351 | // passes lengths between zero and eight. |
352 | uint64_t result = 0; |
353 | // Using memcpy instead of special casing for length |
354 | // and doing the conversion in 16, 32 parts, which could |
355 | // possibly create unaligned memory access on certain platforms |
356 | memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length); |
357 | return ::arrow::BitUtil::FromBigEndian(result); |
358 | } |
359 | |
360 | Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal128* out) { |
361 | static constexpr int32_t kMinDecimalBytes = 1; |
362 | static constexpr int32_t kMaxDecimalBytes = 16; |
363 | |
364 | int64_t high, low; |
365 | |
366 | if (length < kMinDecimalBytes || length > kMaxDecimalBytes) { |
367 | return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian " , |
368 | "was " , length, ", but must be between " , kMinDecimalBytes, |
369 | " and " , kMaxDecimalBytes); |
370 | } |
371 | |
372 | // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the |
373 | // sign bit. |
374 | const bool is_negative = static_cast<int8_t>(bytes[0]) < 0; |
375 | |
376 | // 1. Extract the high bytes |
377 | // Stop byte of the high bytes |
378 | const int32_t high_bits_offset = std::max(0, length - 8); |
379 | const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset); |
380 | |
381 | if (high_bits_offset == 8) { |
382 | // Avoid undefined shift by 64 below |
383 | high = high_bits; |
384 | } else { |
385 | high = -1 * (is_negative && length < kMaxDecimalBytes); |
386 | // Shift left enough bits to make room for the incoming int64_t |
387 | high = SafeLeftShift(high, high_bits_offset * CHAR_BIT); |
388 | // Preserve the upper bits by inplace OR-ing the int64_t |
389 | high |= high_bits; |
390 | } |
391 | |
392 | // 2. Extract the low bytes |
393 | // Stop byte of the low bytes |
394 | const int32_t low_bits_offset = std::min(length, 8); |
395 | const auto low_bits = |
396 | UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset); |
397 | |
398 | if (low_bits_offset == 8) { |
399 | // Avoid undefined shift by 64 below |
400 | low = low_bits; |
401 | } else { |
402 | // Sign extend the low bits if necessary |
403 | low = -1 * (is_negative && length < 8); |
404 | // Shift left enough bits to make room for the incoming int64_t |
405 | low = SafeLeftShift(low, low_bits_offset * CHAR_BIT); |
406 | // Preserve the upper bits by inplace OR-ing the int64_t |
407 | low |= low_bits; |
408 | } |
409 | |
410 | *out = Decimal128(high, static_cast<uint64_t>(low)); |
411 | return Status::OK(); |
412 | } |
413 | |
414 | Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const { |
415 | Status status; |
416 | |
417 | switch (dstatus) { |
418 | case DecimalStatus::kSuccess: |
419 | status = Status::OK(); |
420 | break; |
421 | |
422 | case DecimalStatus::kDivideByZero: |
423 | status = Status::Invalid("Division by 0 in Decimal128" ); |
424 | break; |
425 | |
426 | case DecimalStatus::kOverflow: |
427 | status = Status::Invalid("Overflow occurred during Decimal128 operation." ); |
428 | break; |
429 | |
430 | case DecimalStatus::kRescaleDataLoss: |
431 | status = Status::Invalid("Rescaling decimal value would cause data loss" ); |
432 | break; |
433 | } |
434 | return status; |
435 | } |
436 | |
437 | } // namespace arrow |
438 | |