1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <algorithm>
19#include <array>
20#include <climits>
21#include <cstdint>
22#include <cstdlib>
23#include <cstring>
24#include <iomanip>
25#include <limits>
26#include <sstream>
27#include <string>
28
29#include "arrow/status.h"
30#include "arrow/util/bit-util.h"
31#include "arrow/util/decimal.h"
32#include "arrow/util/int-util.h"
33#include "arrow/util/logging.h"
34#include "arrow/util/macros.h"
35
36namespace arrow {
37
38using internal::SafeLeftShift;
39using internal::SafeSignedAdd;
40
41Decimal128::Decimal128(const std::string& str) : Decimal128() {
42 Status status(Decimal128::FromString(str, this));
43 DCHECK(status.ok()) << status.message();
44}
45
46static const Decimal128 kTenTo36(static_cast<int64_t>(0xC097CE7BC90715),
47 0xB34B9F1000000000);
48static const Decimal128 kTenTo18(0xDE0B6B3A7640000);
49
50std::string Decimal128::ToIntegerString() const {
51 Decimal128 remainder;
52 std::stringstream buf;
53 bool need_fill = false;
54
55 // get anything above 10 ** 36 and print it
56 Decimal128 top;
57 DCHECK_OK(Divide(kTenTo36, &top, &remainder));
58
59 if (top != 0) {
60 buf << static_cast<int64_t>(top);
61 remainder.Abs();
62 need_fill = true;
63 }
64
65 // now get anything above 10 ** 18 and print it
66 Decimal128 tail;
67 auto s = remainder.Divide(kTenTo18, &top, &tail);
68
69 if (need_fill || top != 0) {
70 if (need_fill) {
71 buf << std::setw(18) << std::setfill('0');
72 } else {
73 need_fill = true;
74 tail.Abs();
75 }
76
77 buf << static_cast<int64_t>(top);
78 }
79
80 // finally print the tail, which is less than 10**18
81 if (need_fill) {
82 buf << std::setw(18) << std::setfill('0');
83 }
84 buf << static_cast<int64_t>(tail);
85 return buf.str();
86}
87
88Decimal128::operator int64_t() const {
89 DCHECK(high_bits() == 0 || high_bits() == -1)
90 << "Trying to cast an Decimal128 greater than the value range of a "
91 "int64_t. high_bits_ must be equal to 0 or -1, got: "
92 << high_bits();
93 return static_cast<int64_t>(low_bits());
94}
95
96static std::string ToStringNegativeScale(const std::string& str,
97 int32_t adjusted_exponent, bool is_negative) {
98 std::stringstream buf;
99
100 size_t offset = 0;
101 buf << str[offset++];
102
103 if (is_negative) {
104 buf << str[offset++];
105 }
106
107 buf << '.' << str.substr(offset, std::string::npos) << 'E' << std::showpos
108 << adjusted_exponent;
109 return buf.str();
110}
111
112std::string Decimal128::ToString(int32_t scale) const {
113 const std::string str(ToIntegerString());
114
115 if (scale == 0) {
116 return str;
117 }
118
119 const bool is_negative = *this < 0;
120
121 const auto len = static_cast<int32_t>(str.size());
122 const auto is_negative_offset = static_cast<int32_t>(is_negative);
123 const int32_t adjusted_exponent = -scale + (len - 1 - is_negative_offset);
124
125 /// Note that the -6 is taken from the Java BigDecimal documentation.
126 if (scale < 0 || adjusted_exponent < -6) {
127 return ToStringNegativeScale(str, adjusted_exponent, is_negative);
128 }
129
130 if (is_negative) {
131 if (len - 1 > scale) {
132 const auto n = static_cast<size_t>(len - scale);
133 return str.substr(0, n) + "." + str.substr(n, static_cast<size_t>(scale));
134 }
135
136 if (len - 1 == scale) {
137 return "-0." + str.substr(1, std::string::npos);
138 }
139
140 std::string result("-0." + std::string(static_cast<size_t>(scale - len + 1), '0'));
141 return result + str.substr(1, std::string::npos);
142 }
143
144 if (len > scale) {
145 const auto n = static_cast<size_t>(len - scale);
146 return str.substr(0, n) + "." + str.substr(n, static_cast<size_t>(scale));
147 }
148
149 if (len == scale) {
150 return "0." + str;
151 }
152
153 return "0." + std::string(static_cast<size_t>(scale - len), '0') + str;
154}
155
156static constexpr auto kInt64DecimalDigits =
157 static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
158static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + 1] = {1LL,
159 10LL,
160 100LL,
161 1000LL,
162 10000LL,
163 100000LL,
164 1000000LL,
165 10000000LL,
166 100000000LL,
167 1000000000LL,
168 10000000000LL,
169 100000000000LL,
170 1000000000000LL,
171 10000000000000LL,
172 100000000000000LL,
173 1000000000000000LL,
174 10000000000000000LL,
175 100000000000000000LL,
176 1000000000000000000LL};
177
178static void StringToInteger(const std::string& str, Decimal128* out) {
179 using std::size_t;
180
181 DCHECK_NE(out, nullptr) << "Decimal128 output variable cannot be nullptr";
182 DCHECK_EQ(*out, 0)
183 << "When converting a string to Decimal128 the initial output must be 0";
184
185 const size_t length = str.length();
186
187 DCHECK_GT(length, 0) << "length of parsed decimal string should be greater than 0";
188
189 for (size_t posn = 0; posn < length;) {
190 const size_t group = std::min(kInt64DecimalDigits, length - posn);
191 const int64_t chunk = std::stoll(str.substr(posn, group));
192 const int64_t multiple = kPowersOfTen[group];
193
194 *out *= multiple;
195 *out += chunk;
196
197 posn += group;
198 }
199}
200
201namespace {
202
203struct DecimalComponents {
204 std::string sign;
205 std::string whole_digits;
206 std::string fractional_digits;
207 std::string exponent_sign;
208 std::string exponent_digits;
209};
210
211inline bool IsSign(char c) { return c == '-' || c == '+'; }
212
213inline bool IsDot(char c) { return c == '.'; }
214
215inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }
216
217inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }
218
219inline size_t ParseDigitsRun(const char* s, size_t start, size_t size, std::string* out) {
220 size_t pos;
221 for (pos = start; pos < size; ++pos) {
222 if (!IsDigit(s[pos])) {
223 break;
224 }
225 }
226 *out = std::string(s + start, pos - start);
227 return pos;
228}
229
230bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
231 size_t pos = 0;
232
233 if (size == 0) {
234 return false;
235 }
236 // Sign of the number
237 if (IsSign(s[pos])) {
238 out->sign = std::string(s + pos, 1);
239 ++pos;
240 }
241 // First run of digits
242 pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
243 if (pos == size) {
244 return !out->whole_digits.empty();
245 }
246 // Optional dot (if given in fractional form)
247 bool has_dot = IsDot(s[pos]);
248 if (has_dot) {
249 // Second run of digits
250 ++pos;
251 pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
252 }
253 if (out->whole_digits.empty() && out->fractional_digits.empty()) {
254 // Need at least some digits (whole or fractional)
255 return false;
256 }
257 if (pos == size) {
258 return true;
259 }
260 // Optional exponent
261 if (StartsExponent(s[pos])) {
262 ++pos;
263 if (pos == size) {
264 return false;
265 }
266 // Optional exponent sign
267 if (IsSign(s[pos])) {
268 out->exponent_sign = std::string(s + pos, 1);
269 ++pos;
270 }
271 pos = ParseDigitsRun(s, pos, size, &out->exponent_digits);
272 if (out->exponent_digits.empty()) {
273 // Need some exponent digits
274 return false;
275 }
276 }
277 return pos == size;
278}
279
280} // namespace
281
282Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
283 int32_t* precision, int32_t* scale) {
284 if (s.empty()) {
285 return Status::Invalid("Empty string cannot be converted to decimal");
286 }
287
288 DecimalComponents dec;
289 if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
290 return Status::Invalid("The string '", s, "' is not a valid decimal number");
291 }
292 std::string exponent_value = dec.exponent_sign + dec.exponent_digits;
293
294 // Count number of significant digits (without leading zeros)
295 size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
296 size_t significant_digits = dec.fractional_digits.size();
297 if (first_non_zero != std::string::npos) {
298 significant_digits += dec.whole_digits.size() - first_non_zero;
299 }
300
301 if (precision != nullptr) {
302 *precision = static_cast<int32_t>(significant_digits);
303 }
304
305 if (scale != nullptr) {
306 if (!exponent_value.empty()) {
307 auto adjusted_exponent = static_cast<int32_t>(std::stol(exponent_value));
308 auto len = static_cast<int32_t>(significant_digits);
309 *scale = -adjusted_exponent + len - 1;
310 } else {
311 *scale = static_cast<int32_t>(dec.fractional_digits.size());
312 }
313 }
314
315 if (out != nullptr) {
316 *out = 0;
317 StringToInteger(dec.whole_digits + dec.fractional_digits, out);
318 if (dec.sign == "-") {
319 out->Negate();
320 }
321
322 if (scale != nullptr && *scale < 0) {
323 const int32_t abs_scale = std::abs(*scale);
324 *out *= GetScaleMultiplier(abs_scale);
325
326 if (precision != nullptr) {
327 *precision += abs_scale;
328 }
329 *scale = 0;
330 }
331 }
332
333 return Status::OK();
334}
335
336Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
337 int32_t* scale) {
338 return FromString(util::string_view(s), out, precision, scale);
339}
340
341Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
342 int32_t* scale) {
343 return FromString(util::string_view(s), out, precision, scale);
344}
345
346// Helper function used by Decimal128::FromBigEndian
347static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
348 // We don't bounds check the length here because this is called by
349 // FromBigEndian that has a Decimal128 as its out parameters and
350 // that function is already checking the length of the bytes and only
351 // passes lengths between zero and eight.
352 uint64_t result = 0;
353 // Using memcpy instead of special casing for length
354 // and doing the conversion in 16, 32 parts, which could
355 // possibly create unaligned memory access on certain platforms
356 memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
357 return ::arrow::BitUtil::FromBigEndian(result);
358}
359
360Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal128* out) {
361 static constexpr int32_t kMinDecimalBytes = 1;
362 static constexpr int32_t kMaxDecimalBytes = 16;
363
364 int64_t high, low;
365
366 if (length < kMinDecimalBytes || length > kMaxDecimalBytes) {
367 return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
368 "was ", length, ", but must be between ", kMinDecimalBytes,
369 " and ", kMaxDecimalBytes);
370 }
371
372 // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
373 // sign bit.
374 const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
375
376 // 1. Extract the high bytes
377 // Stop byte of the high bytes
378 const int32_t high_bits_offset = std::max(0, length - 8);
379 const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
380
381 if (high_bits_offset == 8) {
382 // Avoid undefined shift by 64 below
383 high = high_bits;
384 } else {
385 high = -1 * (is_negative && length < kMaxDecimalBytes);
386 // Shift left enough bits to make room for the incoming int64_t
387 high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
388 // Preserve the upper bits by inplace OR-ing the int64_t
389 high |= high_bits;
390 }
391
392 // 2. Extract the low bytes
393 // Stop byte of the low bytes
394 const int32_t low_bits_offset = std::min(length, 8);
395 const auto low_bits =
396 UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
397
398 if (low_bits_offset == 8) {
399 // Avoid undefined shift by 64 below
400 low = low_bits;
401 } else {
402 // Sign extend the low bits if necessary
403 low = -1 * (is_negative && length < 8);
404 // Shift left enough bits to make room for the incoming int64_t
405 low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
406 // Preserve the upper bits by inplace OR-ing the int64_t
407 low |= low_bits;
408 }
409
410 *out = Decimal128(high, static_cast<uint64_t>(low));
411 return Status::OK();
412}
413
414Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
415 Status status;
416
417 switch (dstatus) {
418 case DecimalStatus::kSuccess:
419 status = Status::OK();
420 break;
421
422 case DecimalStatus::kDivideByZero:
423 status = Status::Invalid("Division by 0 in Decimal128");
424 break;
425
426 case DecimalStatus::kOverflow:
427 status = Status::Invalid("Overflow occurred during Decimal128 operation.");
428 break;
429
430 case DecimalStatus::kRescaleDataLoss:
431 status = Status::Invalid("Rescaling decimal value would cause data loss");
432 break;
433 }
434 return status;
435}
436
437} // namespace arrow
438