1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/util/int-util.h"
19
20#include <algorithm>
21#include <cstring>
22#include <limits>
23
24#include "arrow/util/bit-util.h"
25#include "arrow/util/logging.h"
26
27namespace arrow {
28namespace internal {
29
// Largest value representable in each unsigned integer width, as uint64_t.
static constexpr uint64_t max_uint8 =
    static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
static constexpr uint64_t max_uint16 =
    static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
static constexpr uint64_t max_uint32 =
    static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
static constexpr uint64_t max_uint64 = std::numeric_limits<uint64_t>::max();

// Masks selecting every bit *above* the given width; a value fits in that
// width iff (value & mask) == 0.
static constexpr uint64_t mask_uint8 = ~0xffULL;
static constexpr uint64_t mask_uint16 = ~0xffffULL;
static constexpr uint64_t mask_uint32 = ~0xffffffffULL;

//
// Unsigned integer width detection
//

// Maximum unsigned value per byte width, indexed by the width itself
// (entries 0, 3, 5, 6 and 7 are unused placeholders).
static const uint64_t max_uints[] = {0, max_uint8, max_uint16, 0, max_uint32,
                                     0, 0, 0, max_uint64};
48
49// Check if we would need to expand the underlying storage type
50inline uint8_t ExpandedUIntWidth(uint64_t val, uint8_t current_width) {
51 // Optimize for the common case where width doesn't change
52 if (ARROW_PREDICT_TRUE(val <= max_uints[current_width])) {
53 return current_width;
54 }
55 if (current_width == 1 && val <= max_uint8) {
56 return 1;
57 } else if (current_width <= 2 && val <= max_uint16) {
58 return 2;
59 } else if (current_width <= 4 && val <= max_uint32) {
60 return 4;
61 } else {
62 return 8;
63 }
64}
65
66uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width) {
67 uint8_t width = min_width;
68 if (min_width < 8) {
69 auto p = values;
70 const auto end = p + length;
71 while (p <= end - 16) {
72 // This is probably SIMD-izable
73 auto u = p[0];
74 auto v = p[1];
75 auto w = p[2];
76 auto x = p[3];
77 u |= p[4];
78 v |= p[5];
79 w |= p[6];
80 x |= p[7];
81 u |= p[8];
82 v |= p[9];
83 w |= p[10];
84 x |= p[11];
85 u |= p[12];
86 v |= p[13];
87 w |= p[14];
88 x |= p[15];
89 p += 16;
90 width = ExpandedUIntWidth(u | v | w | x, width);
91 if (ARROW_PREDICT_FALSE(width == 8)) {
92 break;
93 }
94 }
95 if (p <= end - 8) {
96 auto u = p[0];
97 auto v = p[1];
98 auto w = p[2];
99 auto x = p[3];
100 u |= p[4];
101 v |= p[5];
102 w |= p[6];
103 x |= p[7];
104 p += 8;
105 width = ExpandedUIntWidth(u | v | w | x, width);
106 }
107 while (p < end) {
108 width = ExpandedUIntWidth(*p++, width);
109 }
110 }
111 return width;
112}
113
114uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
115 int64_t length, uint8_t min_width) {
116 if (valid_bytes == nullptr) {
117 return DetectUIntWidth(values, length, min_width);
118 }
119 uint8_t width = min_width;
120 if (min_width < 8) {
121 auto p = values;
122 const auto end = p + length;
123 auto b = valid_bytes;
124
125#define MASK(p, b, i) p[i] * (b[i] != 0)
126
127 while (p <= end - 8) {
128 // This is probably be SIMD-izable
129 auto u = MASK(p, b, 0);
130 auto v = MASK(p, b, 1);
131 auto w = MASK(p, b, 2);
132 auto x = MASK(p, b, 3);
133 u |= MASK(p, b, 4);
134 v |= MASK(p, b, 5);
135 w |= MASK(p, b, 6);
136 x |= MASK(p, b, 7);
137 b += 8;
138 p += 8;
139 width = ExpandedUIntWidth(u | v | w | x, width);
140 if (ARROW_PREDICT_FALSE(width == 8)) {
141 break;
142 }
143 }
144 uint64_t mask = 0;
145 while (p < end) {
146 mask |= MASK(p, b, 0);
147 ++b;
148 ++p;
149 }
150 width = ExpandedUIntWidth(mask, width);
151
152#undef MASK
153 }
154 return width;
155}
156
157//
158// Signed integer width detection
159//
160
161uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width) {
162 if (min_width == 8) {
163 return min_width;
164 }
165 uint8_t width = min_width;
166
167 auto p = values;
168 const auto end = p + length;
169 // Strategy: to determine whether `x` is between -0x80 and 0x7f,
170 // we determine whether `x + 0x80` is between 0x00 and 0xff. The
171 // latter can be done with a simple AND mask with ~0xff and, more
172 // importantly, can be computed in a single step over multiple ORed
173 // values (so we can branch once every N items instead of once every item).
174 // This strategy could probably lend itself to explicit SIMD-ization,
175 // if more performance is needed.
176 constexpr uint64_t addend8 = 0x80ULL;
177 constexpr uint64_t addend16 = 0x8000ULL;
178 constexpr uint64_t addend32 = 0x80000000ULL;
179
180 auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
181 auto v = *p++;
182 if (ARROW_PREDICT_FALSE(((v + addend) & test_mask) != 0)) {
183 --p;
184 return false;
185 } else {
186 return true;
187 }
188 };
189
190 auto test_four_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
191 auto mask = (p[0] + addend) | (p[1] + addend) | (p[2] + addend) | (p[3] + addend);
192 p += 4;
193 if (ARROW_PREDICT_FALSE((mask & test_mask) != 0)) {
194 p -= 4;
195 return false;
196 } else {
197 return true;
198 }
199 };
200
201 if (width == 1) {
202 while (p <= end - 4) {
203 if (!test_four_items(addend8, mask_uint8)) {
204 width = 2;
205 goto width2;
206 }
207 }
208 while (p < end) {
209 if (!test_one_item(addend8, mask_uint8)) {
210 width = 2;
211 goto width2;
212 }
213 }
214 return 1;
215 }
216width2:
217 if (width == 2) {
218 while (p <= end - 4) {
219 if (!test_four_items(addend16, mask_uint16)) {
220 width = 4;
221 goto width4;
222 }
223 }
224 while (p < end) {
225 if (!test_one_item(addend16, mask_uint16)) {
226 width = 4;
227 goto width4;
228 }
229 }
230 return 2;
231 }
232width4:
233 if (width == 4) {
234 while (p <= end - 4) {
235 if (!test_four_items(addend32, mask_uint32)) {
236 width = 8;
237 goto width8;
238 }
239 }
240 while (p < end) {
241 if (!test_one_item(addend32, mask_uint32)) {
242 width = 8;
243 goto width8;
244 }
245 }
246 return 4;
247 }
248width8:
249 return 8;
250}
251
252uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
253 uint8_t min_width) {
254 if (valid_bytes == nullptr) {
255 return DetectIntWidth(values, length, min_width);
256 }
257
258 if (min_width == 8) {
259 return min_width;
260 }
261 uint8_t width = min_width;
262
263 auto p = values;
264 const auto end = p + length;
265 auto b = valid_bytes;
266 // Strategy is similar to the no-nulls case above, but we also
267 // have to zero any incoming items that have a zero validity byte.
268 constexpr uint64_t addend8 = 0x80ULL;
269 constexpr uint64_t addend16 = 0x8000ULL;
270 constexpr uint64_t addend32 = 0x80000000ULL;
271
272#define MASK(p, b, addend, i) (p[i] + addend) * (b[i] != 0)
273
274 auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
275 auto v = MASK(p, b, addend, 0);
276 ++b;
277 ++p;
278 if (ARROW_PREDICT_FALSE((v & test_mask) != 0)) {
279 --b;
280 --p;
281 return false;
282 } else {
283 return true;
284 }
285 };
286
287 auto test_eight_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
288 auto mask1 = MASK(p, b, addend, 0) | MASK(p, b, addend, 1) | MASK(p, b, addend, 2) |
289 MASK(p, b, addend, 3);
290 auto mask2 = MASK(p, b, addend, 4) | MASK(p, b, addend, 5) | MASK(p, b, addend, 6) |
291 MASK(p, b, addend, 7);
292 b += 8;
293 p += 8;
294 if (ARROW_PREDICT_FALSE(((mask1 | mask2) & test_mask) != 0)) {
295 b -= 8;
296 p -= 8;
297 return false;
298 } else {
299 return true;
300 }
301 };
302
303#undef MASK
304
305 if (width == 1) {
306 while (p <= end - 8) {
307 if (!test_eight_items(addend8, mask_uint8)) {
308 width = 2;
309 goto width2;
310 }
311 }
312 while (p < end) {
313 if (!test_one_item(addend8, mask_uint8)) {
314 width = 2;
315 goto width2;
316 }
317 }
318 return 1;
319 }
320width2:
321 if (width == 2) {
322 while (p <= end - 8) {
323 if (!test_eight_items(addend16, mask_uint16)) {
324 width = 4;
325 goto width4;
326 }
327 }
328 while (p < end) {
329 if (!test_one_item(addend16, mask_uint16)) {
330 width = 4;
331 goto width4;
332 }
333 }
334 return 2;
335 }
336width4:
337 if (width == 4) {
338 while (p <= end - 8) {
339 if (!test_eight_items(addend32, mask_uint32)) {
340 width = 8;
341 goto width8;
342 }
343 }
344 while (p < end) {
345 if (!test_one_item(addend32, mask_uint32)) {
346 width = 8;
347 goto width8;
348 }
349 }
350 return 4;
351 }
352width8:
353 return 8;
354}
355
// Cast each of `length` values from Source to Dest, element by element.
// The caller guarantees every value is representable in Dest.
template <typename Source, typename Dest>
inline void DowncastIntsInternal(const Source* src, Dest* dest, int64_t length) {
  std::transform(src, src + length, dest,
                 [](Source value) { return static_cast<Dest>(value); });
}
372
373void DowncastInts(const int64_t* source, int8_t* dest, int64_t length) {
374 DowncastIntsInternal(source, dest, length);
375}
376
377void DowncastInts(const int64_t* source, int16_t* dest, int64_t length) {
378 DowncastIntsInternal(source, dest, length);
379}
380
381void DowncastInts(const int64_t* source, int32_t* dest, int64_t length) {
382 DowncastIntsInternal(source, dest, length);
383}
384
385void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
386 memcpy(dest, source, length * sizeof(int64_t));
387}
388
389void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length) {
390 DowncastIntsInternal(source, dest, length);
391}
392
393void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length) {
394 DowncastIntsInternal(source, dest, length);
395}
396
397void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length) {
398 DowncastIntsInternal(source, dest, length);
399}
400
401void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) {
402 memcpy(dest, source, length * sizeof(int64_t));
403}
404
// Map each input value through `transpose_map` and store the result as
// OutputInt: dest[i] = transpose_map[src[i]].  Input values must be valid
// indices into `transpose_map`.
template <typename InputInt, typename OutputInt>
void TransposeInts(const InputInt* src, OutputInt* dest, int64_t length,
                   const int32_t* transpose_map) {
  for (int64_t i = 0; i < length; ++i) {
    dest[i] = static_cast<OutputInt>(transpose_map[src[i]]);
  }
}
422
// Explicitly instantiate TransposeInts for every (source, dest) pairing of
// the four signed integer widths, so the template definitions are emitted
// (and exported) from this translation unit.
#define INSTANTIATE(SRC, DEST)              \
  template ARROW_EXPORT void TransposeInts( \
      const SRC* source, DEST* dest, int64_t length, const int32_t* transpose_map);

#define INSTANTIATE_ALL_DEST(DEST) \
  INSTANTIATE(int8_t, DEST)        \
  INSTANTIATE(int16_t, DEST)       \
  INSTANTIATE(int32_t, DEST)       \
  INSTANTIATE(int64_t, DEST)

#define INSTANTIATE_ALL()       \
  INSTANTIATE_ALL_DEST(int8_t)  \
  INSTANTIATE_ALL_DEST(int16_t) \
  INSTANTIATE_ALL_DEST(int32_t) \
  INSTANTIATE_ALL_DEST(int64_t)

INSTANTIATE_ALL()

// Keep helper macros file-local.
#undef INSTANTIATE
#undef INSTANTIATE_ALL
#undef INSTANTIATE_ALL_DEST
444
445} // namespace internal
446} // namespace arrow
447