1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <algorithm>
19#include <cstdint>
20#include <random>
21#include <utility>
22#include <vector>
23
24#include <gtest/gtest.h>
25
26#include "arrow/util/int-util.h"
27
28namespace arrow {
29namespace internal {
30
31static std::vector<uint8_t> all_widths = {1, 2, 4, 8};
32
33template <typename T>
34void CheckUIntWidth(const std::vector<T>& values, uint8_t expected_width) {
35 for (const uint8_t min_width : all_widths) {
36 uint8_t width =
37 DetectUIntWidth(values.data(), static_cast<int64_t>(values.size()), min_width);
38 ASSERT_EQ(width, std::max(min_width, expected_width));
39 width = DetectUIntWidth(values.data(), nullptr, static_cast<int64_t>(values.size()),
40 min_width);
41 ASSERT_EQ(width, std::max(min_width, expected_width));
42 }
43}
44
45template <typename T>
46void CheckUIntWidth(const std::vector<T>& values, const std::vector<uint8_t>& valid_bytes,
47 uint8_t expected_width) {
48 for (const uint8_t min_width : all_widths) {
49 uint8_t width = DetectUIntWidth(values.data(), valid_bytes.data(),
50 static_cast<int64_t>(values.size()), min_width);
51 ASSERT_EQ(width, std::max(min_width, expected_width));
52 }
53}
54
55template <typename T>
56void CheckIntWidth(const std::vector<T>& values, uint8_t expected_width) {
57 for (const uint8_t min_width : all_widths) {
58 uint8_t width =
59 DetectIntWidth(values.data(), static_cast<int64_t>(values.size()), min_width);
60 ASSERT_EQ(width, std::max(min_width, expected_width));
61 width = DetectIntWidth(values.data(), nullptr, static_cast<int64_t>(values.size()),
62 min_width);
63 ASSERT_EQ(width, std::max(min_width, expected_width));
64 }
65}
66
67template <typename T>
68void CheckIntWidth(const std::vector<T>& values, const std::vector<uint8_t>& valid_bytes,
69 uint8_t expected_width) {
70 for (const uint8_t min_width : all_widths) {
71 uint8_t width = DetectIntWidth(values.data(), valid_bytes.data(),
72 static_cast<int64_t>(values.size()), min_width);
73 ASSERT_EQ(width, std::max(min_width, expected_width));
74 }
75}
76
77template <typename T>
78std::vector<T> MakeRandomVector(const std::vector<T>& base_values, int n_values) {
79 std::default_random_engine gen(42);
80 std::uniform_int_distribution<int> index_dist(0,
81 static_cast<int>(base_values.size() - 1));
82
83 std::vector<T> values(n_values);
84 for (int i = 0; i < n_values; ++i) {
85 values[i] = base_values[index_dist(gen)];
86 }
87 return values;
88}
89
90template <typename T>
91std::vector<std::pair<std::vector<T>, std::vector<uint8_t>>> AlmostAllNullValues(
92 int n_values, T null_value, T non_null_value) {
93 std::vector<std::pair<std::vector<T>, std::vector<uint8_t>>> vectors;
94 vectors.reserve(n_values);
95 for (int i = 0; i < n_values; ++i) {
96 std::vector<T> values(n_values, null_value);
97 std::vector<uint8_t> valid_bytes(n_values, 0);
98 values[i] = non_null_value;
99 valid_bytes[i] = 1;
100 vectors.push_back({std::move(values), std::move(valid_bytes)});
101 }
102 return vectors;
103}
104
105template <typename T>
106std::vector<std::vector<T>> AlmostAllZeros(int n_values, T nonzero_value) {
107 std::vector<std::vector<T>> vectors;
108 vectors.reserve(n_values);
109 for (int i = 0; i < n_values; ++i) {
110 std::vector<T> values(n_values, 0);
111 values[i] = nonzero_value;
112 vectors.push_back(std::move(values));
113 }
114 return vectors;
115}
116
117std::vector<uint64_t> valid_uint8 = {0, 0x7f, 0xff};
118std::vector<uint64_t> valid_uint16 = {0, 0x7f, 0xff, 0x1000, 0xffff};
119std::vector<uint64_t> valid_uint32 = {0, 0x7f, 0xff, 0x10000, 0xffffffffULL};
120std::vector<uint64_t> valid_uint64 = {0, 0x100000000ULL, 0xffffffffffffffffULL};
121
122TEST(UIntWidth, NoNulls) {
123 std::vector<uint64_t> values{0, 0x7f, 0xff};
124 CheckUIntWidth(values, 1);
125
126 values = {0, 0x100};
127 CheckUIntWidth(values, 2);
128
129 values = {0, 0xffff};
130 CheckUIntWidth(values, 2);
131
132 values = {0, 0x10000};
133 CheckUIntWidth(values, 4);
134
135 values = {0, 0xffffffffULL};
136 CheckUIntWidth(values, 4);
137
138 values = {0, 0x100000000ULL};
139 CheckUIntWidth(values, 8);
140
141 values = {0, 0xffffffffffffffffULL};
142 CheckUIntWidth(values, 8);
143}
144
145TEST(UIntWidth, Nulls) {
146 std::vector<uint8_t> valid10{true, false};
147 std::vector<uint8_t> valid01{false, true};
148
149 std::vector<uint64_t> values{0, 0xff};
150 CheckUIntWidth(values, valid01, 1);
151 CheckUIntWidth(values, valid10, 1);
152
153 values = {0, 0x100};
154 CheckUIntWidth(values, valid01, 2);
155 CheckUIntWidth(values, valid10, 1);
156
157 values = {0, 0xffff};
158 CheckUIntWidth(values, valid01, 2);
159 CheckUIntWidth(values, valid10, 1);
160
161 values = {0, 0x10000};
162 CheckUIntWidth(values, valid01, 4);
163 CheckUIntWidth(values, valid10, 1);
164
165 values = {0, 0xffffffffULL};
166 CheckUIntWidth(values, valid01, 4);
167 CheckUIntWidth(values, valid10, 1);
168
169 values = {0, 0x100000000ULL};
170 CheckUIntWidth(values, valid01, 8);
171 CheckUIntWidth(values, valid10, 1);
172
173 values = {0, 0xffffffffffffffffULL};
174 CheckUIntWidth(values, valid01, 8);
175 CheckUIntWidth(values, valid10, 1);
176}
177
178TEST(UIntWidth, NoNullsMany) {
179 constexpr int N = 40;
180 for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xff)) {
181 CheckUIntWidth(values, 1);
182 }
183 for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffff)) {
184 CheckUIntWidth(values, 2);
185 }
186 for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffffffffULL)) {
187 CheckUIntWidth(values, 4);
188 }
189 for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffffffffffffffffULL)) {
190 CheckUIntWidth(values, 8);
191 }
192 auto values = MakeRandomVector(valid_uint8, N);
193 CheckUIntWidth(values, 1);
194
195 values = MakeRandomVector(valid_uint16, N);
196 CheckUIntWidth(values, 2);
197
198 values = MakeRandomVector(valid_uint32, N);
199 CheckUIntWidth(values, 4);
200
201 values = MakeRandomVector(valid_uint64, N);
202 CheckUIntWidth(values, 8);
203}
204
205TEST(UIntWidth, NullsMany) {
206 constexpr uint64_t huge = 0x123456789abcdefULL;
207 constexpr int N = 40;
208 for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xff)) {
209 CheckUIntWidth(p.first, p.second, 1);
210 }
211 for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xff)) {
212 CheckUIntWidth(p.first, p.second, 1);
213 }
214 for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffff)) {
215 CheckUIntWidth(p.first, p.second, 2);
216 }
217 for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffff)) {
218 CheckUIntWidth(p.first, p.second, 2);
219 }
220 for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffffffffULL)) {
221 CheckUIntWidth(p.first, p.second, 4);
222 }
223 for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffffffffULL)) {
224 CheckUIntWidth(p.first, p.second, 4);
225 }
226 for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffffffffffffffffULL)) {
227 CheckUIntWidth(p.first, p.second, 8);
228 }
229 for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffffffffffffffffULL)) {
230 CheckUIntWidth(p.first, p.second, 8);
231 }
232}
233
234TEST(IntWidth, NoNulls) {
235 std::vector<int64_t> values{0, 0x7f, -0x80};
236 CheckIntWidth(values, 1);
237
238 values = {0, 0x80};
239 CheckIntWidth(values, 2);
240
241 values = {0, -0x81};
242 CheckIntWidth(values, 2);
243
244 values = {0, 0x7fff, -0x8000};
245 CheckIntWidth(values, 2);
246
247 values = {0, 0x8000};
248 CheckIntWidth(values, 4);
249
250 values = {0, -0x8001};
251 CheckIntWidth(values, 4);
252
253 values = {0, 0x7fffffffLL, -0x80000000LL};
254 CheckIntWidth(values, 4);
255
256 values = {0, 0x80000000LL};
257 CheckIntWidth(values, 8);
258
259 values = {0, -0x80000001LL};
260 CheckIntWidth(values, 8);
261
262 values = {0, 0x7fffffffffffffffLL, -0x7fffffffffffffffLL - 1};
263 CheckIntWidth(values, 8);
264}
265
266TEST(IntWidth, Nulls) {
267 std::vector<uint8_t> valid100{true, false, false};
268 std::vector<uint8_t> valid010{false, true, false};
269 std::vector<uint8_t> valid001{false, false, true};
270
271 std::vector<int64_t> values{0, 0x7f, -0x80};
272 CheckIntWidth(values, valid100, 1);
273 CheckIntWidth(values, valid010, 1);
274 CheckIntWidth(values, valid001, 1);
275
276 values = {0, 0x80, -0x81};
277 CheckIntWidth(values, valid100, 1);
278 CheckIntWidth(values, valid010, 2);
279 CheckIntWidth(values, valid001, 2);
280
281 values = {0, 0x7fff, -0x8000};
282 CheckIntWidth(values, valid100, 1);
283 CheckIntWidth(values, valid010, 2);
284 CheckIntWidth(values, valid001, 2);
285
286 values = {0, 0x8000, -0x8001};
287 CheckIntWidth(values, valid100, 1);
288 CheckIntWidth(values, valid010, 4);
289 CheckIntWidth(values, valid001, 4);
290
291 values = {0, 0x7fffffffLL, -0x80000000LL};
292 CheckIntWidth(values, valid100, 1);
293 CheckIntWidth(values, valid010, 4);
294 CheckIntWidth(values, valid001, 4);
295
296 values = {0, 0x80000000LL, -0x80000001LL};
297 CheckIntWidth(values, valid100, 1);
298 CheckIntWidth(values, valid010, 8);
299 CheckIntWidth(values, valid001, 8);
300
301 values = {0, 0x7fffffffffffffffLL, -0x7fffffffffffffffLL - 1};
302 CheckIntWidth(values, valid100, 1);
303 CheckIntWidth(values, valid010, 8);
304 CheckIntWidth(values, valid001, 8);
305}
306
307TEST(IntWidth, NoNullsMany) {
308 constexpr int N = 40;
309 // 1 byte wide
310 for (const int64_t value : {0x7f, -0x80}) {
311 for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
312 CheckIntWidth(values, 1);
313 }
314 }
315 // 2 bytes wide
316 for (const int64_t value : {0x80, -0x81, 0x7fff, -0x8000}) {
317 for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
318 CheckIntWidth(values, 2);
319 }
320 }
321 // 4 bytes wide
322 for (const int64_t value : {0x8000LL, -0x8001LL, 0x7fffffffLL, -0x80000000LL}) {
323 for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
324 CheckIntWidth(values, 4);
325 }
326 }
327 // 8 bytes wide
328 for (const int64_t value : {0x80000000LL, -0x80000001LL, 0x7fffffffffffffffLL}) {
329 for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
330 CheckIntWidth(values, 8);
331 }
332 }
333}
334
335TEST(IntWidth, NullsMany) {
336 constexpr int64_t huge = 0x123456789abcdefLL;
337 constexpr int N = 40;
338 // 1 byte wide
339 for (const int64_t value : {0x7f, -0x80}) {
340 for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
341 CheckIntWidth(p.first, p.second, 1);
342 }
343 for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
344 CheckIntWidth(p.first, p.second, 1);
345 }
346 }
347 // 2 bytes wide
348 for (const int64_t value : {0x80, -0x81, 0x7fff, -0x8000}) {
349 for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
350 CheckIntWidth(p.first, p.second, 2);
351 }
352 for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
353 CheckIntWidth(p.first, p.second, 2);
354 }
355 }
356 // 4 bytes wide
357 for (const int64_t value : {0x8000LL, -0x8001LL, 0x7fffffffLL, -0x80000000LL}) {
358 for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
359 CheckIntWidth(p.first, p.second, 4);
360 }
361 for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
362 CheckIntWidth(p.first, p.second, 4);
363 }
364 }
365 // 8 bytes wide
366 for (const int64_t value : {0x80000000LL, -0x80000001LL, 0x7fffffffffffffffLL}) {
367 for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
368 CheckIntWidth(p.first, p.second, 8);
369 }
370 for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
371 CheckIntWidth(p.first, p.second, 8);
372 }
373 }
374}
375
376TEST(TransposeInts, Int8ToInt64) {
377 std::vector<int8_t> src = {1, 3, 5, 0, 3, 2};
378 std::vector<int32_t> transpose_map = {1111, 2222, 3333, 4444, 5555, 6666, 7777};
379 std::vector<int64_t> dest(src.size());
380
381 TransposeInts(src.data(), dest.data(), 6, transpose_map.data());
382 ASSERT_EQ(dest, std::vector<int64_t>({2222, 4444, 6666, 1111, 4444, 3333}));
383}
384
385} // namespace internal
386} // namespace arrow
387