1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <algorithm> |
19 | #include <cstdint> |
20 | #include <random> |
21 | #include <utility> |
22 | #include <vector> |
23 | |
24 | #include <gtest/gtest.h> |
25 | |
26 | #include "arrow/util/int-util.h" |
27 | |
28 | namespace arrow { |
29 | namespace internal { |
30 | |
31 | static std::vector<uint8_t> all_widths = {1, 2, 4, 8}; |
32 | |
33 | template <typename T> |
34 | void CheckUIntWidth(const std::vector<T>& values, uint8_t expected_width) { |
35 | for (const uint8_t min_width : all_widths) { |
36 | uint8_t width = |
37 | DetectUIntWidth(values.data(), static_cast<int64_t>(values.size()), min_width); |
38 | ASSERT_EQ(width, std::max(min_width, expected_width)); |
39 | width = DetectUIntWidth(values.data(), nullptr, static_cast<int64_t>(values.size()), |
40 | min_width); |
41 | ASSERT_EQ(width, std::max(min_width, expected_width)); |
42 | } |
43 | } |
44 | |
45 | template <typename T> |
46 | void CheckUIntWidth(const std::vector<T>& values, const std::vector<uint8_t>& valid_bytes, |
47 | uint8_t expected_width) { |
48 | for (const uint8_t min_width : all_widths) { |
49 | uint8_t width = DetectUIntWidth(values.data(), valid_bytes.data(), |
50 | static_cast<int64_t>(values.size()), min_width); |
51 | ASSERT_EQ(width, std::max(min_width, expected_width)); |
52 | } |
53 | } |
54 | |
55 | template <typename T> |
56 | void CheckIntWidth(const std::vector<T>& values, uint8_t expected_width) { |
57 | for (const uint8_t min_width : all_widths) { |
58 | uint8_t width = |
59 | DetectIntWidth(values.data(), static_cast<int64_t>(values.size()), min_width); |
60 | ASSERT_EQ(width, std::max(min_width, expected_width)); |
61 | width = DetectIntWidth(values.data(), nullptr, static_cast<int64_t>(values.size()), |
62 | min_width); |
63 | ASSERT_EQ(width, std::max(min_width, expected_width)); |
64 | } |
65 | } |
66 | |
67 | template <typename T> |
68 | void CheckIntWidth(const std::vector<T>& values, const std::vector<uint8_t>& valid_bytes, |
69 | uint8_t expected_width) { |
70 | for (const uint8_t min_width : all_widths) { |
71 | uint8_t width = DetectIntWidth(values.data(), valid_bytes.data(), |
72 | static_cast<int64_t>(values.size()), min_width); |
73 | ASSERT_EQ(width, std::max(min_width, expected_width)); |
74 | } |
75 | } |
76 | |
77 | template <typename T> |
78 | std::vector<T> MakeRandomVector(const std::vector<T>& base_values, int n_values) { |
79 | std::default_random_engine gen(42); |
80 | std::uniform_int_distribution<int> index_dist(0, |
81 | static_cast<int>(base_values.size() - 1)); |
82 | |
83 | std::vector<T> values(n_values); |
84 | for (int i = 0; i < n_values; ++i) { |
85 | values[i] = base_values[index_dist(gen)]; |
86 | } |
87 | return values; |
88 | } |
89 | |
90 | template <typename T> |
91 | std::vector<std::pair<std::vector<T>, std::vector<uint8_t>>> AlmostAllNullValues( |
92 | int n_values, T null_value, T non_null_value) { |
93 | std::vector<std::pair<std::vector<T>, std::vector<uint8_t>>> vectors; |
94 | vectors.reserve(n_values); |
95 | for (int i = 0; i < n_values; ++i) { |
96 | std::vector<T> values(n_values, null_value); |
97 | std::vector<uint8_t> valid_bytes(n_values, 0); |
98 | values[i] = non_null_value; |
99 | valid_bytes[i] = 1; |
100 | vectors.push_back({std::move(values), std::move(valid_bytes)}); |
101 | } |
102 | return vectors; |
103 | } |
104 | |
105 | template <typename T> |
106 | std::vector<std::vector<T>> AlmostAllZeros(int n_values, T nonzero_value) { |
107 | std::vector<std::vector<T>> vectors; |
108 | vectors.reserve(n_values); |
109 | for (int i = 0; i < n_values; ++i) { |
110 | std::vector<T> values(n_values, 0); |
111 | values[i] = nonzero_value; |
112 | vectors.push_back(std::move(values)); |
113 | } |
114 | return vectors; |
115 | } |
116 | |
117 | std::vector<uint64_t> valid_uint8 = {0, 0x7f, 0xff}; |
118 | std::vector<uint64_t> valid_uint16 = {0, 0x7f, 0xff, 0x1000, 0xffff}; |
119 | std::vector<uint64_t> valid_uint32 = {0, 0x7f, 0xff, 0x10000, 0xffffffffULL}; |
120 | std::vector<uint64_t> valid_uint64 = {0, 0x100000000ULL, 0xffffffffffffffffULL}; |
121 | |
122 | TEST(UIntWidth, NoNulls) { |
123 | std::vector<uint64_t> values{0, 0x7f, 0xff}; |
124 | CheckUIntWidth(values, 1); |
125 | |
126 | values = {0, 0x100}; |
127 | CheckUIntWidth(values, 2); |
128 | |
129 | values = {0, 0xffff}; |
130 | CheckUIntWidth(values, 2); |
131 | |
132 | values = {0, 0x10000}; |
133 | CheckUIntWidth(values, 4); |
134 | |
135 | values = {0, 0xffffffffULL}; |
136 | CheckUIntWidth(values, 4); |
137 | |
138 | values = {0, 0x100000000ULL}; |
139 | CheckUIntWidth(values, 8); |
140 | |
141 | values = {0, 0xffffffffffffffffULL}; |
142 | CheckUIntWidth(values, 8); |
143 | } |
144 | |
145 | TEST(UIntWidth, Nulls) { |
146 | std::vector<uint8_t> valid10{true, false}; |
147 | std::vector<uint8_t> valid01{false, true}; |
148 | |
149 | std::vector<uint64_t> values{0, 0xff}; |
150 | CheckUIntWidth(values, valid01, 1); |
151 | CheckUIntWidth(values, valid10, 1); |
152 | |
153 | values = {0, 0x100}; |
154 | CheckUIntWidth(values, valid01, 2); |
155 | CheckUIntWidth(values, valid10, 1); |
156 | |
157 | values = {0, 0xffff}; |
158 | CheckUIntWidth(values, valid01, 2); |
159 | CheckUIntWidth(values, valid10, 1); |
160 | |
161 | values = {0, 0x10000}; |
162 | CheckUIntWidth(values, valid01, 4); |
163 | CheckUIntWidth(values, valid10, 1); |
164 | |
165 | values = {0, 0xffffffffULL}; |
166 | CheckUIntWidth(values, valid01, 4); |
167 | CheckUIntWidth(values, valid10, 1); |
168 | |
169 | values = {0, 0x100000000ULL}; |
170 | CheckUIntWidth(values, valid01, 8); |
171 | CheckUIntWidth(values, valid10, 1); |
172 | |
173 | values = {0, 0xffffffffffffffffULL}; |
174 | CheckUIntWidth(values, valid01, 8); |
175 | CheckUIntWidth(values, valid10, 1); |
176 | } |
177 | |
178 | TEST(UIntWidth, NoNullsMany) { |
179 | constexpr int N = 40; |
180 | for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xff)) { |
181 | CheckUIntWidth(values, 1); |
182 | } |
183 | for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffff)) { |
184 | CheckUIntWidth(values, 2); |
185 | } |
186 | for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffffffffULL)) { |
187 | CheckUIntWidth(values, 4); |
188 | } |
189 | for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffffffffffffffffULL)) { |
190 | CheckUIntWidth(values, 8); |
191 | } |
192 | auto values = MakeRandomVector(valid_uint8, N); |
193 | CheckUIntWidth(values, 1); |
194 | |
195 | values = MakeRandomVector(valid_uint16, N); |
196 | CheckUIntWidth(values, 2); |
197 | |
198 | values = MakeRandomVector(valid_uint32, N); |
199 | CheckUIntWidth(values, 4); |
200 | |
201 | values = MakeRandomVector(valid_uint64, N); |
202 | CheckUIntWidth(values, 8); |
203 | } |
204 | |
205 | TEST(UIntWidth, NullsMany) { |
206 | constexpr uint64_t huge = 0x123456789abcdefULL; |
207 | constexpr int N = 40; |
208 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xff)) { |
209 | CheckUIntWidth(p.first, p.second, 1); |
210 | } |
211 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xff)) { |
212 | CheckUIntWidth(p.first, p.second, 1); |
213 | } |
214 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffff)) { |
215 | CheckUIntWidth(p.first, p.second, 2); |
216 | } |
217 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffff)) { |
218 | CheckUIntWidth(p.first, p.second, 2); |
219 | } |
220 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffffffffULL)) { |
221 | CheckUIntWidth(p.first, p.second, 4); |
222 | } |
223 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffffffffULL)) { |
224 | CheckUIntWidth(p.first, p.second, 4); |
225 | } |
226 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffffffffffffffffULL)) { |
227 | CheckUIntWidth(p.first, p.second, 8); |
228 | } |
229 | for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffffffffffffffffULL)) { |
230 | CheckUIntWidth(p.first, p.second, 8); |
231 | } |
232 | } |
233 | |
234 | TEST(IntWidth, NoNulls) { |
235 | std::vector<int64_t> values{0, 0x7f, -0x80}; |
236 | CheckIntWidth(values, 1); |
237 | |
238 | values = {0, 0x80}; |
239 | CheckIntWidth(values, 2); |
240 | |
241 | values = {0, -0x81}; |
242 | CheckIntWidth(values, 2); |
243 | |
244 | values = {0, 0x7fff, -0x8000}; |
245 | CheckIntWidth(values, 2); |
246 | |
247 | values = {0, 0x8000}; |
248 | CheckIntWidth(values, 4); |
249 | |
250 | values = {0, -0x8001}; |
251 | CheckIntWidth(values, 4); |
252 | |
253 | values = {0, 0x7fffffffLL, -0x80000000LL}; |
254 | CheckIntWidth(values, 4); |
255 | |
256 | values = {0, 0x80000000LL}; |
257 | CheckIntWidth(values, 8); |
258 | |
259 | values = {0, -0x80000001LL}; |
260 | CheckIntWidth(values, 8); |
261 | |
262 | values = {0, 0x7fffffffffffffffLL, -0x7fffffffffffffffLL - 1}; |
263 | CheckIntWidth(values, 8); |
264 | } |
265 | |
266 | TEST(IntWidth, Nulls) { |
267 | std::vector<uint8_t> valid100{true, false, false}; |
268 | std::vector<uint8_t> valid010{false, true, false}; |
269 | std::vector<uint8_t> valid001{false, false, true}; |
270 | |
271 | std::vector<int64_t> values{0, 0x7f, -0x80}; |
272 | CheckIntWidth(values, valid100, 1); |
273 | CheckIntWidth(values, valid010, 1); |
274 | CheckIntWidth(values, valid001, 1); |
275 | |
276 | values = {0, 0x80, -0x81}; |
277 | CheckIntWidth(values, valid100, 1); |
278 | CheckIntWidth(values, valid010, 2); |
279 | CheckIntWidth(values, valid001, 2); |
280 | |
281 | values = {0, 0x7fff, -0x8000}; |
282 | CheckIntWidth(values, valid100, 1); |
283 | CheckIntWidth(values, valid010, 2); |
284 | CheckIntWidth(values, valid001, 2); |
285 | |
286 | values = {0, 0x8000, -0x8001}; |
287 | CheckIntWidth(values, valid100, 1); |
288 | CheckIntWidth(values, valid010, 4); |
289 | CheckIntWidth(values, valid001, 4); |
290 | |
291 | values = {0, 0x7fffffffLL, -0x80000000LL}; |
292 | CheckIntWidth(values, valid100, 1); |
293 | CheckIntWidth(values, valid010, 4); |
294 | CheckIntWidth(values, valid001, 4); |
295 | |
296 | values = {0, 0x80000000LL, -0x80000001LL}; |
297 | CheckIntWidth(values, valid100, 1); |
298 | CheckIntWidth(values, valid010, 8); |
299 | CheckIntWidth(values, valid001, 8); |
300 | |
301 | values = {0, 0x7fffffffffffffffLL, -0x7fffffffffffffffLL - 1}; |
302 | CheckIntWidth(values, valid100, 1); |
303 | CheckIntWidth(values, valid010, 8); |
304 | CheckIntWidth(values, valid001, 8); |
305 | } |
306 | |
307 | TEST(IntWidth, NoNullsMany) { |
308 | constexpr int N = 40; |
309 | // 1 byte wide |
310 | for (const int64_t value : {0x7f, -0x80}) { |
311 | for (const auto& values : AlmostAllZeros<int64_t>(N, value)) { |
312 | CheckIntWidth(values, 1); |
313 | } |
314 | } |
315 | // 2 bytes wide |
316 | for (const int64_t value : {0x80, -0x81, 0x7fff, -0x8000}) { |
317 | for (const auto& values : AlmostAllZeros<int64_t>(N, value)) { |
318 | CheckIntWidth(values, 2); |
319 | } |
320 | } |
321 | // 4 bytes wide |
322 | for (const int64_t value : {0x8000LL, -0x8001LL, 0x7fffffffLL, -0x80000000LL}) { |
323 | for (const auto& values : AlmostAllZeros<int64_t>(N, value)) { |
324 | CheckIntWidth(values, 4); |
325 | } |
326 | } |
327 | // 8 bytes wide |
328 | for (const int64_t value : {0x80000000LL, -0x80000001LL, 0x7fffffffffffffffLL}) { |
329 | for (const auto& values : AlmostAllZeros<int64_t>(N, value)) { |
330 | CheckIntWidth(values, 8); |
331 | } |
332 | } |
333 | } |
334 | |
335 | TEST(IntWidth, NullsMany) { |
336 | constexpr int64_t huge = 0x123456789abcdefLL; |
337 | constexpr int N = 40; |
338 | // 1 byte wide |
339 | for (const int64_t value : {0x7f, -0x80}) { |
340 | for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) { |
341 | CheckIntWidth(p.first, p.second, 1); |
342 | } |
343 | for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) { |
344 | CheckIntWidth(p.first, p.second, 1); |
345 | } |
346 | } |
347 | // 2 bytes wide |
348 | for (const int64_t value : {0x80, -0x81, 0x7fff, -0x8000}) { |
349 | for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) { |
350 | CheckIntWidth(p.first, p.second, 2); |
351 | } |
352 | for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) { |
353 | CheckIntWidth(p.first, p.second, 2); |
354 | } |
355 | } |
356 | // 4 bytes wide |
357 | for (const int64_t value : {0x8000LL, -0x8001LL, 0x7fffffffLL, -0x80000000LL}) { |
358 | for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) { |
359 | CheckIntWidth(p.first, p.second, 4); |
360 | } |
361 | for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) { |
362 | CheckIntWidth(p.first, p.second, 4); |
363 | } |
364 | } |
365 | // 8 bytes wide |
366 | for (const int64_t value : {0x80000000LL, -0x80000001LL, 0x7fffffffffffffffLL}) { |
367 | for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) { |
368 | CheckIntWidth(p.first, p.second, 8); |
369 | } |
370 | for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) { |
371 | CheckIntWidth(p.first, p.second, 8); |
372 | } |
373 | } |
374 | } |
375 | |
376 | TEST(TransposeInts, Int8ToInt64) { |
377 | std::vector<int8_t> src = {1, 3, 5, 0, 3, 2}; |
378 | std::vector<int32_t> transpose_map = {1111, 2222, 3333, 4444, 5555, 6666, 7777}; |
379 | std::vector<int64_t> dest(src.size()); |
380 | |
381 | TransposeInts(src.data(), dest.data(), 6, transpose_map.data()); |
382 | ASSERT_EQ(dest, std::vector<int64_t>({2222, 4444, 6666, 1111, 4444, 3333})); |
383 | } |
384 | |
385 | } // namespace internal |
386 | } // namespace arrow |
387 | |