1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <climits> |
19 | #include <cstdint> |
20 | #include <cstring> |
21 | #include <functional> |
22 | #include <limits> |
23 | #include <memory> |
24 | #include <vector> |
25 | |
26 | #include <gtest/gtest.h> |
27 | |
28 | #include <boost/utility.hpp> // IWYU pragma: export |
29 | |
30 | #include "arrow/buffer.h" |
31 | #include "arrow/memory_pool.h" |
32 | #include "arrow/test-common.h" |
33 | #include "arrow/test-util.h" |
34 | #include "arrow/util/bit-stream-utils.h" |
35 | #include "arrow/util/bit-util.h" |
36 | #include "arrow/util/cpu-info.h" |
37 | |
38 | namespace arrow { |
39 | |
40 | using internal::BitmapAnd; |
41 | using internal::BitmapOr; |
42 | using internal::BitmapXor; |
43 | using internal::CopyBitmap; |
44 | using internal::CountSetBits; |
45 | using internal::InvertBitmap; |
46 | |
// Feed a sequence of 0/1 values into a bitmap writer, then finalize it.
//
// BitmapWriter: any writer type exposing Set(), Clear(), Next() and Finish()
// (e.g. internal::BitmapWriter or internal::FirstTimeBitmapWriter).
//
// Fix: take `values` by const reference — the previous by-value parameter
// copied the whole vector on every call.
template <class BitmapWriter>
void WriteVectorToWriter(BitmapWriter& writer, const std::vector<int>& values) {
  for (const auto& value : values) {
    if (value) {
      writer.Set();
    } else {
      writer.Clear();
    }
    writer.Next();
  }
  writer.Finish();
}
59 | |
60 | void BitmapFromVector(const std::vector<int>& values, int64_t bit_offset, |
61 | std::shared_ptr<Buffer>* out_buffer, int64_t* out_length) { |
62 | const int64_t length = values.size(); |
63 | *out_length = length; |
64 | ASSERT_OK(AllocateEmptyBitmap(length + bit_offset, out_buffer)); |
65 | auto writer = internal::BitmapWriter((*out_buffer)->mutable_data(), bit_offset, length); |
66 | WriteVectorToWriter(writer, values); |
67 | } |
68 | |
// Assert that `reader` is currently positioned on a set (1) bit, then advance
// the reader by one position.
#define ASSERT_READER_SET(reader)    \
  do {                               \
    ASSERT_TRUE(reader.IsSet());     \
    ASSERT_FALSE(reader.IsNotSet()); \
    reader.Next();                   \
  } while (false)
75 | |
// Assert that `reader` is currently positioned on a clear (0) bit, then
// advance the reader by one position.
#define ASSERT_READER_NOT_SET(reader) \
  do {                                \
    ASSERT_FALSE(reader.IsSet());     \
    ASSERT_TRUE(reader.IsNotSet());   \
    reader.Next();                    \
  } while (false)
82 | |
83 | // Assert that a BitmapReader yields the given bit values |
84 | void ASSERT_READER_VALUES(internal::BitmapReader& reader, std::vector<int> values) { |
85 | for (const auto& value : values) { |
86 | if (value) { |
87 | ASSERT_READER_SET(reader); |
88 | } else { |
89 | ASSERT_READER_NOT_SET(reader); |
90 | } |
91 | } |
92 | } |
93 | |
94 | // Assert equal contents of a memory area and a vector of bytes |
95 | void ASSERT_BYTES_EQ(const uint8_t* left, const std::vector<uint8_t>& right) { |
96 | auto left_array = std::vector<uint8_t>(left, left + right.size()); |
97 | ASSERT_EQ(left_array, right); |
98 | } |
99 | |
100 | TEST(BitUtilTests, TestIsMultipleOf64) { |
101 | using BitUtil::IsMultipleOf64; |
102 | EXPECT_TRUE(IsMultipleOf64(64)); |
103 | EXPECT_TRUE(IsMultipleOf64(0)); |
104 | EXPECT_TRUE(IsMultipleOf64(128)); |
105 | EXPECT_TRUE(IsMultipleOf64(192)); |
106 | EXPECT_FALSE(IsMultipleOf64(23)); |
107 | EXPECT_FALSE(IsMultipleOf64(32)); |
108 | } |
109 | |
110 | TEST(BitUtilTests, TestNextPower2) { |
111 | using BitUtil::NextPower2; |
112 | |
113 | ASSERT_EQ(8, NextPower2(6)); |
114 | ASSERT_EQ(8, NextPower2(8)); |
115 | |
116 | ASSERT_EQ(1, NextPower2(1)); |
117 | ASSERT_EQ(256, NextPower2(131)); |
118 | |
119 | ASSERT_EQ(1024, NextPower2(1000)); |
120 | |
121 | ASSERT_EQ(4096, NextPower2(4000)); |
122 | |
123 | ASSERT_EQ(65536, NextPower2(64000)); |
124 | |
125 | ASSERT_EQ(1LL << 32, NextPower2((1LL << 32) - 1)); |
126 | ASSERT_EQ(1LL << 31, NextPower2((1LL << 31) - 1)); |
127 | ASSERT_EQ(1LL << 62, NextPower2((1LL << 62) - 1)); |
128 | } |
129 | |
130 | TEST(BitmapReader, NormalOperation) { |
131 | std::shared_ptr<Buffer> buffer; |
132 | int64_t length; |
133 | |
134 | for (int64_t offset : {0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120}) { |
135 | BitmapFromVector({0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset, &buffer, |
136 | &length); |
137 | ASSERT_EQ(length, 14); |
138 | |
139 | auto reader = internal::BitmapReader(buffer->mutable_data(), offset, length); |
140 | ASSERT_READER_VALUES(reader, {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}); |
141 | } |
142 | } |
143 | |
144 | TEST(BitmapReader, DoesNotReadOutOfBounds) { |
145 | uint8_t bitmap[16] = {0}; |
146 | |
147 | const int length = 128; |
148 | |
149 | internal::BitmapReader r1(bitmap, 0, length); |
150 | |
151 | // If this were to read out of bounds, valgrind would tell us |
152 | for (int i = 0; i < length; ++i) { |
153 | ASSERT_TRUE(r1.IsNotSet()); |
154 | r1.Next(); |
155 | } |
156 | |
157 | internal::BitmapReader r2(bitmap, 5, length - 5); |
158 | |
159 | for (int i = 0; i < (length - 5); ++i) { |
160 | ASSERT_TRUE(r2.IsNotSet()); |
161 | r2.Next(); |
162 | } |
163 | |
164 | // Does not access invalid memory |
165 | internal::BitmapReader r3(nullptr, 0, 0); |
166 | } |
167 | |
// Exercise BitmapWriter at byte-aligned and unaligned offsets, over memory
// prefilled with both 0x00 and 0xff bytes, checking that bits outside the
// written range keep their original fill value.
TEST(BitmapWriter, NormalOperation) {
  for (const auto fill_byte_int : {0x00, 0xff}) {
    const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
    {
      // Write starting at bit 0 (byte-aligned)
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      auto writer = internal::BitmapWriter(bitmap, 0, 12);
      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
      // {0b00110110, 0b....1010, ........, ........}
      ASSERT_BYTES_EQ(bitmap, {0x36, static_cast<uint8_t>(0x0a | (fill_byte & 0xf0)),
                               fill_byte, fill_byte});
    }
    {
      // Write starting mid-byte (bit 3)
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      auto writer = internal::BitmapWriter(bitmap, 3, 12);
      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
      // {0b10110..., 0b.1010001, ........, ........}
      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0xb0 | (fill_byte & 0x07)),
                               static_cast<uint8_t>(0x51 | (fill_byte & 0x80)), fill_byte,
                               fill_byte});
    }
    {
      // Write starting in the third byte (bit 20)
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      auto writer = internal::BitmapWriter(bitmap, 20, 12);
      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
      // {........, ........, 0b0110...., 0b10100011}
      ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte,
                               static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
    }
    // 0-length writes
    for (int64_t pos = 0; pos < 32; ++pos) {
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      auto writer = internal::BitmapWriter(bitmap, pos, 0);
      WriteVectorToWriter(writer, {});
      // A zero-length writer must not modify anything
      ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte, fill_byte, fill_byte});
    }
  }
}
205 | |
206 | TEST(BitmapWriter, DoesNotWriteOutOfBounds) { |
207 | uint8_t bitmap[16] = {0}; |
208 | |
209 | const int length = 128; |
210 | |
211 | int64_t num_values = 0; |
212 | |
213 | internal::BitmapWriter r1(bitmap, 0, length); |
214 | |
215 | // If this were to write out of bounds, valgrind would tell us |
216 | for (int i = 0; i < length; ++i) { |
217 | r1.Set(); |
218 | r1.Clear(); |
219 | r1.Next(); |
220 | } |
221 | r1.Finish(); |
222 | num_values = r1.position(); |
223 | |
224 | ASSERT_EQ(length, num_values); |
225 | |
226 | internal::BitmapWriter r2(bitmap, 5, length - 5); |
227 | |
228 | for (int i = 0; i < (length - 5); ++i) { |
229 | r2.Set(); |
230 | r2.Clear(); |
231 | r2.Next(); |
232 | } |
233 | r2.Finish(); |
234 | num_values = r2.position(); |
235 | |
236 | ASSERT_EQ((length - 5), num_values); |
237 | } |
238 | |
// Exercise FirstTimeBitmapWriter (write-once semantics) with single and
// consecutive chunked writes, over memory prefilled with 0x00 and 0xff.
TEST(FirstTimeBitmapWriter, NormalOperation) {
  for (const auto fill_byte_int : {0x00, 0xff}) {
    const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
    {
      // Single write starting at bit 0
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      auto writer = internal::FirstTimeBitmapWriter(bitmap, 0, 12);
      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
      // {0b00110110, 0b1010, 0, 0}
      ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a});
    }
    {
      // Single write starting at bit 4
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 12);
      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
      // {0b00110110, 0b1010, 0, 0}
      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
    }
    // Consecutive write chunks
    {
      // Three back-to-back writers covering bits [0, 12) must produce the
      // same bytes as one 12-bit write.
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 0, 6);
        WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
      }
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 6, 3);
        WriteVectorToWriter(writer, {0, 0, 0});
      }
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 9, 3);
        WriteVectorToWriter(writer, {1, 0, 1});
      }
      ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a});
    }
    {
      // Same as above but starting at bit 4 and interleaving zero-length
      // writers, which must be harmless.
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 0);
        WriteVectorToWriter(writer, {});
      }
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 6);
        WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
      }
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 10, 3);
        WriteVectorToWriter(writer, {0, 0, 0});
      }
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 0);
        WriteVectorToWriter(writer, {});
      }
      {
        auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 3);
        WriteVectorToWriter(writer, {1, 0, 1});
      }
      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
    }
  }
}
299 | |
300 | // Tests for GenerateBits and GenerateBitsUnrolled |
301 | |
// Functor adapter around internal::GenerateBits, so the typed test below can
// run the same checks against both GenerateBits implementations.
struct GenerateBitsFunctor {
  template <class Generator>
  void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
    return internal::GenerateBits(bitmap, start_offset, length, g);
  }
};
308 | |
// Functor adapter around internal::GenerateBitsUnrolled (see above).
struct GenerateBitsUnrolledFunctor {
  template <class Generator>
  void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
    return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
  }
};
315 | |
// Typed test fixture: the type parameter selects which GenerateBits
// implementation (plain or unrolled) the test body exercises.
template <typename T>
class TestGenerateBits : public ::testing::Test {};

typedef ::testing::Types<GenerateBitsFunctor, GenerateBitsUnrolledFunctor>
    GenerateBitsTypes;
TYPED_TEST_CASE(TestGenerateBits, GenerateBitsTypes);
322 | |
323 | TYPED_TEST(TestGenerateBits, NormalOperation) { |
324 | const int kSourceSize = 256; |
325 | uint8_t source[kSourceSize]; |
326 | random_bytes(kSourceSize, 0, source); |
327 | |
328 | const int64_t start_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 31, 32}; |
329 | const int64_t lengths[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 16, |
330 | 17, 21, 31, 32, 100, 201, 202, 203, 204, 205, 206, 207}; |
331 | const uint8_t fill_bytes[] = {0x00, 0xff}; |
332 | |
333 | for (const int64_t start_offset : start_offsets) { |
334 | for (const int64_t length : lengths) { |
335 | for (const uint8_t fill_byte : fill_bytes) { |
336 | uint8_t bitmap[kSourceSize + 1]; |
337 | memset(bitmap, fill_byte, kSourceSize + 1); |
338 | // First call GenerateBits |
339 | { |
340 | int64_t ncalled = 0; |
341 | internal::BitmapReader reader(source, 0, length); |
342 | TypeParam()(bitmap, start_offset, length, [&]() -> bool { |
343 | bool b = reader.IsSet(); |
344 | reader.Next(); |
345 | ++ncalled; |
346 | return b; |
347 | }); |
348 | ASSERT_EQ(ncalled, length); |
349 | } |
350 | // Then check generated contents |
351 | { |
352 | internal::BitmapReader source_reader(source, 0, length); |
353 | internal::BitmapReader result_reader(bitmap, start_offset, length); |
354 | for (int64_t i = 0; i < length; ++i) { |
355 | ASSERT_EQ(source_reader.IsSet(), result_reader.IsSet()) |
356 | << "mismatch at bit #" << i; |
357 | source_reader.Next(); |
358 | result_reader.Next(); |
359 | } |
360 | } |
361 | // Check bits preceding generated contents weren't clobbered |
362 | { |
363 | internal::BitmapReader reader_before(bitmap, 0, start_offset); |
364 | for (int64_t i = 0; i < start_offset; ++i) { |
365 | ASSERT_EQ(reader_before.IsSet(), fill_byte == 0xff) |
366 | << "mismatch at preceding bit #" << start_offset - i; |
367 | } |
368 | } |
369 | // Check the byte following generated contents wasn't clobbered |
370 | auto byte_after = bitmap[BitUtil::CeilDiv(start_offset + length, 8)]; |
371 | ASSERT_EQ(byte_after, fill_byte); |
372 | } |
373 | } |
374 | } |
375 | } |
376 | |
// Interface for a binary bitmap operation (AND / OR / XOR below).  The result
// bitmap is allocated from `pool` and returned through `out_buffer`, letting
// the same offset-combination harness exercise every operation.
struct BitmapOperation {
  virtual Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
                      const uint8_t* right, int64_t right_offset, int64_t length,
                      int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const = 0;

  virtual ~BitmapOperation() = default;
};
384 | |
// BitmapOperation delegating to internal::BitmapAnd.
struct BitmapAndOp : public BitmapOperation {
  Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
              const uint8_t* right, int64_t right_offset, int64_t length,
              int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
    return BitmapAnd(pool, left, left_offset, right, right_offset, length, out_offset,
                     out_buffer);
  }
};
393 | |
// BitmapOperation delegating to internal::BitmapOr.
struct BitmapOrOp : public BitmapOperation {
  Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
              const uint8_t* right, int64_t right_offset, int64_t length,
              int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
    return BitmapOr(pool, left, left_offset, right, right_offset, length, out_offset,
                    out_buffer);
  }
};
402 | |
// BitmapOperation delegating to internal::BitmapXor.
struct BitmapXorOp : public BitmapOperation {
  Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
              const uint8_t* right, int64_t right_offset, int64_t length,
              int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
    return BitmapXor(pool, left, left_offset, right, right_offset, length, out_offset,
                     out_buffer);
  }
};
411 | |
412 | class BitmapOp : public TestBase { |
413 | public: |
414 | void TestAligned(const BitmapOperation& op, const std::vector<int>& left_bits, |
415 | const std::vector<int>& right_bits, |
416 | const std::vector<int>& result_bits) { |
417 | std::shared_ptr<Buffer> left, right, out; |
418 | int64_t length; |
419 | |
420 | for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120}) { |
421 | BitmapFromVector(left_bits, left_offset, &left, &length); |
422 | for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) { |
423 | BitmapFromVector(right_bits, right_offset, &right, &length); |
424 | for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) { |
425 | ASSERT_OK(op.Call(default_memory_pool(), left->mutable_data(), left_offset, |
426 | right->mutable_data(), right_offset, length, out_offset, |
427 | &out)); |
428 | auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length); |
429 | ASSERT_READER_VALUES(reader, result_bits); |
430 | } |
431 | } |
432 | } |
433 | } |
434 | |
435 | void TestUnaligned(const BitmapOperation& op, const std::vector<int>& left_bits, |
436 | const std::vector<int>& right_bits, |
437 | const std::vector<int>& result_bits) { |
438 | std::shared_ptr<Buffer> left, right, out; |
439 | int64_t length; |
440 | auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120}; |
441 | |
442 | for (int64_t left_offset : offset_values) { |
443 | BitmapFromVector(left_bits, left_offset, &left, &length); |
444 | |
445 | for (int64_t right_offset : offset_values) { |
446 | BitmapFromVector(right_bits, right_offset, &right, &length); |
447 | |
448 | for (int64_t out_offset : offset_values) { |
449 | ASSERT_OK(op.Call(default_memory_pool(), left->mutable_data(), left_offset, |
450 | right->mutable_data(), right_offset, length, out_offset, |
451 | &out)); |
452 | auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length); |
453 | ASSERT_READER_VALUES(reader, result_bits); |
454 | } |
455 | } |
456 | } |
457 | } |
458 | }; |
459 | |
460 | TEST_F(BitmapOp, And) { |
461 | BitmapAndOp op; |
462 | std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}; |
463 | std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}; |
464 | std::vector<int> result = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}; |
465 | |
466 | TestAligned(op, left, right, result); |
467 | TestUnaligned(op, left, right, result); |
468 | } |
469 | |
470 | TEST_F(BitmapOp, Or) { |
471 | BitmapOrOp op; |
472 | std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0}; |
473 | std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}; |
474 | std::vector<int> result = {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0}; |
475 | |
476 | TestAligned(op, left, right, result); |
477 | TestUnaligned(op, left, right, result); |
478 | } |
479 | |
480 | TEST_F(BitmapOp, XorAligned) { |
481 | BitmapXorOp op; |
482 | std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}; |
483 | std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}; |
484 | std::vector<int> result = {0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1}; |
485 | |
486 | TestAligned(op, left, right, result); |
487 | TestUnaligned(op, left, right, result); |
488 | } |
489 | |
490 | static inline int64_t SlowCountBits(const uint8_t* data, int64_t bit_offset, |
491 | int64_t length) { |
492 | int64_t count = 0; |
493 | for (int64_t i = bit_offset; i < bit_offset + length; ++i) { |
494 | if (BitUtil::GetBit(data, i)) { |
495 | ++count; |
496 | } |
497 | } |
498 | return count; |
499 | } |
500 | |
501 | TEST(BitUtilTests, TestCountSetBits) { |
502 | const int kBufferSize = 1000; |
503 | uint8_t buffer[kBufferSize] = {0}; |
504 | |
505 | random_bytes(kBufferSize, 0, buffer); |
506 | |
507 | const int num_bits = kBufferSize * 8; |
508 | |
509 | std::vector<int64_t> offsets = { |
510 | 0, 12, 16, 32, 37, 63, 64, 128, num_bits - 30, num_bits - 64}; |
511 | for (int64_t offset : offsets) { |
512 | int64_t result = CountSetBits(buffer, offset, num_bits - offset); |
513 | int64_t expected = SlowCountBits(buffer, offset, num_bits - offset); |
514 | |
515 | ASSERT_EQ(expected, result); |
516 | } |
517 | } |
518 | |
// SetBitsTo(bitmap, offset, length, value) must set exactly `length` bits to
// `value` and leave every other bit of the prefilled memory untouched.
TEST(BitUtilTests, TestSetBitsTo) {
  using BitUtil::SetBitsTo;
  for (const auto fill_byte_int : {0x00, 0xff}) {
    const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
    {
      // test set within a byte
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      SetBitsTo(bitmap, 2, 2, true);
      SetBitsTo(bitmap, 4, 2, false);
      // bits 2-3 set (0x0C), bits 4-5 cleared; mask 0x3C covers bits 2-5
      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & ~0x3C) | 0xC)});
    }
    {
      // test straddling a single byte boundary
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      SetBitsTo(bitmap, 4, 7, true);
      SetBitsTo(bitmap, 11, 7, false);
      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x7,
                               static_cast<uint8_t>(fill_byte & ~0x3)});
    }
    {
      // test byte aligned end
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      SetBitsTo(bitmap, 4, 4, true);
      SetBitsTo(bitmap, 8, 8, false);
      ASSERT_BYTES_EQ(bitmap,
                      {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x00, fill_byte});
    }
    {
      // test byte aligned end, multiple bytes
      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
      SetBitsTo(bitmap, 0, 24, false);
      uint8_t false_byte = static_cast<uint8_t>(0);
      ASSERT_BYTES_EQ(bitmap, {false_byte, false_byte, false_byte, fill_byte});
    }
  }
}
555 | |
556 | TEST(BitUtilTests, TestCopyBitmap) { |
557 | const int kBufferSize = 1000; |
558 | |
559 | std::shared_ptr<Buffer> buffer; |
560 | ASSERT_OK(AllocateBuffer(kBufferSize, &buffer)); |
561 | memset(buffer->mutable_data(), 0, kBufferSize); |
562 | random_bytes(kBufferSize, 0, buffer->mutable_data()); |
563 | |
564 | const uint8_t* src = buffer->data(); |
565 | |
566 | std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8}; |
567 | std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128}; |
568 | for (int64_t num_bits : lengths) { |
569 | for (int64_t offset : offsets) { |
570 | const int64_t copy_length = num_bits - offset; |
571 | |
572 | std::shared_ptr<Buffer> copy; |
573 | ASSERT_OK(CopyBitmap(default_memory_pool(), src, offset, copy_length, ©)); |
574 | |
575 | for (int64_t i = 0; i < copy_length; ++i) { |
576 | ASSERT_EQ(BitUtil::GetBit(src, i + offset), BitUtil::GetBit(copy->data(), i)); |
577 | } |
578 | } |
579 | } |
580 | } |
581 | |
582 | TEST(BitUtilTests, TestCopyBitmapPreAllocated) { |
583 | const int kBufferSize = 1000; |
584 | std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8}; |
585 | std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128}; |
586 | |
587 | std::shared_ptr<Buffer> buffer; |
588 | ASSERT_OK(AllocateBuffer(kBufferSize, &buffer)); |
589 | memset(buffer->mutable_data(), 0, kBufferSize); |
590 | random_bytes(kBufferSize, 0, buffer->mutable_data()); |
591 | const uint8_t* src = buffer->data(); |
592 | |
593 | std::shared_ptr<Buffer> other_buffer; |
594 | // Add 16 byte padding on both sides |
595 | ASSERT_OK(AllocateBuffer(kBufferSize + 32, &other_buffer)); |
596 | memset(other_buffer->mutable_data(), 0, kBufferSize + 32); |
597 | random_bytes(kBufferSize + 32, 0, other_buffer->mutable_data()); |
598 | const uint8_t* other = other_buffer->data(); |
599 | |
600 | for (int64_t num_bits : lengths) { |
601 | for (int64_t offset : offsets) { |
602 | for (int64_t dest_offset : offsets) { |
603 | const int64_t copy_length = num_bits - offset; |
604 | |
605 | std::shared_ptr<Buffer> copy; |
606 | ASSERT_OK(AllocateBuffer(other_buffer->size(), ©)); |
607 | memcpy(copy->mutable_data(), other_buffer->data(), other_buffer->size()); |
608 | CopyBitmap(src, offset, copy_length, copy->mutable_data(), dest_offset); |
609 | |
610 | for (int64_t i = 0; i < dest_offset; ++i) { |
611 | ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i)); |
612 | } |
613 | for (int64_t i = 0; i < copy_length; ++i) { |
614 | ASSERT_EQ(BitUtil::GetBit(src, i + offset), |
615 | BitUtil::GetBit(copy->data(), i + dest_offset)); |
616 | } |
617 | for (int64_t i = dest_offset + copy_length; i < (other_buffer->size() * 8); ++i) { |
618 | ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i)); |
619 | } |
620 | } |
621 | } |
622 | } |
623 | } |
624 | |
625 | TEST(BitUtilTests, TestCopyAndInvertBitmapPreAllocated) { |
626 | const int kBufferSize = 1000; |
627 | std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8}; |
628 | std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128}; |
629 | |
630 | std::shared_ptr<Buffer> buffer; |
631 | ASSERT_OK(AllocateBuffer(kBufferSize, &buffer)); |
632 | memset(buffer->mutable_data(), 0, kBufferSize); |
633 | random_bytes(kBufferSize, 0, buffer->mutable_data()); |
634 | const uint8_t* src = buffer->data(); |
635 | |
636 | std::shared_ptr<Buffer> other_buffer; |
637 | // Add 16 byte padding on both sides |
638 | ASSERT_OK(AllocateBuffer(kBufferSize + 32, &other_buffer)); |
639 | memset(other_buffer->mutable_data(), 0, kBufferSize + 32); |
640 | random_bytes(kBufferSize + 32, 0, other_buffer->mutable_data()); |
641 | const uint8_t* other = other_buffer->data(); |
642 | |
643 | for (int64_t num_bits : lengths) { |
644 | for (int64_t offset : offsets) { |
645 | for (int64_t dest_offset : offsets) { |
646 | const int64_t copy_length = num_bits - offset; |
647 | |
648 | std::shared_ptr<Buffer> copy; |
649 | ASSERT_OK(AllocateBuffer(other_buffer->size(), ©)); |
650 | memcpy(copy->mutable_data(), other_buffer->data(), other_buffer->size()); |
651 | InvertBitmap(src, offset, copy_length, copy->mutable_data(), dest_offset); |
652 | |
653 | for (int64_t i = 0; i < dest_offset; ++i) { |
654 | ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i)); |
655 | } |
656 | for (int64_t i = 0; i < copy_length; ++i) { |
657 | ASSERT_EQ(BitUtil::GetBit(src, i + offset), |
658 | !BitUtil::GetBit(copy->data(), i + dest_offset)); |
659 | } |
660 | for (int64_t i = dest_offset + copy_length; i < (other_buffer->size() * 8); ++i) { |
661 | ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i)); |
662 | } |
663 | } |
664 | } |
665 | } |
666 | } |
667 | |
668 | TEST(BitUtil, CeilDiv) { |
669 | EXPECT_EQ(BitUtil::CeilDiv(0, 1), 0); |
670 | EXPECT_EQ(BitUtil::CeilDiv(1, 1), 1); |
671 | EXPECT_EQ(BitUtil::CeilDiv(1, 2), 1); |
672 | EXPECT_EQ(BitUtil::CeilDiv(1, 8), 1); |
673 | EXPECT_EQ(BitUtil::CeilDiv(7, 8), 1); |
674 | EXPECT_EQ(BitUtil::CeilDiv(8, 8), 1); |
675 | EXPECT_EQ(BitUtil::CeilDiv(9, 8), 2); |
676 | EXPECT_EQ(BitUtil::CeilDiv(9, 9), 1); |
677 | EXPECT_EQ(BitUtil::CeilDiv(10000000000, 10), 1000000000); |
678 | EXPECT_EQ(BitUtil::CeilDiv(10, 10000000000), 1); |
679 | EXPECT_EQ(BitUtil::CeilDiv(100000000000, 10000000000), 10); |
680 | } |
681 | |
682 | TEST(BitUtil, RoundUp) { |
683 | EXPECT_EQ(BitUtil::RoundUp(0, 1), 0); |
684 | EXPECT_EQ(BitUtil::RoundUp(1, 1), 1); |
685 | EXPECT_EQ(BitUtil::RoundUp(1, 2), 2); |
686 | EXPECT_EQ(BitUtil::RoundUp(6, 2), 6); |
687 | EXPECT_EQ(BitUtil::RoundUp(7, 3), 9); |
688 | EXPECT_EQ(BitUtil::RoundUp(9, 9), 9); |
689 | EXPECT_EQ(BitUtil::RoundUp(10000000001, 10), 10000000010); |
690 | EXPECT_EQ(BitUtil::RoundUp(10, 10000000000), 10000000000); |
691 | EXPECT_EQ(BitUtil::RoundUp(100000000000, 10000000000), 100000000000); |
692 | } |
693 | |
// TrailingBits(v, n) keeps the n least-significant bits of v; per the cases
// below, n >= 64 returns v unchanged.
TEST(BitUtil, TrailingBits) {
  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 0), 0);
  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 1), 1);
  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 64),
            BOOST_BINARY(1 1 1 1 1 1 1 1));
  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 100),
            BOOST_BINARY(1 1 1 1 1 1 1 1));
  EXPECT_EQ(BitUtil::TrailingBits(0, 1), 0);
  EXPECT_EQ(BitUtil::TrailingBits(0, 64), 0);
  // Only the single top bit is set: it survives only when n covers bit 63
  EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 0), 0);
  EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 63), 0);
  EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 64), 1LL << 63);
}
707 | |
// ByteSwap must reverse the byte order of signed and unsigned 16-, 32- and
// 64-bit integers, and map zero to zero for every width.
TEST(BitUtil, ByteSwap) {
  // 32-bit overloads
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0)), 0);
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0x11223344)), 0x44332211);

  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0)), 0);
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0x11223344)), 0x44332211);

  // 64-bit overloads
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0)), 0);
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0x1122334455667788)),
            0x8877665544332211);

  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0)), 0);
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0x1122334455667788)),
            0x8877665544332211);

  // 16-bit overloads
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0)), 0);
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0x1122)), 0x2211);

  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0)), 0);
  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0x1122)), 0x2211);
}
729 | |
// Per the cases below, Log2 rounds up: Log2(x) == ceil(log2(x))
// (e.g. Log2(3) == 2, Log2(5) == 3, Log2(9) == 4).
TEST(BitUtil, Log2) {
  EXPECT_EQ(BitUtil::Log2(1), 0);
  EXPECT_EQ(BitUtil::Log2(2), 1);
  EXPECT_EQ(BitUtil::Log2(3), 2);
  EXPECT_EQ(BitUtil::Log2(4), 2);
  EXPECT_EQ(BitUtil::Log2(5), 3);
  EXPECT_EQ(BitUtil::Log2(8), 3);
  EXPECT_EQ(BitUtil::Log2(9), 4);
  EXPECT_EQ(BitUtil::Log2(INT_MAX), 31);
  EXPECT_EQ(BitUtil::Log2(UINT_MAX), 32);
  EXPECT_EQ(BitUtil::Log2(ULLONG_MAX), 64);
}
742 | |
// Per the cases below, NumRequiredBits(x) is the minimal number of bits
// needed to represent x: 0 for x == 0, otherwise floor(log2(x)) + 1.
TEST(BitUtil, NumRequiredBits) {
  EXPECT_EQ(BitUtil::NumRequiredBits(0), 0);
  EXPECT_EQ(BitUtil::NumRequiredBits(1), 1);
  EXPECT_EQ(BitUtil::NumRequiredBits(2), 2);
  EXPECT_EQ(BitUtil::NumRequiredBits(3), 2);
  EXPECT_EQ(BitUtil::NumRequiredBits(4), 3);
  EXPECT_EQ(BitUtil::NumRequiredBits(5), 3);
  EXPECT_EQ(BitUtil::NumRequiredBits(7), 3);
  EXPECT_EQ(BitUtil::NumRequiredBits(8), 4);
  EXPECT_EQ(BitUtil::NumRequiredBits(9), 4);
  // Boundaries around the 32-bit and 64-bit limits
  EXPECT_EQ(BitUtil::NumRequiredBits(UINT_MAX - 1), 32);
  EXPECT_EQ(BitUtil::NumRequiredBits(UINT_MAX), 32);
  EXPECT_EQ(BitUtil::NumRequiredBits(static_cast<uint64_t>(UINT_MAX) + 1), 33);
  EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX / 2), 63);
  EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX / 2 + 1), 64);
  EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX - 1), 64);
  EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX), 64);
}
761 | |
// Shorthand casts selecting the 32- or 64-bit overload under test; undefined
// again right after the test.
#define U32(x) static_cast<uint32_t>(x)
#define U64(x) static_cast<uint64_t>(x)

TEST(BitUtil, CountLeadingZeros) {
  // 32-bit overload: zero yields the full width (32)
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(0)), 32);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(1)), 31);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(2)), 30);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(3)), 30);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(4)), 29);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(7)), 29);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(8)), 28);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX / 2)), 1);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX / 2 + 1)), 0);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX)), 0);

  // 64-bit overload: zero yields the full width (64)
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(0)), 64);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(1)), 63);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(2)), 62);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(3)), 62);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(4)), 61);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(7)), 61);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(8)), 60);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(UINT_MAX)), 32);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(UINT_MAX) + 1), 31);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX / 2)), 1);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX / 2 + 1)), 0);
  EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX)), 0);
}

#undef U32
#undef U64
793 | |
794 | TEST(BitUtil, RoundUpToPowerOf2) { |
795 | EXPECT_EQ(BitUtil::RoundUpToPowerOf2(7, 8), 8); |
796 | EXPECT_EQ(BitUtil::RoundUpToPowerOf2(8, 8), 8); |
797 | EXPECT_EQ(BitUtil::RoundUpToPowerOf2(9, 8), 16); |
798 | } |
799 | |
800 | static void TestZigZag(int32_t v) { |
801 | uint8_t buffer[BitUtil::BitReader::MAX_VLQ_BYTE_LEN] = {}; |
802 | BitUtil::BitWriter writer(buffer, sizeof(buffer)); |
803 | BitUtil::BitReader reader(buffer, sizeof(buffer)); |
804 | writer.PutZigZagVlqInt(v); |
805 | int32_t result; |
806 | EXPECT_TRUE(reader.GetZigZagVlqInt(&result)); |
807 | EXPECT_EQ(v, result); |
808 | } |
809 | |
810 | TEST(BitStreamUtil, ZigZag) { |
811 | TestZigZag(0); |
812 | TestZigZag(1); |
813 | TestZigZag(1234); |
814 | TestZigZag(-1); |
815 | TestZigZag(-1234); |
816 | TestZigZag(std::numeric_limits<int32_t>::max()); |
817 | TestZigZag(-std::numeric_limits<int32_t>::max()); |
818 | } |
819 | |
// ToLittleEndian is the identity on little-endian hosts and a byte swap on
// big-endian hosts; FromLittleEndian must invert it exactly.
TEST(BitUtil, RoundTripLittleEndianTest) {
  uint64_t value = 0xFF;

#if ARROW_LITTLE_ENDIAN
  uint64_t expected = value;
#else
  // On a big-endian host the 0xFF byte moves to the most significant position
  uint64_t expected = std::numeric_limits<uint64_t>::max() << 56;
#endif

  uint64_t little_endian_result = BitUtil::ToLittleEndian(value);
  ASSERT_EQ(expected, little_endian_result);

  uint64_t from_little_endian = BitUtil::FromLittleEndian(little_endian_result);
  ASSERT_EQ(value, from_little_endian);
}
835 | |
// Mirror of the little-endian test: ToBigEndian swaps on little-endian hosts
// and is the identity on big-endian hosts; FromBigEndian must invert it.
TEST(BitUtil, RoundTripBigEndianTest) {
  uint64_t value = 0xFF;

#if ARROW_LITTLE_ENDIAN
  // On a little-endian host the 0xFF byte moves to the most significant position
  uint64_t expected = std::numeric_limits<uint64_t>::max() << 56;
#else
  uint64_t expected = value;
#endif

  uint64_t big_endian_result = BitUtil::ToBigEndian(value);
  ASSERT_EQ(expected, big_endian_result);

  uint64_t from_big_endian = BitUtil::FromBigEndian(big_endian_result);
  ASSERT_EQ(value, from_big_endian);
}
851 | |
852 | } // namespace arrow |
853 | |