1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <climits>
19#include <cstdint>
20#include <cstring>
21#include <functional>
22#include <limits>
23#include <memory>
24#include <vector>
25
26#include <gtest/gtest.h>
27
28#include <boost/utility.hpp> // IWYU pragma: export
29
30#include "arrow/buffer.h"
31#include "arrow/memory_pool.h"
32#include "arrow/test-common.h"
33#include "arrow/test-util.h"
34#include "arrow/util/bit-stream-utils.h"
35#include "arrow/util/bit-util.h"
36#include "arrow/util/cpu-info.h"
37
38namespace arrow {
39
40using internal::BitmapAnd;
41using internal::BitmapOr;
42using internal::BitmapXor;
43using internal::CopyBitmap;
44using internal::CountSetBits;
45using internal::InvertBitmap;
46
47template <class BitmapWriter>
48void WriteVectorToWriter(BitmapWriter& writer, const std::vector<int> values) {
49 for (const auto& value : values) {
50 if (value) {
51 writer.Set();
52 } else {
53 writer.Clear();
54 }
55 writer.Next();
56 }
57 writer.Finish();
58}
59
60void BitmapFromVector(const std::vector<int>& values, int64_t bit_offset,
61 std::shared_ptr<Buffer>* out_buffer, int64_t* out_length) {
62 const int64_t length = values.size();
63 *out_length = length;
64 ASSERT_OK(AllocateEmptyBitmap(length + bit_offset, out_buffer));
65 auto writer = internal::BitmapWriter((*out_buffer)->mutable_data(), bit_offset, length);
66 WriteVectorToWriter(writer, values);
67}
68
69#define ASSERT_READER_SET(reader) \
70 do { \
71 ASSERT_TRUE(reader.IsSet()); \
72 ASSERT_FALSE(reader.IsNotSet()); \
73 reader.Next(); \
74 } while (false)
75
76#define ASSERT_READER_NOT_SET(reader) \
77 do { \
78 ASSERT_FALSE(reader.IsSet()); \
79 ASSERT_TRUE(reader.IsNotSet()); \
80 reader.Next(); \
81 } while (false)
82
83// Assert that a BitmapReader yields the given bit values
84void ASSERT_READER_VALUES(internal::BitmapReader& reader, std::vector<int> values) {
85 for (const auto& value : values) {
86 if (value) {
87 ASSERT_READER_SET(reader);
88 } else {
89 ASSERT_READER_NOT_SET(reader);
90 }
91 }
92}
93
94// Assert equal contents of a memory area and a vector of bytes
95void ASSERT_BYTES_EQ(const uint8_t* left, const std::vector<uint8_t>& right) {
96 auto left_array = std::vector<uint8_t>(left, left + right.size());
97 ASSERT_EQ(left_array, right);
98}
99
100TEST(BitUtilTests, TestIsMultipleOf64) {
101 using BitUtil::IsMultipleOf64;
102 EXPECT_TRUE(IsMultipleOf64(64));
103 EXPECT_TRUE(IsMultipleOf64(0));
104 EXPECT_TRUE(IsMultipleOf64(128));
105 EXPECT_TRUE(IsMultipleOf64(192));
106 EXPECT_FALSE(IsMultipleOf64(23));
107 EXPECT_FALSE(IsMultipleOf64(32));
108}
109
110TEST(BitUtilTests, TestNextPower2) {
111 using BitUtil::NextPower2;
112
113 ASSERT_EQ(8, NextPower2(6));
114 ASSERT_EQ(8, NextPower2(8));
115
116 ASSERT_EQ(1, NextPower2(1));
117 ASSERT_EQ(256, NextPower2(131));
118
119 ASSERT_EQ(1024, NextPower2(1000));
120
121 ASSERT_EQ(4096, NextPower2(4000));
122
123 ASSERT_EQ(65536, NextPower2(64000));
124
125 ASSERT_EQ(1LL << 32, NextPower2((1LL << 32) - 1));
126 ASSERT_EQ(1LL << 31, NextPower2((1LL << 31) - 1));
127 ASSERT_EQ(1LL << 62, NextPower2((1LL << 62) - 1));
128}
129
130TEST(BitmapReader, NormalOperation) {
131 std::shared_ptr<Buffer> buffer;
132 int64_t length;
133
134 for (int64_t offset : {0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120}) {
135 BitmapFromVector({0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset, &buffer,
136 &length);
137 ASSERT_EQ(length, 14);
138
139 auto reader = internal::BitmapReader(buffer->mutable_data(), offset, length);
140 ASSERT_READER_VALUES(reader, {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1});
141 }
142}
143
144TEST(BitmapReader, DoesNotReadOutOfBounds) {
145 uint8_t bitmap[16] = {0};
146
147 const int length = 128;
148
149 internal::BitmapReader r1(bitmap, 0, length);
150
151 // If this were to read out of bounds, valgrind would tell us
152 for (int i = 0; i < length; ++i) {
153 ASSERT_TRUE(r1.IsNotSet());
154 r1.Next();
155 }
156
157 internal::BitmapReader r2(bitmap, 5, length - 5);
158
159 for (int i = 0; i < (length - 5); ++i) {
160 ASSERT_TRUE(r2.IsNotSet());
161 r2.Next();
162 }
163
164 // Does not access invalid memory
165 internal::BitmapReader r3(nullptr, 0, 0);
166}
167
168TEST(BitmapWriter, NormalOperation) {
169 for (const auto fill_byte_int : {0x00, 0xff}) {
170 const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
171 {
172 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
173 auto writer = internal::BitmapWriter(bitmap, 0, 12);
174 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
175 // {0b00110110, 0b....1010, ........, ........}
176 ASSERT_BYTES_EQ(bitmap, {0x36, static_cast<uint8_t>(0x0a | (fill_byte & 0xf0)),
177 fill_byte, fill_byte});
178 }
179 {
180 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
181 auto writer = internal::BitmapWriter(bitmap, 3, 12);
182 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
183 // {0b10110..., 0b.1010001, ........, ........}
184 ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0xb0 | (fill_byte & 0x07)),
185 static_cast<uint8_t>(0x51 | (fill_byte & 0x80)), fill_byte,
186 fill_byte});
187 }
188 {
189 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
190 auto writer = internal::BitmapWriter(bitmap, 20, 12);
191 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
192 // {........, ........, 0b0110...., 0b10100011}
193 ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte,
194 static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
195 }
196 // 0-length writes
197 for (int64_t pos = 0; pos < 32; ++pos) {
198 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
199 auto writer = internal::BitmapWriter(bitmap, pos, 0);
200 WriteVectorToWriter(writer, {});
201 ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte, fill_byte, fill_byte});
202 }
203 }
204}
205
206TEST(BitmapWriter, DoesNotWriteOutOfBounds) {
207 uint8_t bitmap[16] = {0};
208
209 const int length = 128;
210
211 int64_t num_values = 0;
212
213 internal::BitmapWriter r1(bitmap, 0, length);
214
215 // If this were to write out of bounds, valgrind would tell us
216 for (int i = 0; i < length; ++i) {
217 r1.Set();
218 r1.Clear();
219 r1.Next();
220 }
221 r1.Finish();
222 num_values = r1.position();
223
224 ASSERT_EQ(length, num_values);
225
226 internal::BitmapWriter r2(bitmap, 5, length - 5);
227
228 for (int i = 0; i < (length - 5); ++i) {
229 r2.Set();
230 r2.Clear();
231 r2.Next();
232 }
233 r2.Finish();
234 num_values = r2.position();
235
236 ASSERT_EQ((length - 5), num_values);
237}
238
239TEST(FirstTimeBitmapWriter, NormalOperation) {
240 for (const auto fill_byte_int : {0x00, 0xff}) {
241 const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
242 {
243 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
244 auto writer = internal::FirstTimeBitmapWriter(bitmap, 0, 12);
245 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
246 // {0b00110110, 0b1010, 0, 0}
247 ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a});
248 }
249 {
250 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
251 auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 12);
252 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
253 // {0b00110110, 0b1010, 0, 0}
254 ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
255 }
256 // Consecutive write chunks
257 {
258 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
259 {
260 auto writer = internal::FirstTimeBitmapWriter(bitmap, 0, 6);
261 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
262 }
263 {
264 auto writer = internal::FirstTimeBitmapWriter(bitmap, 6, 3);
265 WriteVectorToWriter(writer, {0, 0, 0});
266 }
267 {
268 auto writer = internal::FirstTimeBitmapWriter(bitmap, 9, 3);
269 WriteVectorToWriter(writer, {1, 0, 1});
270 }
271 ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a});
272 }
273 {
274 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
275 {
276 auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 0);
277 WriteVectorToWriter(writer, {});
278 }
279 {
280 auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 6);
281 WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
282 }
283 {
284 auto writer = internal::FirstTimeBitmapWriter(bitmap, 10, 3);
285 WriteVectorToWriter(writer, {0, 0, 0});
286 }
287 {
288 auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 0);
289 WriteVectorToWriter(writer, {});
290 }
291 {
292 auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 3);
293 WriteVectorToWriter(writer, {1, 0, 1});
294 }
295 ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
296 }
297 }
298}
299
300// Tests for GenerateBits and GenerateBitsUnrolled
301
302struct GenerateBitsFunctor {
303 template <class Generator>
304 void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
305 return internal::GenerateBits(bitmap, start_offset, length, g);
306 }
307};
308
309struct GenerateBitsUnrolledFunctor {
310 template <class Generator>
311 void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
312 return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
313 }
314};
315
316template <typename T>
317class TestGenerateBits : public ::testing::Test {};
318
319typedef ::testing::Types<GenerateBitsFunctor, GenerateBitsUnrolledFunctor>
320 GenerateBitsTypes;
321TYPED_TEST_CASE(TestGenerateBits, GenerateBitsTypes);
322
323TYPED_TEST(TestGenerateBits, NormalOperation) {
324 const int kSourceSize = 256;
325 uint8_t source[kSourceSize];
326 random_bytes(kSourceSize, 0, source);
327
328 const int64_t start_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 31, 32};
329 const int64_t lengths[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 16,
330 17, 21, 31, 32, 100, 201, 202, 203, 204, 205, 206, 207};
331 const uint8_t fill_bytes[] = {0x00, 0xff};
332
333 for (const int64_t start_offset : start_offsets) {
334 for (const int64_t length : lengths) {
335 for (const uint8_t fill_byte : fill_bytes) {
336 uint8_t bitmap[kSourceSize + 1];
337 memset(bitmap, fill_byte, kSourceSize + 1);
338 // First call GenerateBits
339 {
340 int64_t ncalled = 0;
341 internal::BitmapReader reader(source, 0, length);
342 TypeParam()(bitmap, start_offset, length, [&]() -> bool {
343 bool b = reader.IsSet();
344 reader.Next();
345 ++ncalled;
346 return b;
347 });
348 ASSERT_EQ(ncalled, length);
349 }
350 // Then check generated contents
351 {
352 internal::BitmapReader source_reader(source, 0, length);
353 internal::BitmapReader result_reader(bitmap, start_offset, length);
354 for (int64_t i = 0; i < length; ++i) {
355 ASSERT_EQ(source_reader.IsSet(), result_reader.IsSet())
356 << "mismatch at bit #" << i;
357 source_reader.Next();
358 result_reader.Next();
359 }
360 }
361 // Check bits preceding generated contents weren't clobbered
362 {
363 internal::BitmapReader reader_before(bitmap, 0, start_offset);
364 for (int64_t i = 0; i < start_offset; ++i) {
365 ASSERT_EQ(reader_before.IsSet(), fill_byte == 0xff)
366 << "mismatch at preceding bit #" << start_offset - i;
367 }
368 }
369 // Check the byte following generated contents wasn't clobbered
370 auto byte_after = bitmap[BitUtil::CeilDiv(start_offset + length, 8)];
371 ASSERT_EQ(byte_after, fill_byte);
372 }
373 }
374 }
375}
376
377struct BitmapOperation {
378 virtual Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
379 const uint8_t* right, int64_t right_offset, int64_t length,
380 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const = 0;
381
382 virtual ~BitmapOperation() = default;
383};
384
385struct BitmapAndOp : public BitmapOperation {
386 Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
387 const uint8_t* right, int64_t right_offset, int64_t length,
388 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
389 return BitmapAnd(pool, left, left_offset, right, right_offset, length, out_offset,
390 out_buffer);
391 }
392};
393
394struct BitmapOrOp : public BitmapOperation {
395 Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
396 const uint8_t* right, int64_t right_offset, int64_t length,
397 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
398 return BitmapOr(pool, left, left_offset, right, right_offset, length, out_offset,
399 out_buffer);
400 }
401};
402
403struct BitmapXorOp : public BitmapOperation {
404 Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
405 const uint8_t* right, int64_t right_offset, int64_t length,
406 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
407 return BitmapXor(pool, left, left_offset, right, right_offset, length, out_offset,
408 out_buffer);
409 }
410};
411
412class BitmapOp : public TestBase {
413 public:
414 void TestAligned(const BitmapOperation& op, const std::vector<int>& left_bits,
415 const std::vector<int>& right_bits,
416 const std::vector<int>& result_bits) {
417 std::shared_ptr<Buffer> left, right, out;
418 int64_t length;
419
420 for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120}) {
421 BitmapFromVector(left_bits, left_offset, &left, &length);
422 for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
423 BitmapFromVector(right_bits, right_offset, &right, &length);
424 for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
425 ASSERT_OK(op.Call(default_memory_pool(), left->mutable_data(), left_offset,
426 right->mutable_data(), right_offset, length, out_offset,
427 &out));
428 auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
429 ASSERT_READER_VALUES(reader, result_bits);
430 }
431 }
432 }
433 }
434
435 void TestUnaligned(const BitmapOperation& op, const std::vector<int>& left_bits,
436 const std::vector<int>& right_bits,
437 const std::vector<int>& result_bits) {
438 std::shared_ptr<Buffer> left, right, out;
439 int64_t length;
440 auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120};
441
442 for (int64_t left_offset : offset_values) {
443 BitmapFromVector(left_bits, left_offset, &left, &length);
444
445 for (int64_t right_offset : offset_values) {
446 BitmapFromVector(right_bits, right_offset, &right, &length);
447
448 for (int64_t out_offset : offset_values) {
449 ASSERT_OK(op.Call(default_memory_pool(), left->mutable_data(), left_offset,
450 right->mutable_data(), right_offset, length, out_offset,
451 &out));
452 auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
453 ASSERT_READER_VALUES(reader, result_bits);
454 }
455 }
456 }
457 }
458};
459
460TEST_F(BitmapOp, And) {
461 BitmapAndOp op;
462 std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
463 std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
464 std::vector<int> result = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
465
466 TestAligned(op, left, right, result);
467 TestUnaligned(op, left, right, result);
468}
469
470TEST_F(BitmapOp, Or) {
471 BitmapOrOp op;
472 std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
473 std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
474 std::vector<int> result = {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0};
475
476 TestAligned(op, left, right, result);
477 TestUnaligned(op, left, right, result);
478}
479
480TEST_F(BitmapOp, XorAligned) {
481 BitmapXorOp op;
482 std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
483 std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
484 std::vector<int> result = {0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
485
486 TestAligned(op, left, right, result);
487 TestUnaligned(op, left, right, result);
488}
489
490static inline int64_t SlowCountBits(const uint8_t* data, int64_t bit_offset,
491 int64_t length) {
492 int64_t count = 0;
493 for (int64_t i = bit_offset; i < bit_offset + length; ++i) {
494 if (BitUtil::GetBit(data, i)) {
495 ++count;
496 }
497 }
498 return count;
499}
500
501TEST(BitUtilTests, TestCountSetBits) {
502 const int kBufferSize = 1000;
503 uint8_t buffer[kBufferSize] = {0};
504
505 random_bytes(kBufferSize, 0, buffer);
506
507 const int num_bits = kBufferSize * 8;
508
509 std::vector<int64_t> offsets = {
510 0, 12, 16, 32, 37, 63, 64, 128, num_bits - 30, num_bits - 64};
511 for (int64_t offset : offsets) {
512 int64_t result = CountSetBits(buffer, offset, num_bits - offset);
513 int64_t expected = SlowCountBits(buffer, offset, num_bits - offset);
514
515 ASSERT_EQ(expected, result);
516 }
517}
518
519TEST(BitUtilTests, TestSetBitsTo) {
520 using BitUtil::SetBitsTo;
521 for (const auto fill_byte_int : {0x00, 0xff}) {
522 const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
523 {
524 // test set within a byte
525 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
526 SetBitsTo(bitmap, 2, 2, true);
527 SetBitsTo(bitmap, 4, 2, false);
528 ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & ~0x3C) | 0xC)});
529 }
530 {
531 // test straddling a single byte boundary
532 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
533 SetBitsTo(bitmap, 4, 7, true);
534 SetBitsTo(bitmap, 11, 7, false);
535 ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x7,
536 static_cast<uint8_t>(fill_byte & ~0x3)});
537 }
538 {
539 // test byte aligned end
540 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
541 SetBitsTo(bitmap, 4, 4, true);
542 SetBitsTo(bitmap, 8, 8, false);
543 ASSERT_BYTES_EQ(bitmap,
544 {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x00, fill_byte});
545 }
546 {
547 // test byte aligned end, multiple bytes
548 uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
549 SetBitsTo(bitmap, 0, 24, false);
550 uint8_t false_byte = static_cast<uint8_t>(0);
551 ASSERT_BYTES_EQ(bitmap, {false_byte, false_byte, false_byte, fill_byte});
552 }
553 }
554}
555
556TEST(BitUtilTests, TestCopyBitmap) {
557 const int kBufferSize = 1000;
558
559 std::shared_ptr<Buffer> buffer;
560 ASSERT_OK(AllocateBuffer(kBufferSize, &buffer));
561 memset(buffer->mutable_data(), 0, kBufferSize);
562 random_bytes(kBufferSize, 0, buffer->mutable_data());
563
564 const uint8_t* src = buffer->data();
565
566 std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
567 std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
568 for (int64_t num_bits : lengths) {
569 for (int64_t offset : offsets) {
570 const int64_t copy_length = num_bits - offset;
571
572 std::shared_ptr<Buffer> copy;
573 ASSERT_OK(CopyBitmap(default_memory_pool(), src, offset, copy_length, &copy));
574
575 for (int64_t i = 0; i < copy_length; ++i) {
576 ASSERT_EQ(BitUtil::GetBit(src, i + offset), BitUtil::GetBit(copy->data(), i));
577 }
578 }
579 }
580}
581
582TEST(BitUtilTests, TestCopyBitmapPreAllocated) {
583 const int kBufferSize = 1000;
584 std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
585 std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
586
587 std::shared_ptr<Buffer> buffer;
588 ASSERT_OK(AllocateBuffer(kBufferSize, &buffer));
589 memset(buffer->mutable_data(), 0, kBufferSize);
590 random_bytes(kBufferSize, 0, buffer->mutable_data());
591 const uint8_t* src = buffer->data();
592
593 std::shared_ptr<Buffer> other_buffer;
594 // Add 16 byte padding on both sides
595 ASSERT_OK(AllocateBuffer(kBufferSize + 32, &other_buffer));
596 memset(other_buffer->mutable_data(), 0, kBufferSize + 32);
597 random_bytes(kBufferSize + 32, 0, other_buffer->mutable_data());
598 const uint8_t* other = other_buffer->data();
599
600 for (int64_t num_bits : lengths) {
601 for (int64_t offset : offsets) {
602 for (int64_t dest_offset : offsets) {
603 const int64_t copy_length = num_bits - offset;
604
605 std::shared_ptr<Buffer> copy;
606 ASSERT_OK(AllocateBuffer(other_buffer->size(), &copy));
607 memcpy(copy->mutable_data(), other_buffer->data(), other_buffer->size());
608 CopyBitmap(src, offset, copy_length, copy->mutable_data(), dest_offset);
609
610 for (int64_t i = 0; i < dest_offset; ++i) {
611 ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
612 }
613 for (int64_t i = 0; i < copy_length; ++i) {
614 ASSERT_EQ(BitUtil::GetBit(src, i + offset),
615 BitUtil::GetBit(copy->data(), i + dest_offset));
616 }
617 for (int64_t i = dest_offset + copy_length; i < (other_buffer->size() * 8); ++i) {
618 ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
619 }
620 }
621 }
622 }
623}
624
625TEST(BitUtilTests, TestCopyAndInvertBitmapPreAllocated) {
626 const int kBufferSize = 1000;
627 std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
628 std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
629
630 std::shared_ptr<Buffer> buffer;
631 ASSERT_OK(AllocateBuffer(kBufferSize, &buffer));
632 memset(buffer->mutable_data(), 0, kBufferSize);
633 random_bytes(kBufferSize, 0, buffer->mutable_data());
634 const uint8_t* src = buffer->data();
635
636 std::shared_ptr<Buffer> other_buffer;
637 // Add 16 byte padding on both sides
638 ASSERT_OK(AllocateBuffer(kBufferSize + 32, &other_buffer));
639 memset(other_buffer->mutable_data(), 0, kBufferSize + 32);
640 random_bytes(kBufferSize + 32, 0, other_buffer->mutable_data());
641 const uint8_t* other = other_buffer->data();
642
643 for (int64_t num_bits : lengths) {
644 for (int64_t offset : offsets) {
645 for (int64_t dest_offset : offsets) {
646 const int64_t copy_length = num_bits - offset;
647
648 std::shared_ptr<Buffer> copy;
649 ASSERT_OK(AllocateBuffer(other_buffer->size(), &copy));
650 memcpy(copy->mutable_data(), other_buffer->data(), other_buffer->size());
651 InvertBitmap(src, offset, copy_length, copy->mutable_data(), dest_offset);
652
653 for (int64_t i = 0; i < dest_offset; ++i) {
654 ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
655 }
656 for (int64_t i = 0; i < copy_length; ++i) {
657 ASSERT_EQ(BitUtil::GetBit(src, i + offset),
658 !BitUtil::GetBit(copy->data(), i + dest_offset));
659 }
660 for (int64_t i = dest_offset + copy_length; i < (other_buffer->size() * 8); ++i) {
661 ASSERT_EQ(BitUtil::GetBit(other, i), BitUtil::GetBit(copy->data(), i));
662 }
663 }
664 }
665 }
666}
667
668TEST(BitUtil, CeilDiv) {
669 EXPECT_EQ(BitUtil::CeilDiv(0, 1), 0);
670 EXPECT_EQ(BitUtil::CeilDiv(1, 1), 1);
671 EXPECT_EQ(BitUtil::CeilDiv(1, 2), 1);
672 EXPECT_EQ(BitUtil::CeilDiv(1, 8), 1);
673 EXPECT_EQ(BitUtil::CeilDiv(7, 8), 1);
674 EXPECT_EQ(BitUtil::CeilDiv(8, 8), 1);
675 EXPECT_EQ(BitUtil::CeilDiv(9, 8), 2);
676 EXPECT_EQ(BitUtil::CeilDiv(9, 9), 1);
677 EXPECT_EQ(BitUtil::CeilDiv(10000000000, 10), 1000000000);
678 EXPECT_EQ(BitUtil::CeilDiv(10, 10000000000), 1);
679 EXPECT_EQ(BitUtil::CeilDiv(100000000000, 10000000000), 10);
680}
681
682TEST(BitUtil, RoundUp) {
683 EXPECT_EQ(BitUtil::RoundUp(0, 1), 0);
684 EXPECT_EQ(BitUtil::RoundUp(1, 1), 1);
685 EXPECT_EQ(BitUtil::RoundUp(1, 2), 2);
686 EXPECT_EQ(BitUtil::RoundUp(6, 2), 6);
687 EXPECT_EQ(BitUtil::RoundUp(7, 3), 9);
688 EXPECT_EQ(BitUtil::RoundUp(9, 9), 9);
689 EXPECT_EQ(BitUtil::RoundUp(10000000001, 10), 10000000010);
690 EXPECT_EQ(BitUtil::RoundUp(10, 10000000000), 10000000000);
691 EXPECT_EQ(BitUtil::RoundUp(100000000000, 10000000000), 100000000000);
692}
693
694TEST(BitUtil, TrailingBits) {
695 EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 0), 0);
696 EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 1), 1);
697 EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 64),
698 BOOST_BINARY(1 1 1 1 1 1 1 1));
699 EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 100),
700 BOOST_BINARY(1 1 1 1 1 1 1 1));
701 EXPECT_EQ(BitUtil::TrailingBits(0, 1), 0);
702 EXPECT_EQ(BitUtil::TrailingBits(0, 64), 0);
703 EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 0), 0);
704 EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 63), 0);
705 EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 64), 1LL << 63);
706}
707
708TEST(BitUtil, ByteSwap) {
709 EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0)), 0);
710 EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0x11223344)), 0x44332211);
711
712 EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0)), 0);
713 EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0x11223344)), 0x44332211);
714
715 EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0)), 0);
716 EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0x1122334455667788)),
717 0x8877665544332211);
718
719 EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0)), 0);
720 EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0x1122334455667788)),
721 0x8877665544332211);
722
723 EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0)), 0);
724 EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0x1122)), 0x2211);
725
726 EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0)), 0);
727 EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0x1122)), 0x2211);
728}
729
730TEST(BitUtil, Log2) {
731 EXPECT_EQ(BitUtil::Log2(1), 0);
732 EXPECT_EQ(BitUtil::Log2(2), 1);
733 EXPECT_EQ(BitUtil::Log2(3), 2);
734 EXPECT_EQ(BitUtil::Log2(4), 2);
735 EXPECT_EQ(BitUtil::Log2(5), 3);
736 EXPECT_EQ(BitUtil::Log2(8), 3);
737 EXPECT_EQ(BitUtil::Log2(9), 4);
738 EXPECT_EQ(BitUtil::Log2(INT_MAX), 31);
739 EXPECT_EQ(BitUtil::Log2(UINT_MAX), 32);
740 EXPECT_EQ(BitUtil::Log2(ULLONG_MAX), 64);
741}
742
743TEST(BitUtil, NumRequiredBits) {
744 EXPECT_EQ(BitUtil::NumRequiredBits(0), 0);
745 EXPECT_EQ(BitUtil::NumRequiredBits(1), 1);
746 EXPECT_EQ(BitUtil::NumRequiredBits(2), 2);
747 EXPECT_EQ(BitUtil::NumRequiredBits(3), 2);
748 EXPECT_EQ(BitUtil::NumRequiredBits(4), 3);
749 EXPECT_EQ(BitUtil::NumRequiredBits(5), 3);
750 EXPECT_EQ(BitUtil::NumRequiredBits(7), 3);
751 EXPECT_EQ(BitUtil::NumRequiredBits(8), 4);
752 EXPECT_EQ(BitUtil::NumRequiredBits(9), 4);
753 EXPECT_EQ(BitUtil::NumRequiredBits(UINT_MAX - 1), 32);
754 EXPECT_EQ(BitUtil::NumRequiredBits(UINT_MAX), 32);
755 EXPECT_EQ(BitUtil::NumRequiredBits(static_cast<uint64_t>(UINT_MAX) + 1), 33);
756 EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX / 2), 63);
757 EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX / 2 + 1), 64);
758 EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX - 1), 64);
759 EXPECT_EQ(BitUtil::NumRequiredBits(ULLONG_MAX), 64);
760}
761
762#define U32(x) static_cast<uint32_t>(x)
763#define U64(x) static_cast<uint64_t>(x)
764
765TEST(BitUtil, CountLeadingZeros) {
766 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(0)), 32);
767 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(1)), 31);
768 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(2)), 30);
769 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(3)), 30);
770 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(4)), 29);
771 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(7)), 29);
772 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(8)), 28);
773 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX / 2)), 1);
774 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX / 2 + 1)), 0);
775 EXPECT_EQ(BitUtil::CountLeadingZeros(U32(UINT_MAX)), 0);
776
777 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(0)), 64);
778 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(1)), 63);
779 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(2)), 62);
780 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(3)), 62);
781 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(4)), 61);
782 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(7)), 61);
783 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(8)), 60);
784 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(UINT_MAX)), 32);
785 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(UINT_MAX) + 1), 31);
786 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX / 2)), 1);
787 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX / 2 + 1)), 0);
788 EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX)), 0);
789}
790
791#undef U32
792#undef U64
793
794TEST(BitUtil, RoundUpToPowerOf2) {
795 EXPECT_EQ(BitUtil::RoundUpToPowerOf2(7, 8), 8);
796 EXPECT_EQ(BitUtil::RoundUpToPowerOf2(8, 8), 8);
797 EXPECT_EQ(BitUtil::RoundUpToPowerOf2(9, 8), 16);
798}
799
800static void TestZigZag(int32_t v) {
801 uint8_t buffer[BitUtil::BitReader::MAX_VLQ_BYTE_LEN] = {};
802 BitUtil::BitWriter writer(buffer, sizeof(buffer));
803 BitUtil::BitReader reader(buffer, sizeof(buffer));
804 writer.PutZigZagVlqInt(v);
805 int32_t result;
806 EXPECT_TRUE(reader.GetZigZagVlqInt(&result));
807 EXPECT_EQ(v, result);
808}
809
810TEST(BitStreamUtil, ZigZag) {
811 TestZigZag(0);
812 TestZigZag(1);
813 TestZigZag(1234);
814 TestZigZag(-1);
815 TestZigZag(-1234);
816 TestZigZag(std::numeric_limits<int32_t>::max());
817 TestZigZag(-std::numeric_limits<int32_t>::max());
818}
819
820TEST(BitUtil, RoundTripLittleEndianTest) {
821 uint64_t value = 0xFF;
822
823#if ARROW_LITTLE_ENDIAN
824 uint64_t expected = value;
825#else
826 uint64_t expected = std::numeric_limits<uint64_t>::max() << 56;
827#endif
828
829 uint64_t little_endian_result = BitUtil::ToLittleEndian(value);
830 ASSERT_EQ(expected, little_endian_result);
831
832 uint64_t from_little_endian = BitUtil::FromLittleEndian(little_endian_result);
833 ASSERT_EQ(value, from_little_endian);
834}
835
836TEST(BitUtil, RoundTripBigEndianTest) {
837 uint64_t value = 0xFF;
838
839#if ARROW_LITTLE_ENDIAN
840 uint64_t expected = std::numeric_limits<uint64_t>::max() << 56;
841#else
842 uint64_t expected = value;
843#endif
844
845 uint64_t big_endian_result = BitUtil::ToBigEndian(value);
846 ASSERT_EQ(expected, big_endian_result);
847
848 uint64_t from_big_endian = BitUtil::FromBigEndian(big_endian_result);
849 ASSERT_EQ(value, from_big_endian);
850}
851
852} // namespace arrow
853