1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Alias MSVC popcount to GCC name
19#ifdef _MSC_VER
20#include <intrin.h>
21#define __builtin_popcount __popcnt
22#include <nmmintrin.h>
23#define __builtin_popcountll _mm_popcnt_u64
24#endif
25
26#include <algorithm>
27#include <cstdint>
28#include <cstring>
29#include <functional>
30#include <memory>
31#include <vector>
32
33#include "arrow/buffer.h"
34#include "arrow/status.h"
35#include "arrow/util/bit-util.h"
36#include "arrow/util/logging.h"
37
38namespace arrow {
39
40class MemoryPool;
41
42namespace BitUtil {
43namespace {
44
45void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) {
46 for (size_t i = 0; i < bytes.size(); ++i) {
47 if (bytes[i] > 0) {
48 SetBit(bits, i);
49 }
50 }
51}
52
53} // namespace
54
55Status BytesToBits(const std::vector<uint8_t>& bytes, MemoryPool* pool,
56 std::shared_ptr<Buffer>* out) {
57 int64_t bit_length = BytesForBits(bytes.size());
58
59 std::shared_ptr<Buffer> buffer;
60 RETURN_NOT_OK(AllocateBuffer(pool, bit_length, &buffer));
61 uint8_t* out_buf = buffer->mutable_data();
62 memset(out_buf, 0, static_cast<size_t>(buffer->capacity()));
63 FillBitsFromBytes(bytes, out_buf);
64
65 *out = buffer;
66 return Status::OK();
67}
68
69} // namespace BitUtil
70
71namespace internal {
72
73int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
74 constexpr int64_t pop_len = sizeof(uint64_t) * 8;
75
76 int64_t count = 0;
77
78 // The first bit offset where we can use a 64-bit wide hardware popcount
79 const int64_t fast_count_start = BitUtil::RoundUp(bit_offset, pop_len);
80
81 // The number of bits until fast_count_start
82 const int64_t initial_bits = std::min(length, fast_count_start - bit_offset);
83 for (int64_t i = bit_offset; i < bit_offset + initial_bits; ++i) {
84 if (BitUtil::GetBit(data, i)) {
85 ++count;
86 }
87 }
88
89 const int64_t fast_counts = (length - initial_bits) / pop_len;
90
91 // Advance until the first aligned 8-byte word after the initial bits
92 const uint64_t* u64_data =
93 reinterpret_cast<const uint64_t*>(data) + fast_count_start / pop_len;
94
95 const uint64_t* end = u64_data + fast_counts;
96
97 // popcount as much as possible with the widest possible count
98 for (auto iter = u64_data; iter < end; ++iter) {
99 count += __builtin_popcountll(*iter);
100 }
101
102 // Account for left over bit (in theory we could fall back to smaller
103 // versions of popcount but the code complexity is likely not worth it)
104 const int64_t tail_index = bit_offset + initial_bits + fast_counts * pop_len;
105 for (int64_t i = tail_index; i < bit_offset + length; ++i) {
106 if (BitUtil::GetBit(data, i)) {
107 ++count;
108 }
109 }
110
111 return count;
112}
113
// Copies `length` bits of `data` starting at bit `offset` into `dest`
// starting at bit `dest_offset`.
//
// Template parameters:
//   invert_bits: if true, every copied bit is negated.
//   restore_trailing_bits: if true, bits of the last destination byte that
//     lie past `length` are preserved; if false they may be clobbered with
//     bits copied from the source.
template <bool invert_bits, bool restore_trailing_bits>
void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
                    int64_t dest_offset, uint8_t* dest) {
  int64_t byte_offset = offset / 8;
  int64_t bit_offset = offset % 8;
  int64_t dest_byte_offset = dest_offset / 8;
  int64_t dest_bit_offset = dest_offset % 8;
  int64_t num_bytes = BitUtil::BytesForBits(length);
  // Shift dest by its byte offset
  dest += dest_byte_offset;

  if (dest_bit_offset > 0) {
    // Destination is not byte-aligned: fall back to a bit-by-bit copy.
    // NOTE(review): restore_trailing_bits is not consulted in this branch;
    // presumably BitmapWriter::Finish() leaves bits past `length` intact —
    // confirm against the BitmapWriter implementation.
    internal::BitmapReader valid_reader(data, offset, length);
    internal::BitmapWriter valid_writer(dest, dest_bit_offset, length);

    for (int64_t i = 0; i < length; i++) {
      // XOR negates the source bit exactly when invert_bits is true
      if (invert_bits ^ valid_reader.IsSet()) {
        valid_writer.Set();
      } else {
        valid_writer.Clear();
      }
      valid_reader.Next();
      valid_writer.Next();
    }
    valid_writer.Finish();
  } else {
    // Destination is byte-aligned: copy whole bytes, shifting when the
    // source is not byte-aligned.

    // Save the trailing bits of the last destination byte before the byte
    // copy below potentially overwrites them
    int64_t trailing_bits = num_bytes * 8 - length;
    uint8_t trail = 0;
    if (trailing_bits && restore_trailing_bits) {
      trail = dest[num_bytes - 1];
    }

    if (bit_offset > 0) {
      // Source is unaligned: each output byte combines the high bits of one
      // source byte with low bits carried in from the next source byte.
      uint8_t carry_mask = BitUtil::kPrecedingBitmask[bit_offset];
      uint8_t carry_shift = static_cast<uint8_t>(8U - static_cast<uint8_t>(bit_offset));

      // Seed the carry from the byte just past the last whole source byte,
      // but only when the source range actually extends into it
      uint8_t carry = 0U;
      if (BitUtil::BytesForBits(length + bit_offset) > num_bytes) {
        carry = static_cast<uint8_t>((data[byte_offset + num_bytes] & carry_mask)
                                     << carry_shift);
      }

      // Walk backwards so each byte's incoming carry is computed from the
      // source before dest (which may alias data) is written
      int64_t i = num_bytes - 1;
      while (i + 1 > 0) {
        uint8_t cur_byte = data[byte_offset + i];
        if (invert_bits) {
          dest[i] = static_cast<uint8_t>(~((cur_byte >> bit_offset) | carry));
        } else {
          dest[i] = static_cast<uint8_t>((cur_byte >> bit_offset) | carry);
        }
        carry = static_cast<uint8_t>((cur_byte & carry_mask) << carry_shift);
        --i;
      }
    } else {
      // Fully byte-aligned: invert byte by byte or memcpy directly
      if (invert_bits) {
        for (int64_t i = 0; i < num_bytes; i++) {
          dest[i] = static_cast<uint8_t>(~(data[byte_offset + i]));
        }
      } else {
        std::memcpy(dest, data + byte_offset, static_cast<size_t>(num_bytes));
      }
    }

    if (restore_trailing_bits) {
      // Put the saved bits past `length` back into the last destination byte
      for (int i = 0; i < trailing_bits; i++) {
        if (BitUtil::GetBit(&trail, i + 8 - trailing_bits)) {
          BitUtil::SetBit(dest, length + i);
        } else {
          BitUtil::ClearBit(dest, length + i);
        }
      }
    }
  }
}
189
190template <bool invert_bits>
191Status TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset,
192 int64_t length, std::shared_ptr<Buffer>* out) {
193 std::shared_ptr<Buffer> buffer;
194 RETURN_NOT_OK(AllocateEmptyBitmap(pool, length, &buffer));
195 uint8_t* dest = buffer->mutable_data();
196
197 TransferBitmap<invert_bits, false>(data, offset, length, 0, dest);
198
199 // As we have freshly allocated this bitmap, we should take care of zeroing the
200 // remaining bits.
201 int64_t num_bytes = BitUtil::BytesForBits(length);
202 int64_t bits_to_zero = num_bytes * 8 - length;
203 for (int64_t i = length; i < length + bits_to_zero; ++i) {
204 // Both branches may copy extra bits - unsetting to match specification.
205 BitUtil::ClearBit(dest, i);
206 }
207
208 *out = buffer;
209 return Status::OK();
210}
211
// Copies `length` bits of `data` starting at bit `offset` into `dest`
// starting at bit `dest_offset`, preserving any bits of the final
// destination byte that lie past the copied range.
void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
                int64_t dest_offset) {
  TransferBitmap<false, true>(data, offset, length, dest_offset, dest);
}
216
// Copies the bitwise negation of `length` bits of `data` starting at bit
// `offset` into `dest` starting at bit `dest_offset`, preserving any bits of
// the final destination byte that lie past the copied range.
void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
                  int64_t dest_offset) {
  TransferBitmap<true, true>(data, offset, length, dest_offset, dest);
}
221
// Copies `length` bits of `data` starting at bit `offset` into a newly
// allocated bitmap from `pool`, stored in `*out`. Returns an error Status if
// the allocation fails.
Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length,
                  std::shared_ptr<Buffer>* out) {
  return TransferBitmap<false>(pool, data, offset, length, out);
}
226
// Copies the bitwise negation of `length` bits of `data` starting at bit
// `offset` into a newly allocated bitmap from `pool`, stored in `*out`.
// Returns an error Status if the allocation fails.
Status InvertBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length,
                    std::shared_ptr<Buffer>* out) {
  return TransferBitmap<true>(pool, data, offset, length, out);
}
231
232bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
233 int64_t right_offset, int64_t bit_length) {
234 if (left_offset % 8 == 0 && right_offset % 8 == 0) {
235 // byte aligned, can use memcmp
236 bool bytes_equal = std::memcmp(left + left_offset / 8, right + right_offset / 8,
237 bit_length / 8) == 0;
238 if (!bytes_equal) {
239 return false;
240 }
241 for (int64_t i = (bit_length / 8) * 8; i < bit_length; ++i) {
242 if (BitUtil::GetBit(left, left_offset + i) !=
243 BitUtil::GetBit(right, right_offset + i)) {
244 return false;
245 }
246 }
247 return true;
248 }
249
250 // Unaligned slow case
251 for (int64_t i = 0; i < bit_length; ++i) {
252 if (BitUtil::GetBit(left, left_offset + i) !=
253 BitUtil::GetBit(right, right_offset + i)) {
254 return false;
255 }
256 }
257 return true;
258}
259
260namespace {
261
262template <typename Op>
263void AlignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
264 int64_t right_offset, uint8_t* out, int64_t out_offset,
265 int64_t length) {
266 Op op;
267 DCHECK_EQ(left_offset % 8, right_offset % 8);
268 DCHECK_EQ(left_offset % 8, out_offset % 8);
269
270 const int64_t nbytes = BitUtil::BytesForBits(length + left_offset);
271 left += left_offset / 8;
272 right += right_offset / 8;
273 out += out_offset / 8;
274 for (int64_t i = 0; i < nbytes; ++i) {
275 out[i] = op(left[i], right[i]);
276 }
277}
278
279template <typename Op>
280void UnalignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
281 int64_t right_offset, uint8_t* out, int64_t out_offset,
282 int64_t length) {
283 Op op;
284 auto left_reader = internal::BitmapReader(left, left_offset, length);
285 auto right_reader = internal::BitmapReader(right, right_offset, length);
286 auto writer = internal::BitmapWriter(out, out_offset, length);
287 for (int64_t i = 0; i < length; ++i) {
288 if (op(left_reader.IsSet(), right_reader.IsSet())) {
289 writer.Set();
290 }
291 left_reader.Next();
292 right_reader.Next();
293 writer.Next();
294 }
295 writer.Finish();
296}
297
298template <typename BitOp, typename LogicalOp>
299Status BitmapOp(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
300 const uint8_t* right, int64_t right_offset, int64_t length,
301 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
302 if ((out_offset % 8 == left_offset % 8) && (out_offset % 8 == right_offset % 8)) {
303 // Fast case: can use bytewise AND
304 const int64_t phys_bits = length + out_offset;
305 RETURN_NOT_OK(AllocateEmptyBitmap(pool, phys_bits, out_buffer));
306 AlignedBitmapOp<BitOp>(left, left_offset, right, right_offset,
307 (*out_buffer)->mutable_data(), out_offset, length);
308 } else {
309 // Unaligned
310 RETURN_NOT_OK(AllocateEmptyBitmap(pool, length + out_offset, out_buffer));
311 UnalignedBitmapOp<LogicalOp>(left, left_offset, right, right_offset,
312 (*out_buffer)->mutable_data(), out_offset, length);
313 }
314 return Status::OK();
315}
316
317} // namespace
318
// Computes the bitwise AND of `length` bits of `left` and `right` into a
// newly allocated bitmap at `out_offset`, stored in `*out_buffer`. Returns
// an error Status if the allocation fails.
Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
                 const uint8_t* right, int64_t right_offset, int64_t length,
                 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
  return BitmapOp<std::bit_and<uint8_t>, std::logical_and<bool>>(
      pool, left, left_offset, right, right_offset, length, out_offset, out_buffer);
}
325
// Computes the bitwise OR of `length` bits of `left` and `right` into a
// newly allocated bitmap at `out_offset`, stored in `*out_buffer`. Returns
// an error Status if the allocation fails.
Status BitmapOr(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
                const uint8_t* right, int64_t right_offset, int64_t length,
                int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
  return BitmapOp<std::bit_or<uint8_t>, std::logical_or<bool>>(
      pool, left, left_offset, right, right_offset, length, out_offset, out_buffer);
}
332
// Computes the bitwise XOR of `length` bits of `left` and `right` into a
// newly allocated bitmap at `out_offset`, stored in `*out_buffer`. Returns
// an error Status if the allocation fails.
//
// std::bit_xor<bool> serves as the logical op as well since the standard
// library provides no std::logical_xor.
Status BitmapXor(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
                 const uint8_t* right, int64_t right_offset, int64_t length,
                 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
  return BitmapOp<std::bit_xor<uint8_t>, std::bit_xor<bool>>(
      pool, left, left_offset, right, right_offset, length, out_offset, out_buffer);
}
339
340} // namespace internal
341} // namespace arrow
342