1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | // Alias MSVC popcount to GCC name |
19 | #ifdef _MSC_VER |
20 | #include <intrin.h> |
21 | #define __builtin_popcount __popcnt |
22 | #include <nmmintrin.h> |
23 | #define __builtin_popcountll _mm_popcnt_u64 |
24 | #endif |
25 | |
26 | #include <algorithm> |
27 | #include <cstdint> |
28 | #include <cstring> |
29 | #include <functional> |
30 | #include <memory> |
31 | #include <vector> |
32 | |
33 | #include "arrow/buffer.h" |
34 | #include "arrow/status.h" |
35 | #include "arrow/util/bit-util.h" |
36 | #include "arrow/util/logging.h" |
37 | |
38 | namespace arrow { |
39 | |
40 | class MemoryPool; |
41 | |
42 | namespace BitUtil { |
43 | namespace { |
44 | |
45 | void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) { |
46 | for (size_t i = 0; i < bytes.size(); ++i) { |
47 | if (bytes[i] > 0) { |
48 | SetBit(bits, i); |
49 | } |
50 | } |
51 | } |
52 | |
53 | } // namespace |
54 | |
55 | Status BytesToBits(const std::vector<uint8_t>& bytes, MemoryPool* pool, |
56 | std::shared_ptr<Buffer>* out) { |
57 | int64_t bit_length = BytesForBits(bytes.size()); |
58 | |
59 | std::shared_ptr<Buffer> buffer; |
60 | RETURN_NOT_OK(AllocateBuffer(pool, bit_length, &buffer)); |
61 | uint8_t* out_buf = buffer->mutable_data(); |
62 | memset(out_buf, 0, static_cast<size_t>(buffer->capacity())); |
63 | FillBitsFromBytes(bytes, out_buf); |
64 | |
65 | *out = buffer; |
66 | return Status::OK(); |
67 | } |
68 | |
69 | } // namespace BitUtil |
70 | |
71 | namespace internal { |
72 | |
// Count the set bits in `data` within the bit range
// [bit_offset, bit_offset + length).
//
// Strategy: count leading bits one at a time until a 64-bit boundary is
// reached, popcount whole 64-bit words, then count trailing bits one at a
// time.
//
// NOTE(review): the fast path reinterpret_casts `data` to uint64_t* and
// dereferences it, which assumes `data` is suitably aligned for 8-byte
// loads — TODO confirm all callers pass allocator-aligned buffers.
int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
  constexpr int64_t pop_len = sizeof(uint64_t) * 8;  // bits per popcount word

  int64_t count = 0;

  // The first bit offset where we can use a 64-bit wide hardware popcount
  const int64_t fast_count_start = BitUtil::RoundUp(bit_offset, pop_len);

  // The number of bits until fast_count_start
  const int64_t initial_bits = std::min(length, fast_count_start - bit_offset);
  for (int64_t i = bit_offset; i < bit_offset + initial_bits; ++i) {
    if (BitUtil::GetBit(data, i)) {
      ++count;
    }
  }

  // Number of whole 64-bit words fully contained in the remaining range
  const int64_t fast_counts = (length - initial_bits) / pop_len;

  // Advance until the first aligned 8-byte word after the initial bits
  const uint64_t* u64_data =
      reinterpret_cast<const uint64_t*>(data) + fast_count_start / pop_len;

  const uint64_t* end = u64_data + fast_counts;

  // popcount as much as possible with the widest possible count
  for (auto iter = u64_data; iter < end; ++iter) {
    count += __builtin_popcountll(*iter);
  }

  // Account for left over bit (in theory we could fall back to smaller
  // versions of popcount but the code complexity is likely not worth it)
  const int64_t tail_index = bit_offset + initial_bits + fast_counts * pop_len;
  for (int64_t i = tail_index; i < bit_offset + length; ++i) {
    if (BitUtil::GetBit(data, i)) {
      ++count;
    }
  }

  return count;
}
113 | |
// Copy `length` bits from `data` at bit `offset` into `dest` at bit
// `dest_offset`, optionally inverting every bit.
//
// Template parameters:
//   invert_bits           - if true, write the bitwise complement of the
//                           source bits
//   restore_trailing_bits - if true, bits of the last destination byte past
//                           `length` are restored to their original values;
//                           if false, both fast-path branches may clobber
//                           them with copied bits (callers must then zero
//                           them, see the pool-allocating overload).
//
// NOTE(review): on the unaligned-destination path below,
// restore_trailing_bits is not consulted; trailing-bit handling is left to
// BitmapWriter::Finish() — confirm it preserves bits past `length`.
template <bool invert_bits, bool restore_trailing_bits>
void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
                    int64_t dest_offset, uint8_t* dest) {
  int64_t byte_offset = offset / 8;
  int64_t bit_offset = offset % 8;
  int64_t dest_byte_offset = dest_offset / 8;
  int64_t dest_bit_offset = dest_offset % 8;
  int64_t num_bytes = BitUtil::BytesForBits(length);
  // Shift dest by its byte offset
  dest += dest_byte_offset;

  if (dest_bit_offset > 0) {
    // Destination is not byte-aligned: fall back to a bit-by-bit copy.
    internal::BitmapReader valid_reader(data, offset, length);
    internal::BitmapWriter valid_writer(dest, dest_bit_offset, length);

    for (int64_t i = 0; i < length; i++) {
      // XOR with invert_bits flips the bit exactly when inversion is on.
      if (invert_bits ^ valid_reader.IsSet()) {
        valid_writer.Set();
      } else {
        valid_writer.Clear();
      }
      valid_reader.Next();
      valid_writer.Next();
    }
    valid_writer.Finish();
  } else {
    // Take care of the trailing bits in the last byte
    int64_t trailing_bits = num_bytes * 8 - length;
    uint8_t trail = 0;
    if (trailing_bits && restore_trailing_bits) {
      // Snapshot the last destination byte so its tail bits can be restored.
      trail = dest[num_bytes - 1];
    }

    if (bit_offset > 0) {
      // Source is not byte-aligned: shift each source byte right by
      // bit_offset and OR in the low bits carried down from the next byte.
      uint8_t carry_mask = BitUtil::kPrecedingBitmask[bit_offset];
      uint8_t carry_shift = static_cast<uint8_t>(8U - static_cast<uint8_t>(bit_offset));

      // Seed the carry from the byte past the last full source byte, but
      // only when the source range actually spans that extra byte.
      uint8_t carry = 0U;
      if (BitUtil::BytesForBits(length + bit_offset) > num_bytes) {
        carry = static_cast<uint8_t>((data[byte_offset + num_bytes] & carry_mask)
                                     << carry_shift);
      }

      // Walk backwards (i counts down to 0; `i + 1 > 0` is `i >= 0`) so each
      // byte's carry comes from the byte already processed above it.
      int64_t i = num_bytes - 1;
      while (i + 1 > 0) {
        uint8_t cur_byte = data[byte_offset + i];
        if (invert_bits) {
          dest[i] = static_cast<uint8_t>(~((cur_byte >> bit_offset) | carry));
        } else {
          dest[i] = static_cast<uint8_t>((cur_byte >> bit_offset) | carry);
        }
        carry = static_cast<uint8_t>((cur_byte & carry_mask) << carry_shift);
        --i;
      }
    } else {
      // Fully byte-aligned: invert bytewise or copy with memcpy.
      if (invert_bits) {
        for (int64_t i = 0; i < num_bytes; i++) {
          dest[i] = static_cast<uint8_t>(~(data[byte_offset + i]));
        }
      } else {
        std::memcpy(dest, data + byte_offset, static_cast<size_t>(num_bytes));
      }
    }

    if (restore_trailing_bits) {
      // Write back the saved tail bits of the last byte one at a time.
      for (int i = 0; i < trailing_bits; i++) {
        if (BitUtil::GetBit(&trail, i + 8 - trailing_bits)) {
          BitUtil::SetBit(dest, length + i);
        } else {
          BitUtil::ClearBit(dest, length + i);
        }
      }
    }
  }
}
189 | |
190 | template <bool invert_bits> |
191 | Status TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, |
192 | int64_t length, std::shared_ptr<Buffer>* out) { |
193 | std::shared_ptr<Buffer> buffer; |
194 | RETURN_NOT_OK(AllocateEmptyBitmap(pool, length, &buffer)); |
195 | uint8_t* dest = buffer->mutable_data(); |
196 | |
197 | TransferBitmap<invert_bits, false>(data, offset, length, 0, dest); |
198 | |
199 | // As we have freshly allocated this bitmap, we should take care of zeroing the |
200 | // remaining bits. |
201 | int64_t num_bytes = BitUtil::BytesForBits(length); |
202 | int64_t bits_to_zero = num_bytes * 8 - length; |
203 | for (int64_t i = length; i < length + bits_to_zero; ++i) { |
204 | // Both branches may copy extra bits - unsetting to match specification. |
205 | BitUtil::ClearBit(dest, i); |
206 | } |
207 | |
208 | *out = buffer; |
209 | return Status::OK(); |
210 | } |
211 | |
212 | void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest, |
213 | int64_t dest_offset) { |
214 | TransferBitmap<false, true>(data, offset, length, dest_offset, dest); |
215 | } |
216 | |
217 | void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest, |
218 | int64_t dest_offset) { |
219 | TransferBitmap<true, true>(data, offset, length, dest_offset, dest); |
220 | } |
221 | |
222 | Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length, |
223 | std::shared_ptr<Buffer>* out) { |
224 | return TransferBitmap<false>(pool, data, offset, length, out); |
225 | } |
226 | |
227 | Status InvertBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length, |
228 | std::shared_ptr<Buffer>* out) { |
229 | return TransferBitmap<true>(pool, data, offset, length, out); |
230 | } |
231 | |
232 | bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right, |
233 | int64_t right_offset, int64_t bit_length) { |
234 | if (left_offset % 8 == 0 && right_offset % 8 == 0) { |
235 | // byte aligned, can use memcmp |
236 | bool bytes_equal = std::memcmp(left + left_offset / 8, right + right_offset / 8, |
237 | bit_length / 8) == 0; |
238 | if (!bytes_equal) { |
239 | return false; |
240 | } |
241 | for (int64_t i = (bit_length / 8) * 8; i < bit_length; ++i) { |
242 | if (BitUtil::GetBit(left, left_offset + i) != |
243 | BitUtil::GetBit(right, right_offset + i)) { |
244 | return false; |
245 | } |
246 | } |
247 | return true; |
248 | } |
249 | |
250 | // Unaligned slow case |
251 | for (int64_t i = 0; i < bit_length; ++i) { |
252 | if (BitUtil::GetBit(left, left_offset + i) != |
253 | BitUtil::GetBit(right, right_offset + i)) { |
254 | return false; |
255 | } |
256 | } |
257 | return true; |
258 | } |
259 | |
260 | namespace { |
261 | |
262 | template <typename Op> |
263 | void AlignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right, |
264 | int64_t right_offset, uint8_t* out, int64_t out_offset, |
265 | int64_t length) { |
266 | Op op; |
267 | DCHECK_EQ(left_offset % 8, right_offset % 8); |
268 | DCHECK_EQ(left_offset % 8, out_offset % 8); |
269 | |
270 | const int64_t nbytes = BitUtil::BytesForBits(length + left_offset); |
271 | left += left_offset / 8; |
272 | right += right_offset / 8; |
273 | out += out_offset / 8; |
274 | for (int64_t i = 0; i < nbytes; ++i) { |
275 | out[i] = op(left[i], right[i]); |
276 | } |
277 | } |
278 | |
279 | template <typename Op> |
280 | void UnalignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right, |
281 | int64_t right_offset, uint8_t* out, int64_t out_offset, |
282 | int64_t length) { |
283 | Op op; |
284 | auto left_reader = internal::BitmapReader(left, left_offset, length); |
285 | auto right_reader = internal::BitmapReader(right, right_offset, length); |
286 | auto writer = internal::BitmapWriter(out, out_offset, length); |
287 | for (int64_t i = 0; i < length; ++i) { |
288 | if (op(left_reader.IsSet(), right_reader.IsSet())) { |
289 | writer.Set(); |
290 | } |
291 | left_reader.Next(); |
292 | right_reader.Next(); |
293 | writer.Next(); |
294 | } |
295 | writer.Finish(); |
296 | } |
297 | |
298 | template <typename BitOp, typename LogicalOp> |
299 | Status BitmapOp(MemoryPool* pool, const uint8_t* left, int64_t left_offset, |
300 | const uint8_t* right, int64_t right_offset, int64_t length, |
301 | int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) { |
302 | if ((out_offset % 8 == left_offset % 8) && (out_offset % 8 == right_offset % 8)) { |
303 | // Fast case: can use bytewise AND |
304 | const int64_t phys_bits = length + out_offset; |
305 | RETURN_NOT_OK(AllocateEmptyBitmap(pool, phys_bits, out_buffer)); |
306 | AlignedBitmapOp<BitOp>(left, left_offset, right, right_offset, |
307 | (*out_buffer)->mutable_data(), out_offset, length); |
308 | } else { |
309 | // Unaligned |
310 | RETURN_NOT_OK(AllocateEmptyBitmap(pool, length + out_offset, out_buffer)); |
311 | UnalignedBitmapOp<LogicalOp>(left, left_offset, right, right_offset, |
312 | (*out_buffer)->mutable_data(), out_offset, length); |
313 | } |
314 | return Status::OK(); |
315 | } |
316 | |
317 | } // namespace |
318 | |
319 | Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset, |
320 | const uint8_t* right, int64_t right_offset, int64_t length, |
321 | int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) { |
322 | return BitmapOp<std::bit_and<uint8_t>, std::logical_and<bool>>( |
323 | pool, left, left_offset, right, right_offset, length, out_offset, out_buffer); |
324 | } |
325 | |
326 | Status BitmapOr(MemoryPool* pool, const uint8_t* left, int64_t left_offset, |
327 | const uint8_t* right, int64_t right_offset, int64_t length, |
328 | int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) { |
329 | return BitmapOp<std::bit_or<uint8_t>, std::logical_or<bool>>( |
330 | pool, left, left_offset, right, right_offset, length, out_offset, out_buffer); |
331 | } |
332 | |
333 | Status BitmapXor(MemoryPool* pool, const uint8_t* left, int64_t left_offset, |
334 | const uint8_t* right, int64_t right_offset, int64_t length, |
335 | int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) { |
336 | return BitmapOp<std::bit_xor<uint8_t>, std::bit_xor<bool>>( |
337 | pool, left, left_offset, right, right_offset, length, out_offset, out_buffer); |
338 | } |
339 | |
340 | } // namespace internal |
341 | } // namespace arrow |
342 | |