1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <algorithm> |
19 | #include <cstdint> |
20 | #include <cstring> |
21 | #include <limits> |
22 | #include <memory> |
23 | #include <string> |
24 | #include <utility> |
25 | #include <vector> |
26 | |
27 | #include <gtest/gtest.h> |
28 | |
29 | #include "arrow/buffer-builder.h" |
30 | #include "arrow/buffer.h" |
31 | #include "arrow/memory_pool.h" |
32 | #include "arrow/status.h" |
33 | #include "arrow/test-util.h" |
34 | |
35 | using std::string; |
36 | |
37 | namespace arrow { |
38 | |
39 | TEST(TestAllocate, Bitmap) { |
40 | std::shared_ptr<Buffer> new_buffer; |
41 | EXPECT_OK(AllocateBitmap(default_memory_pool(), 100, &new_buffer)); |
42 | EXPECT_GE(new_buffer->size(), 13); |
43 | EXPECT_EQ(new_buffer->capacity() % 8, 0); |
44 | } |
45 | |
46 | TEST(TestAllocate, EmptyBitmap) { |
47 | std::shared_ptr<Buffer> new_buffer; |
48 | EXPECT_OK(AllocateEmptyBitmap(default_memory_pool(), 100, &new_buffer)); |
49 | EXPECT_EQ(new_buffer->size(), 13); |
50 | EXPECT_EQ(new_buffer->capacity() % 8, 0); |
51 | EXPECT_TRUE(std::all_of(new_buffer->data(), new_buffer->data() + new_buffer->capacity(), |
52 | [](int8_t byte) { return byte == 0; })); |
53 | } |
54 | |
55 | TEST(TestBuffer, FromStdString) { |
56 | std::string val = "hello, world" ; |
57 | |
58 | Buffer buf(val); |
59 | ASSERT_EQ(0, memcmp(buf.data(), val.c_str(), val.size())); |
60 | ASSERT_EQ(static_cast<int64_t>(val.size()), buf.size()); |
61 | } |
62 | |
63 | TEST(TestBuffer, FromStdStringWithMemory) { |
64 | std::string expected = "hello, world" ; |
65 | std::shared_ptr<Buffer> buf; |
66 | |
67 | { |
68 | std::string temp = "hello, world" ; |
69 | ASSERT_OK(Buffer::FromString(temp, &buf)); |
70 | ASSERT_EQ(0, memcmp(buf->data(), temp.c_str(), temp.size())); |
71 | ASSERT_EQ(static_cast<int64_t>(temp.size()), buf->size()); |
72 | } |
73 | |
74 | // Now temp goes out of scope and we check if created buffer |
75 | // is still valid to make sure it actually owns its space |
76 | ASSERT_EQ(0, memcmp(buf->data(), expected.c_str(), expected.size())); |
77 | ASSERT_EQ(static_cast<int64_t>(expected.size()), buf->size()); |
78 | } |
79 | |
80 | TEST(TestBuffer, EqualsWithSameContent) { |
81 | MemoryPool* pool = default_memory_pool(); |
82 | const int32_t bufferSize = 128 * 1024; |
83 | uint8_t* rawBuffer1; |
84 | ASSERT_OK(pool->Allocate(bufferSize, &rawBuffer1)); |
85 | memset(rawBuffer1, 12, bufferSize); |
86 | uint8_t* rawBuffer2; |
87 | ASSERT_OK(pool->Allocate(bufferSize, &rawBuffer2)); |
88 | memset(rawBuffer2, 12, bufferSize); |
89 | uint8_t* rawBuffer3; |
90 | ASSERT_OK(pool->Allocate(bufferSize, &rawBuffer3)); |
91 | memset(rawBuffer3, 3, bufferSize); |
92 | |
93 | Buffer buffer1(rawBuffer1, bufferSize); |
94 | Buffer buffer2(rawBuffer2, bufferSize); |
95 | Buffer buffer3(rawBuffer3, bufferSize); |
96 | ASSERT_TRUE(buffer1.Equals(buffer2)); |
97 | ASSERT_FALSE(buffer1.Equals(buffer3)); |
98 | |
99 | pool->Free(rawBuffer1, bufferSize); |
100 | pool->Free(rawBuffer2, bufferSize); |
101 | pool->Free(rawBuffer3, bufferSize); |
102 | } |
103 | |
104 | TEST(TestBuffer, EqualsWithSameBuffer) { |
105 | MemoryPool* pool = default_memory_pool(); |
106 | const int32_t bufferSize = 128 * 1024; |
107 | uint8_t* rawBuffer; |
108 | ASSERT_OK(pool->Allocate(bufferSize, &rawBuffer)); |
109 | memset(rawBuffer, 111, bufferSize); |
110 | |
111 | Buffer buffer1(rawBuffer, bufferSize); |
112 | Buffer buffer2(rawBuffer, bufferSize); |
113 | ASSERT_TRUE(buffer1.Equals(buffer2)); |
114 | |
115 | const int64_t nbytes = bufferSize / 2; |
116 | Buffer buffer3(rawBuffer, nbytes); |
117 | ASSERT_TRUE(buffer1.Equals(buffer3, nbytes)); |
118 | ASSERT_FALSE(buffer1.Equals(buffer3, nbytes + 1)); |
119 | |
120 | pool->Free(rawBuffer, bufferSize); |
121 | } |
122 | |
123 | TEST(TestBuffer, Copy) { |
124 | std::string data_str = "some data to copy" ; |
125 | |
126 | auto data = reinterpret_cast<const uint8_t*>(data_str.c_str()); |
127 | |
128 | Buffer buf(data, data_str.size()); |
129 | |
130 | std::shared_ptr<Buffer> out; |
131 | |
132 | ASSERT_OK(buf.Copy(5, 4, &out)); |
133 | |
134 | Buffer expected(data + 5, 4); |
135 | ASSERT_TRUE(out->Equals(expected)); |
136 | // assert the padding is zeroed |
137 | std::vector<uint8_t> zeros(out->capacity() - out->size()); |
138 | ASSERT_EQ(0, memcmp(out->data() + out->size(), zeros.data(), zeros.size())); |
139 | } |
140 | |
141 | TEST(TestBuffer, SliceBuffer) { |
142 | std::string data_str = "some data to slice" ; |
143 | |
144 | auto data = reinterpret_cast<const uint8_t*>(data_str.c_str()); |
145 | |
146 | auto buf = std::make_shared<Buffer>(data, data_str.size()); |
147 | |
148 | std::shared_ptr<Buffer> out = SliceBuffer(buf, 5, 4); |
149 | Buffer expected(data + 5, 4); |
150 | ASSERT_TRUE(out->Equals(expected)); |
151 | |
152 | ASSERT_EQ(2, buf.use_count()); |
153 | } |
154 | |
155 | TEST(TestMutableBuffer, Wrap) { |
156 | std::vector<int32_t> values = {1, 2, 3}; |
157 | |
158 | auto buf = MutableBuffer::Wrap(values.data(), values.size()); |
159 | reinterpret_cast<int32_t*>(buf->mutable_data())[1] = 4; |
160 | |
161 | ASSERT_EQ(4, values[1]); |
162 | } |
163 | |
164 | TEST(TestBuffer, FromStringRvalue) { |
165 | std::string expected = "input data" ; |
166 | |
167 | std::shared_ptr<Buffer> buffer; |
168 | { |
169 | std::string data_str = "input data" ; |
170 | buffer = Buffer::FromString(std::move(data_str)); |
171 | } |
172 | |
173 | ASSERT_FALSE(buffer->is_mutable()); |
174 | |
175 | ASSERT_EQ(0, memcmp(buffer->data(), expected.c_str(), expected.size())); |
176 | ASSERT_EQ(static_cast<int64_t>(expected.size()), buffer->size()); |
177 | } |
178 | |
179 | TEST(TestBuffer, SliceMutableBuffer) { |
180 | std::string data_str = "some data to slice" ; |
181 | auto data = reinterpret_cast<const uint8_t*>(data_str.c_str()); |
182 | |
183 | std::shared_ptr<Buffer> buffer; |
184 | ASSERT_OK(AllocateBuffer(50, &buffer)); |
185 | |
186 | memcpy(buffer->mutable_data(), data, data_str.size()); |
187 | |
188 | std::shared_ptr<Buffer> slice = SliceMutableBuffer(buffer, 5, 10); |
189 | ASSERT_TRUE(slice->is_mutable()); |
190 | ASSERT_EQ(10, slice->size()); |
191 | |
192 | Buffer expected(data + 5, 10); |
193 | ASSERT_TRUE(slice->Equals(expected)); |
194 | } |
195 | |
196 | template <typename AllocateFunction> |
197 | void TestZeroSizeAllocateBuffer(MemoryPool* pool, AllocateFunction&& allocate_func) { |
198 | auto allocated_bytes = pool->bytes_allocated(); |
199 | { |
200 | std::shared_ptr<Buffer> buffer; |
201 | |
202 | ASSERT_OK(allocate_func(pool, 0, &buffer)); |
203 | ASSERT_EQ(buffer->size(), 0); |
204 | // Even 0-sized buffers should not have a null data pointer |
205 | ASSERT_NE(buffer->data(), nullptr); |
206 | ASSERT_EQ(buffer->mutable_data(), buffer->data()); |
207 | |
208 | ASSERT_GE(pool->bytes_allocated(), allocated_bytes); |
209 | } |
210 | ASSERT_EQ(pool->bytes_allocated(), allocated_bytes); |
211 | } |
212 | |
213 | TEST(TestAllocateBuffer, ZeroSize) { |
214 | MemoryPool* pool = default_memory_pool(); |
215 | auto allocate_func = [](MemoryPool* pool, int64_t size, std::shared_ptr<Buffer>* out) { |
216 | return AllocateBuffer(pool, size, out); |
217 | }; |
218 | TestZeroSizeAllocateBuffer(pool, allocate_func); |
219 | } |
220 | |
221 | TEST(TestAllocateResizableBuffer, ZeroSize) { |
222 | MemoryPool* pool = default_memory_pool(); |
223 | auto allocate_func = [](MemoryPool* pool, int64_t size, std::shared_ptr<Buffer>* out) { |
224 | std::shared_ptr<ResizableBuffer> res; |
225 | RETURN_NOT_OK(AllocateResizableBuffer(pool, size, &res)); |
226 | *out = res; |
227 | return Status::OK(); |
228 | }; |
229 | TestZeroSizeAllocateBuffer(pool, allocate_func); |
230 | } |
231 | |
232 | TEST(TestAllocateResizableBuffer, ZeroResize) { |
233 | MemoryPool* pool = default_memory_pool(); |
234 | auto allocated_bytes = pool->bytes_allocated(); |
235 | { |
236 | std::shared_ptr<ResizableBuffer> buffer; |
237 | |
238 | ASSERT_OK(AllocateResizableBuffer(pool, 1000, &buffer)); |
239 | ASSERT_EQ(buffer->size(), 1000); |
240 | ASSERT_NE(buffer->data(), nullptr); |
241 | ASSERT_EQ(buffer->mutable_data(), buffer->data()); |
242 | |
243 | ASSERT_GE(pool->bytes_allocated(), allocated_bytes + 1000); |
244 | |
245 | ASSERT_OK(buffer->Resize(0)); |
246 | ASSERT_NE(buffer->data(), nullptr); |
247 | ASSERT_EQ(buffer->mutable_data(), buffer->data()); |
248 | |
249 | ASSERT_GE(pool->bytes_allocated(), allocated_bytes); |
250 | ASSERT_LT(pool->bytes_allocated(), allocated_bytes + 1000); |
251 | } |
252 | ASSERT_EQ(pool->bytes_allocated(), allocated_bytes); |
253 | } |
254 | |
255 | TEST(TestBufferBuilder, ResizeReserve) { |
256 | const std::string data = "some data" ; |
257 | auto data_ptr = data.c_str(); |
258 | |
259 | BufferBuilder builder; |
260 | |
261 | ASSERT_OK(builder.Append(data_ptr, 9)); |
262 | ASSERT_EQ(9, builder.length()); |
263 | |
264 | ASSERT_OK(builder.Resize(128)); |
265 | ASSERT_EQ(128, builder.capacity()); |
266 | |
267 | // Do not shrink to fit |
268 | ASSERT_OK(builder.Resize(64, false)); |
269 | ASSERT_EQ(128, builder.capacity()); |
270 | |
271 | // Shrink to fit |
272 | ASSERT_OK(builder.Resize(64)); |
273 | ASSERT_EQ(64, builder.capacity()); |
274 | |
275 | // Reserve elements |
276 | ASSERT_OK(builder.Reserve(60)); |
277 | ASSERT_EQ(128, builder.capacity()); |
278 | } |
279 | |
280 | template <typename T> |
281 | class TypedTestBufferBuilder : public ::testing::Test {}; |
282 | |
283 | using BufferBuilderElements = ::testing::Types<int16_t, uint32_t, double>; |
284 | |
285 | TYPED_TEST_CASE(TypedTestBufferBuilder, BufferBuilderElements); |
286 | |
287 | TYPED_TEST(TypedTestBufferBuilder, BasicTypedBufferBuilderUsage) { |
288 | TypedBufferBuilder<TypeParam> builder; |
289 | |
290 | ASSERT_OK(builder.Append(static_cast<TypeParam>(0))); |
291 | ASSERT_EQ(builder.length(), 1); |
292 | ASSERT_EQ(builder.capacity(), 64 / sizeof(TypeParam)); |
293 | |
294 | constexpr int nvalues = 4; |
295 | TypeParam values[nvalues]; |
296 | for (int i = 0; i != nvalues; ++i) { |
297 | values[i] = static_cast<TypeParam>(i); |
298 | } |
299 | ASSERT_OK(builder.Append(values, nvalues)); |
300 | ASSERT_EQ(builder.length(), nvalues + 1); |
301 | |
302 | std::shared_ptr<Buffer> built; |
303 | ASSERT_OK(builder.Finish(&built)); |
304 | |
305 | auto data = reinterpret_cast<const TypeParam*>(built->data()); |
306 | ASSERT_EQ(data[0], static_cast<TypeParam>(0)); |
307 | for (auto value : values) { |
308 | ++data; |
309 | ASSERT_EQ(*data, value); |
310 | } |
311 | } |
312 | |
313 | TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) { |
314 | TypedBufferBuilder<bool> builder; |
315 | |
316 | ASSERT_OK(builder.Append(false)); |
317 | ASSERT_EQ(builder.length(), 1); |
318 | ASSERT_EQ(builder.capacity(), 64 * 8); |
319 | |
320 | constexpr int nvalues = 4; |
321 | uint8_t values[nvalues]; |
322 | for (int i = 0; i != nvalues; ++i) { |
323 | values[i] = static_cast<uint8_t>(i); |
324 | } |
325 | ASSERT_OK(builder.Append(values, nvalues)); |
326 | ASSERT_EQ(builder.length(), nvalues + 1); |
327 | |
328 | ASSERT_EQ(builder.false_count(), 2); |
329 | |
330 | std::shared_ptr<Buffer> built; |
331 | ASSERT_OK(builder.Finish(&built)); |
332 | |
333 | ASSERT_EQ(BitUtil::GetBit(built->data(), 0), false); |
334 | for (int i = 0; i != nvalues; ++i) { |
335 | ASSERT_EQ(BitUtil::GetBit(built->data(), i + 1), static_cast<bool>(values[i])); |
336 | } |
337 | } |
338 | |
339 | TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) { |
340 | TypedBufferBuilder<bool> builder; |
341 | |
342 | ASSERT_OK(builder.Append(13, true)); |
343 | ASSERT_OK(builder.Append(17, false)); |
344 | ASSERT_EQ(builder.length(), 13 + 17); |
345 | ASSERT_EQ(builder.capacity(), 64 * 8); |
346 | ASSERT_EQ(builder.false_count(), 17); |
347 | |
348 | std::shared_ptr<Buffer> built; |
349 | ASSERT_OK(builder.Finish(&built)); |
350 | |
351 | for (int i = 0; i != 13 + 17; ++i) { |
352 | EXPECT_EQ(BitUtil::GetBit(built->data(), i), i < 13) << "index = " << i; |
353 | } |
354 | } |
355 | |
356 | template <typename T> |
357 | class TypedTestBuffer : public ::testing::Test {}; |
358 | |
359 | using BufferPtrs = |
360 | ::testing::Types<std::shared_ptr<ResizableBuffer>, std::unique_ptr<ResizableBuffer>>; |
361 | |
362 | TYPED_TEST_CASE(TypedTestBuffer, BufferPtrs); |
363 | |
364 | TYPED_TEST(TypedTestBuffer, IsMutableFlag) { |
365 | Buffer buf(nullptr, 0); |
366 | |
367 | ASSERT_FALSE(buf.is_mutable()); |
368 | |
369 | MutableBuffer mbuf(nullptr, 0); |
370 | ASSERT_TRUE(mbuf.is_mutable()); |
371 | |
372 | TypeParam pool_buf; |
373 | ASSERT_OK(AllocateResizableBuffer(0, &pool_buf)); |
374 | ASSERT_TRUE(pool_buf->is_mutable()); |
375 | } |
376 | |
377 | TYPED_TEST(TypedTestBuffer, Resize) { |
378 | TypeParam buf; |
379 | ASSERT_OK(AllocateResizableBuffer(0, &buf)); |
380 | |
381 | ASSERT_EQ(0, buf->size()); |
382 | ASSERT_OK(buf->Resize(100)); |
383 | ASSERT_EQ(100, buf->size()); |
384 | ASSERT_OK(buf->Resize(200)); |
385 | ASSERT_EQ(200, buf->size()); |
386 | |
387 | // Make it smaller, too |
388 | ASSERT_OK(buf->Resize(50, true)); |
389 | ASSERT_EQ(50, buf->size()); |
390 | // We have actually shrunken in size |
391 | // The spec requires that capacity is a multiple of 64 |
392 | ASSERT_EQ(64, buf->capacity()); |
393 | |
394 | // Resize to a larger capacity again to test shrink_to_fit = false |
395 | ASSERT_OK(buf->Resize(100)); |
396 | ASSERT_EQ(128, buf->capacity()); |
397 | ASSERT_OK(buf->Resize(50, false)); |
398 | ASSERT_EQ(128, buf->capacity()); |
399 | } |
400 | |
401 | TYPED_TEST(TypedTestBuffer, TypedResize) { |
402 | TypeParam buf; |
403 | ASSERT_OK(AllocateResizableBuffer(0, &buf)); |
404 | |
405 | ASSERT_EQ(0, buf->size()); |
406 | ASSERT_OK(buf->template TypedResize<double>(100)); |
407 | ASSERT_EQ(800, buf->size()); |
408 | ASSERT_OK(buf->template TypedResize<double>(200)); |
409 | ASSERT_EQ(1600, buf->size()); |
410 | |
411 | ASSERT_OK(buf->template TypedResize<double>(50, true)); |
412 | ASSERT_EQ(400, buf->size()); |
413 | ASSERT_EQ(448, buf->capacity()); |
414 | |
415 | ASSERT_OK(buf->template TypedResize<double>(100)); |
416 | ASSERT_EQ(832, buf->capacity()); |
417 | ASSERT_OK(buf->template TypedResize<double>(50, false)); |
418 | ASSERT_EQ(832, buf->capacity()); |
419 | } |
420 | |
421 | TYPED_TEST(TypedTestBuffer, ResizeOOM) { |
422 | // This test doesn't play nice with AddressSanitizer |
423 | #ifndef ADDRESS_SANITIZER |
424 | // realloc fails, even though there may be no explicit limit |
425 | TypeParam buf; |
426 | ASSERT_OK(AllocateResizableBuffer(0, &buf)); |
427 | ASSERT_OK(buf->Resize(100)); |
428 | int64_t to_alloc = std::min<uint64_t>(std::numeric_limits<int64_t>::max(), |
429 | std::numeric_limits<size_t>::max()); |
430 | // subtract 63 to prevent overflow after the size is aligned |
431 | to_alloc -= 63; |
432 | ASSERT_RAISES(OutOfMemory, buf->Resize(to_alloc)); |
433 | #endif |
434 | } |
435 | |
436 | } // namespace arrow |
437 | |