1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <algorithm>
21#include <cstdint>
22#include <cstdlib>
23#include <cstring>
24#include <iostream>
25#include <limits>
26#include <memory>
27#include <random>
28#include <sstream>
29#include <string>
30#include <type_traits>
31#include <vector>
32
33#include <gtest/gtest.h>
34
35#include "arrow/array.h"
36#include "arrow/buffer.h"
37#include "arrow/builder.h"
38#include "arrow/memory_pool.h"
39#include "arrow/pretty_print.h"
40#include "arrow/record_batch.h"
41#include "arrow/status.h"
42#include "arrow/type.h"
43#include "arrow/type_traits.h"
44#include "arrow/util/bit-util.h"
45#include "arrow/util/logging.h"
46#include "arrow/util/macros.h"
47#include "arrow/util/visibility.h"
48
// Fails the current test (via gtest's FAIL()) unless `expr` evaluates to an
// ::arrow::Status for which Is<ENUM>() returns true, e.g.
// ASSERT_RAISES(Invalid, DoSomething()) checks status.IsInvalid().
//
// `expr` is evaluated exactly once. The temporary is named `_s` (the same name
// ASSERT_OK uses) rather than `s`, so that a call such as
// ASSERT_RAISES(Invalid, s) with a test-local variable `s` does not expand to
// the self-initialization `Status s = (s);`.
#define ASSERT_RAISES(ENUM, expr)                                       \
  do {                                                                  \
    ::arrow::Status _s = (expr);                                        \
    if (!_s.Is##ENUM()) {                                               \
      FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with "    \
                ARROW_STRINGIFY(ENUM) ", but got "                      \
             << _s.ToString();                                          \
    }                                                                   \
  } while (false)
58
// Like ASSERT_RAISES, but additionally asserts (with gtest's ASSERT_EQ) that
// the status' ToString() text is exactly equal to `message`.
//
// `expr` is evaluated exactly once. The temporary is named `_s` (the same name
// ASSERT_OK uses) rather than `s`, so that passing a test-local variable
// called `s` as `expr` does not expand to the self-initialization
// `Status s = (s);`.
#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr)                 \
  do {                                                                  \
    ::arrow::Status _s = (expr);                                        \
    if (!_s.Is##ENUM()) {                                               \
      FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with "    \
                ARROW_STRINGIFY(ENUM) ", but got "                      \
             << _s.ToString();                                          \
    }                                                                   \
    ASSERT_EQ((message), _s.ToString());                                \
  } while (false)
69
// Fails the current test (via gtest's FAIL()) unless `expr` evaluates to an
// ::arrow::Status whose ok() is true. The failure message contains the
// stringified expression followed by the status' ToString() text.
// `expr` is evaluated exactly once and stored in the block-local `_s`.
#define ASSERT_OK(expr)                                                      \
  do {                                                                       \
    ::arrow::Status _s = (expr);                                             \
    if (!_s.ok()) {                                                          \
      FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _s.ToString(); \
    }                                                                        \
  } while (false)
77
// ASSERT_OK wrapped in gtest's ASSERT_NO_THROW: the status produced by `expr`
// is checked as in ASSERT_OK, and the whole check runs under ASSERT_NO_THROW.
#define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
79
// Non-fatal counterpart of ASSERT_OK: records a gtest failure (EXPECT_TRUE)
// when `expr` does not evaluate to an OK ::arrow::Status, then lets the test
// continue. On failure the stringified expression and the status text are
// streamed into the message, matching what ASSERT_OK reports.
//
// `expr` is evaluated exactly once. The temporary is named `_s` (as in
// ASSERT_OK) rather than `s`, so that EXPECT_OK(s) with a test-local variable
// `s` does not expand to the self-initialization `Status s = (s);`.
#define EXPECT_OK(expr)                                                   \
  do {                                                                    \
    ::arrow::Status _s = (expr);                                          \
    EXPECT_TRUE(_s.ok()) << "'" ARROW_STRINGIFY(expr) "' failed with "    \
                         << _s.ToString();                                \
  } while (false)
85
// Evaluates `s` once; if the resulting ::arrow::Status is not OK, prints its
// ToString() text to std::cerr and calls std::abort().
//
// The message is taken from the stored copy `_s`, not from the macro argument:
// printing the argument would evaluate the expression a second time on the
// failure path and would mis-parse arguments that are not primary expressions.
//
// NOTE: the trailing semicolon after `while (false)` is kept on purpose so
// that existing call sites written without their own semicolon keep
// compiling. As a consequence this macro cannot be the sole statement of an
// `if` branch that is followed by `else`.
#define ABORT_NOT_OK(s)                    \
  do {                                     \
    ::arrow::Status _s = (s);              \
    if (ARROW_PREDICT_FALSE(!_s.ok())) {   \
      std::cerr << _s.ToString() << "\n";  \
      std::abort();                        \
    }                                      \
  } while (false);
94
95namespace arrow {
96
// Forward declarations of types that the helpers declared below refer to.
class ChunkedArray;
class Column;
class Table;

// Shorthand for a vector of shared Array pointers.
using ArrayVector = std::vector<std::shared_ptr<Array>>;
102
// Fails the current test (via gtest's FAIL()) when (LEFT).Equals(RIGHT) is
// false. In the failure message RIGHT is pretty-printed under "Got:" and LEFT
// under "Expected:", i.e. LEFT plays the role of the expected value.
// Both arguments are expanded more than once, so they should be free of side
// effects. Errors from PrettyPrint itself are reported through EXPECT_OK.
#define ASSERT_ARRAYS_EQUAL(LEFT, RIGHT)                                               \
  do {                                                                                 \
    if (!(LEFT).Equals((RIGHT))) {                                                     \
      std::stringstream pp_result;                                                     \
      std::stringstream pp_expected;                                                   \
                                                                                       \
      EXPECT_OK(PrettyPrint(RIGHT, 0, &pp_result));                                    \
      EXPECT_OK(PrettyPrint(LEFT, 0, &pp_expected));                                   \
      FAIL() << "Got: \n" << pp_result.str() << "\nExpected: \n" << pp_expected.str(); \
    }                                                                                  \
  } while (false)
114
// Resizes `out` to N elements and overwrites every element with a value drawn
// from std::uniform_int_distribution<T>(lower, upper), converted to U.
// The engine is always seeded with 0, so repeated calls with the same
// arguments produce the same sequence.
template <typename T, typename U>
void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
  const int kFixedSeed = 0;
  std::default_random_engine engine(kFixedSeed);
  std::uniform_int_distribution<T> dist(lower, upper);

  out->resize(N, static_cast<T>(0));
  // `auto&&` so that proxy references (std::vector<bool>) bind as well.
  for (auto&& slot : *out) {
    slot = static_cast<U>(dist(engine));
  }
}
123
// Resizes `out` to n elements and overwrites every element with a value drawn
// from std::uniform_real_distribution<T>(min_value, max_value), converted to U.
// The engine is seeded with `seed`, so the output is reproducible per seed.
template <typename T, typename U>
void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
                 std::vector<U>* out) {
  std::default_random_engine engine(seed);
  std::uniform_real_distribution<T> dist(min_value, max_value);

  out->resize(n, static_cast<T>(0));
  for (auto&& slot : *out) {
    slot = static_cast<U>(dist(engine));
  }
}
132
133template <typename T>
134inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
135 std::shared_ptr<Buffer>* result) {
136 int64_t nbytes = static_cast<int>(values.size()) * sizeof(T);
137
138 std::shared_ptr<Buffer> buffer;
139 RETURN_NOT_OK(AllocateBuffer(pool, nbytes, &buffer));
140 auto immutable_data = reinterpret_cast<const uint8_t*>(values.data());
141 std::copy(immutable_data, immutable_data + nbytes, buffer->mutable_data());
142 memset(buffer->mutable_data() + nbytes, 0,
143 static_cast<size_t>(buffer->capacity() - nbytes));
144
145 *result = buffer;
146 return Status::OK();
147}
148
149template <typename T>
150static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
151 std::shared_ptr<Buffer>* result) {
152 size_t length = is_valid.size();
153
154 std::shared_ptr<Buffer> buffer;
155 RETURN_NOT_OK(AllocateEmptyBitmap(length, &buffer));
156
157 uint8_t* bitmap = buffer->mutable_data();
158 for (size_t i = 0; i < static_cast<size_t>(length); ++i) {
159 if (is_valid[i]) {
160 BitUtil::SetBit(bitmap, i);
161 }
162 }
163
164 *result = buffer;
165 return Status::OK();
166}
167
// Test-assertion wrapper around GetBitmapFromVector: builds the bitmap for
// `is_valid` into `out` and reports a non-OK status as a fatal gtest failure
// through ASSERT_OK instead of returning it.
template <typename T>
inline void BitmapFromVector(const std::vector<T>& is_valid,
                             std::shared_ptr<Buffer>* out) {
  ASSERT_OK(GetBitmapFromVector(is_valid, out));
}
173
// Random test-data helpers. These are declarations only; none of them is
// defined in this header.

// Sets approximately pct_null of the first n bytes in null_bytes to zero
// and the rest to non-zero (true) values.
ARROW_EXPORT void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes);
// NOTE(review): presumably the std::vector<bool> analogue of random_null_bytes
// -- confirm against the definition.
ARROW_EXPORT void random_is_valid(int64_t n, double pct_null,
                                  std::vector<bool>* is_valid);
// NOTE(review): presumably writes n seeded pseudo-random bytes to out -- confirm.
ARROW_EXPORT void random_bytes(int64_t n, uint32_t seed, uint8_t* out);
// NOTE(review): presumably the byte width used for a decimal of the given
// precision -- confirm against the definition.
ARROW_EXPORT int32_t DecimalSize(int32_t precision);
// NOTE(review): presumably fills out with n seeded random decimals of the
// given precision; the required size of out is not visible here -- confirm.
ARROW_EXPORT void random_decimals(int64_t n, uint32_t seed, int32_t precision,
                                  uint8_t* out);
// NOTE(review): presumably writes n seeded random ASCII bytes to out -- confirm.
ARROW_EXPORT void random_ascii(int64_t n, uint32_t seed, uint8_t* out);
// NOTE(review): presumably counts the zero entries of valid_bytes -- confirm.
ARROW_EXPORT int64_t CountNulls(const std::vector<uint8_t>& valid_bytes);

// NOTE(review): presumably allocates a resizable buffer of `length` bytes from
// `pool` and fills it with seeded random bytes -- confirm against the definition.
ARROW_EXPORT Status MakeRandomByteBuffer(int64_t length, MemoryPool* pool,
                                         std::shared_ptr<ResizableBuffer>* out,
                                         uint32_t seed = 0);
189
// Equality assertions for tests. These are declarations only; none of them is
// defined in this header.
// NOTE(review): presumably each one reports a gtest failure when its two
// arguments differ -- confirm against the definitions.
ARROW_EXPORT void AssertArraysEqual(const Array& expected, const Array& actual);
ARROW_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
                                     const ChunkedArray& actual);
// Note the parameter order: this overload takes `actual` first and the
// expected chunks second, unlike the overload above.
ARROW_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
                                     const ArrayVector& expected);
ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer,
                                    const std::vector<uint8_t>& expected);
ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer, const std::string& expected);
ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer, const Buffer& expected);
ARROW_EXPORT void AssertSchemaEqual(const Schema& lhs, const Schema& rhs);

// NOTE(review): presumably writes a textual rendering of `col` to `ss` -- confirm.
ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
// NOTE(review): `same_chunk_layout` presumably controls whether the chunk
// boundaries must match as well as the values -- confirm against the definition.
ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
                                    bool same_chunk_layout = true);
204
// Asserts (gtest ASSERT_EQ) that the values starting at `raw_data` match
// `expected_values` element by element, in order. The first mismatch ends the
// check. `raw_data` must point to at least expected_values.size() readable
// elements, since one element is read per expected value.
template <typename C_TYPE>
void AssertNumericDataEqual(const C_TYPE* raw_data,
                            const std::vector<C_TYPE>& expected_values) {
  for (auto it = expected_values.begin(); it != expected_values.end();
       ++it, ++raw_data) {
    auto expected = *it;
    ASSERT_EQ(expected, *raw_data);
  }
}
213
// NOTE(review): presumably asserts that the two record batches are equal
// (declaration only; the definition is not in this header) -- confirm.
ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);

// Check if the padding of the buffers of the array is zero.
// Also cause valgrind warnings if the padding bytes are uninitialized.
ARROW_EXPORT void AssertZeroPadded(const Array& array);

// Check if the valid buffer bytes are initialized
// and cause valgrind warnings otherwise.
ARROW_EXPORT void TestInitialized(const Array& array);
223
224template <typename BuilderType>
225void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
226 ASSERT_OK(builder->Finish(out));
227 AssertZeroPadded(**out);
228 TestInitialized(**out);
229}
230
// Writes n values to out[0..n), each drawn from
// std::uniform_int_distribution<T>(min_value, max_value) and converted to U.
// The engine is seeded with `seed`. `out` may be null only when n == 0
// (checked with DCHECK).
template <typename T, typename U>
void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
  DCHECK(out || (n == 0));
  std::default_random_engine engine(seed);
  std::uniform_int_distribution<T> dist(min_value, max_value);

  U* const end = out + n;
  for (U* cursor = out; cursor != end; ++cursor) {
    *cursor = static_cast<U>(dist(engine));
  }
}
238
// Random-value generator selected by element type. The primary template is
// intentionally empty; only the specializations provide Gen().
template <typename T, typename Enable = void>
struct GenerateRandom {};

// Integral types: fills `out`, interpreted as an array of `length` T values,
// with values spanning the full [numeric_limits<T>::min(), max()] range.
template <typename T>
struct GenerateRandom<T, typename std::enable_if<std::is_integral<T>::value>::type> {
  static void Gen(int64_t length, uint32_t seed, void* out) {
    using Limits = std::numeric_limits<T>;
    T* const typed_out = reinterpret_cast<T*>(out);
    rand_uniform_int(length, seed, Limits::min(), Limits::max(), typed_out);
  }
};
249
// Allocates a resizable buffer of sizeof(T) * length bytes from `pool` and
// fills it with `length` values produced by GenerateRandom<T>::Gen using `seed`.
//
// \param[in] length number of T elements to generate
// \param[in] pool memory pool used for the allocation; must not be null (DCHECK)
// \param[out] out receives the filled buffer on success
// \param[in] seed seed forwarded to the generator
// \return the error from AllocateResizableBuffer if allocation fails, otherwise OK
template <typename T>
Status MakeRandomBuffer(int64_t length, MemoryPool* pool,
                        std::shared_ptr<ResizableBuffer>* out, uint32_t seed = 0) {
  DCHECK(pool);
  std::shared_ptr<ResizableBuffer> result;
  RETURN_NOT_OK(AllocateResizableBuffer(pool, sizeof(T) * length, &result));
  GenerateRandom<T>::Gen(length, seed, result->mutable_data());
  // `out` is only written after allocation and generation have succeeded.
  *out = result;
  return Status::OK();
}
260
// ArrayFromJSON: construct an Array from a simple JSON representation
// (declaration only; the definition is not in this header).
// NOTE(review): how malformed JSON or a type mismatch is reported is not
// visible here -- confirm against the definition.

ARROW_EXPORT
std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>&,
                                     const std::string& json);
266
267// ArrayFromVector: construct an Array from vectors of C values
268
269template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
270void ArrayFromVector(const std::shared_ptr<DataType>& type,
271 const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
272 std::shared_ptr<Array>* out) {
273 DCHECK_EQ(TYPE::type_id, type->id())
274 << "template parameter and concrete DataType instance don't agree";
275
276 std::unique_ptr<ArrayBuilder> builder_ptr;
277 ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
278 // Get the concrete builder class to access its Append() specializations
279 auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
280
281 for (size_t i = 0; i < values.size(); ++i) {
282 if (is_valid[i]) {
283 ASSERT_OK(builder.Append(values[i]));
284 } else {
285 ASSERT_OK(builder.AppendNull());
286 }
287 }
288 ASSERT_OK(builder.Finish(out));
289}
290
291template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
292void ArrayFromVector(const std::shared_ptr<DataType>& type,
293 const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
294 DCHECK_EQ(TYPE::type_id, type->id())
295 << "template parameter and concrete DataType instance don't agree";
296
297 std::unique_ptr<ArrayBuilder> builder_ptr;
298 ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
299 // Get the concrete builder class to access its Append() specializations
300 auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
301
302 for (size_t i = 0; i < values.size(); ++i) {
303 ASSERT_OK(builder.Append(values[i]));
304 }
305 ASSERT_OK(builder.Finish(out));
306}
307
308// Overloads without a DataType argument, for parameterless types
309
310template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
311void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
312 std::shared_ptr<Array>* out) {
313 auto type = TypeTraits<TYPE>::type_singleton();
314 ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
315}
316
317template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
318void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
319 auto type = TypeTraits<TYPE>::type_singleton();
320 ArrayFromVector<TYPE, C_TYPE>(type, values, out);
321}
322
323// ChunkedArrayFromVector: construct a ChunkedArray from vectors of C values
324
325template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
326void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
327 const std::vector<std::vector<bool>>& is_valid,
328 const std::vector<std::vector<C_TYPE>>& values,
329 std::shared_ptr<ChunkedArray>* out) {
330 ArrayVector chunks;
331 DCHECK_EQ(is_valid.size(), values.size());
332 for (size_t i = 0; i < values.size(); ++i) {
333 std::shared_ptr<Array> array;
334 ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array);
335 chunks.push_back(array);
336 }
337 *out = std::make_shared<ChunkedArray>(chunks);
338}
339
340template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
341void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
342 const std::vector<std::vector<C_TYPE>>& values,
343 std::shared_ptr<ChunkedArray>* out) {
344 ArrayVector chunks;
345 for (size_t i = 0; i < values.size(); ++i) {
346 std::shared_ptr<Array> array;
347 ArrayFromVector<TYPE, C_TYPE>(type, values[i], &array);
348 chunks.push_back(array);
349 }
350 *out = std::make_shared<ChunkedArray>(chunks);
351}
352
353// Overloads without a DataType argument, for parameterless types
354
355template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
356void ChunkedArrayFromVector(const std::vector<std::vector<bool>>& is_valid,
357 const std::vector<std::vector<C_TYPE>>& values,
358 std::shared_ptr<ChunkedArray>* out) {
359 auto type = TypeTraits<TYPE>::type_singleton();
360 ChunkedArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
361}
362
363template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
364void ChunkedArrayFromVector(const std::vector<std::vector<C_TYPE>>& values,
365 std::shared_ptr<ChunkedArray>* out) {
366 auto type = TypeTraits<TYPE>::type_singleton();
367 ChunkedArrayFromVector<TYPE, C_TYPE>(type, values, out);
368}
369
370template <class T, class Builder>
371Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
372 int64_t size, Builder* builder, std::shared_ptr<Array>* out) {
373 // Append the first 1000
374 for (int64_t i = 0; i < size; ++i) {
375 if (valid_bytes[i] > 0) {
376 RETURN_NOT_OK(builder->Append(values[i]));
377 } else {
378 RETURN_NOT_OK(builder->AppendNull());
379 }
380 }
381 return builder->Finish(out);
382}
383
// Inside a typed test body, introduces `T` as an alias for TestFixture::T.
// The expansion already ends with a semicolon.
#define DECL_T() typedef typename TestFixture::T T;

// Inside a typed test body, introduces `Type` as an alias for
// TestFixture::Type. The expansion already ends with a semicolon.
#define DECL_TYPE() typedef typename TestFixture::Type Type;
387
// Fails the current test (via gtest's FAIL()) when (LEFT).ApproxEquals(RIGHT)
// is false; the failure message contains both operands pretty-printed under
// "Left:" and "Right:". Note that the comparison is ApproxEquals, not Equals.
// Both arguments are expanded more than once, so they should be free of side
// effects.
#define ASSERT_BATCHES_EQUAL(LEFT, RIGHT)    \
  do {                                       \
    if (!(LEFT).ApproxEquals(RIGHT)) {       \
      std::stringstream ss;                  \
      ss << "Left:\n";                       \
      ASSERT_OK(PrettyPrint(LEFT, 0, &ss));  \
                                             \
      ss << "\nRight:\n";                    \
      ASSERT_OK(PrettyPrint(RIGHT, 0, &ss)); \
      FAIL() << ss.str();                    \
    }                                        \
  } while (false)
400
401// ----------------------------------------------------------------------
402// A RecordBatchReader for serving a sequence of in-memory record batches
403
404class BatchIterator : public RecordBatchReader {
405 public:
406 BatchIterator(const std::shared_ptr<Schema>& schema,
407 const std::vector<std::shared_ptr<RecordBatch>>& batches)
408 : schema_(schema), batches_(batches), position_(0) {}
409
410 std::shared_ptr<Schema> schema() const override { return schema_; }
411
412 Status ReadNext(std::shared_ptr<RecordBatch>* out) override {
413 if (position_ >= batches_.size()) {
414 *out = nullptr;
415 } else {
416 *out = batches_[position_++];
417 }
418 return Status::OK();
419 }
420
421 private:
422 std::shared_ptr<Schema> schema_;
423 std::vector<std::shared_ptr<RecordBatch>> batches_;
424 size_t position_;
425};
426
427} // namespace arrow
428