1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_IPC_TEST_COMMON_H
19#define ARROW_IPC_TEST_COMMON_H
20
21#include <algorithm>
22#include <cstdint>
23#include <memory>
24#include <numeric>
25#include <string>
26#include <vector>
27
28#include "arrow/array.h"
29#include "arrow/buffer.h"
30#include "arrow/builder.h"
31#include "arrow/memory_pool.h"
32#include "arrow/pretty_print.h"
33#include "arrow/record_batch.h"
34#include "arrow/status.h"
35#include "arrow/test-util.h"
36#include "arrow/type.h"
37#include "arrow/util/bit-util.h"
38
39namespace arrow {
40namespace ipc {
41
42static inline void CompareArraysDetailed(int index, const Array& result,
43 const Array& expected) {
44 if (!expected.Equals(result)) {
45 std::stringstream pp_result;
46 std::stringstream pp_expected;
47
48 ASSERT_OK(PrettyPrint(expected, 0, &pp_expected));
49 ASSERT_OK(PrettyPrint(result, 0, &pp_result));
50
51 FAIL() << "Index: " << index << " Expected: " << pp_expected.str()
52 << "\nGot: " << pp_result.str();
53 }
54}
55
56static inline void CompareBatchColumnsDetailed(const RecordBatch& result,
57 const RecordBatch& expected) {
58 for (int i = 0; i < expected.num_columns(); ++i) {
59 auto left = result.column(i);
60 auto right = expected.column(i);
61 CompareArraysDetailed(i, *left, *right);
62 }
63}
64
65const auto kListInt32 = list(int32());
66const auto kListListInt32 = list(kListInt32);
67
68Status MakeRandomInt32Array(int64_t length, bool include_nulls, MemoryPool* pool,
69 std::shared_ptr<Array>* out, uint32_t seed = 0) {
70 std::shared_ptr<ResizableBuffer> data;
71 RETURN_NOT_OK(MakeRandomBuffer<int32_t>(length, pool, &data, seed));
72 Int32Builder builder(int32(), pool);
73 RETURN_NOT_OK(builder.Resize(length));
74 if (include_nulls) {
75 std::shared_ptr<ResizableBuffer> valid_bytes;
76 RETURN_NOT_OK(MakeRandomByteBuffer(length, pool, &valid_bytes));
77 RETURN_NOT_OK(builder.AppendValues(reinterpret_cast<const int32_t*>(data->data()),
78 length, valid_bytes->data()));
79 return builder.Finish(out);
80 }
81 RETURN_NOT_OK(
82 builder.AppendValues(reinterpret_cast<const int32_t*>(data->data()), length));
83 return builder.Finish(out);
84}
85
86Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_lists,
87 bool include_nulls, MemoryPool* pool,
88 std::shared_ptr<Array>* out) {
89 // Create the null list values
90 std::vector<uint8_t> valid_lists(num_lists);
91 const double null_percent = include_nulls ? 0.1 : 0;
92 random_null_bytes(num_lists, null_percent, valid_lists.data());
93
94 // Create list offsets
95 const int max_list_size = 10;
96
97 std::vector<int32_t> list_sizes(num_lists, 0);
98 std::vector<int32_t> offsets(
99 num_lists + 1, 0); // +1 so we can shift for nulls. See partial sum below.
100 const uint32_t seed = static_cast<uint32_t>(child_array->length());
101
102 if (num_lists > 0) {
103 rand_uniform_int(num_lists, seed, 0, max_list_size, list_sizes.data());
104 // make sure sizes are consistent with null
105 std::transform(list_sizes.begin(), list_sizes.end(), valid_lists.begin(),
106 list_sizes.begin(),
107 [](int32_t size, int32_t valid) { return valid == 0 ? 0 : size; });
108 std::partial_sum(list_sizes.begin(), list_sizes.end(), ++offsets.begin());
109
110 // Force invariants
111 const int32_t child_length = static_cast<int32_t>(child_array->length());
112 offsets[0] = 0;
113 std::replace_if(offsets.begin(), offsets.end(),
114 [child_length](int32_t offset) { return offset > child_length; },
115 child_length);
116 }
117
118 offsets[num_lists] = static_cast<int32_t>(child_array->length());
119
120 /// TODO(wesm): Implement support for nulls in ListArray::FromArrays
121 std::shared_ptr<Buffer> null_bitmap, offsets_buffer;
122 RETURN_NOT_OK(GetBitmapFromVector(valid_lists, &null_bitmap));
123 RETURN_NOT_OK(CopyBufferFromVector(offsets, pool, &offsets_buffer));
124
125 *out = std::make_shared<ListArray>(list(child_array->type()), num_lists, offsets_buffer,
126 child_array, null_bitmap, kUnknownNullCount);
127 return ValidateArray(**out);
128}
129
130typedef Status MakeRecordBatch(std::shared_ptr<RecordBatch>* out);
131
132Status MakeRandomBooleanArray(const int length, bool include_nulls,
133 std::shared_ptr<Array>* out) {
134 std::vector<uint8_t> values(length);
135 random_null_bytes(length, 0.5, values.data());
136 std::shared_ptr<Buffer> data;
137 RETURN_NOT_OK(BitUtil::BytesToBits(values, default_memory_pool(), &data));
138
139 if (include_nulls) {
140 std::vector<uint8_t> valid_bytes(length);
141 std::shared_ptr<Buffer> null_bitmap;
142 RETURN_NOT_OK(BitUtil::BytesToBits(valid_bytes, default_memory_pool(), &null_bitmap));
143 random_null_bytes(length, 0.1, valid_bytes.data());
144 *out = std::make_shared<BooleanArray>(length, data, null_bitmap, -1);
145 } else {
146 *out = std::make_shared<BooleanArray>(length, data, NULLPTR, 0);
147 }
148 return Status::OK();
149}
150
151Status MakeBooleanBatchSized(const int length, std::shared_ptr<RecordBatch>* out) {
152 // Make the schema
153 auto f0 = field("f0", boolean());
154 auto f1 = field("f1", boolean());
155 auto schema = ::arrow::schema({f0, f1});
156
157 std::shared_ptr<Array> a0, a1;
158 RETURN_NOT_OK(MakeRandomBooleanArray(length, true, &a0));
159 RETURN_NOT_OK(MakeRandomBooleanArray(length, false, &a1));
160 *out = RecordBatch::Make(schema, length, {a0, a1});
161 return Status::OK();
162}
163
164Status MakeBooleanBatch(std::shared_ptr<RecordBatch>* out) {
165 return MakeBooleanBatchSized(1000, out);
166}
167
168Status MakeIntBatchSized(int length, std::shared_ptr<RecordBatch>* out,
169 uint32_t seed = 0) {
170 // Make the schema
171 auto f0 = field("f0", int32());
172 auto f1 = field("f1", int32());
173 auto schema = ::arrow::schema({f0, f1});
174
175 // Example data
176 std::shared_ptr<Array> a0, a1;
177 MemoryPool* pool = default_memory_pool();
178 RETURN_NOT_OK(MakeRandomInt32Array(length, false, pool, &a0, seed));
179 RETURN_NOT_OK(MakeRandomInt32Array(length, true, pool, &a1, seed + 1));
180 *out = RecordBatch::Make(schema, length, {a0, a1});
181 return Status::OK();
182}
183
184Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) {
185 return MakeIntBatchSized(10, out);
186}
187
188template <class Builder, class RawType>
189Status MakeRandomBinaryArray(int64_t length, bool include_nulls, MemoryPool* pool,
190 std::shared_ptr<Array>* out) {
191 const std::vector<std::string> values = {"", "", "abc", "123",
192 "efg", "456!@#!@#", "12312"};
193 Builder builder(pool);
194 const size_t values_len = values.size();
195 for (int64_t i = 0; i < length; ++i) {
196 int64_t values_index = i % values_len;
197 if (include_nulls && values_index == 0) {
198 RETURN_NOT_OK(builder.AppendNull());
199 } else {
200 const std::string& value = values[values_index];
201 RETURN_NOT_OK(builder.Append(reinterpret_cast<const RawType*>(value.data()),
202 static_cast<int32_t>(value.size())));
203 }
204 }
205 return builder.Finish(out);
206}
207
208template <class Builder, class RawType>
209Status MakeBinaryArrayWithUniqueValues(int64_t length, bool include_nulls,
210 MemoryPool* pool, std::shared_ptr<Array>* out) {
211 Builder builder(pool);
212 for (int64_t i = 0; i < length; ++i) {
213 if (include_nulls && (i % 7 == 0)) {
214 RETURN_NOT_OK(builder.AppendNull());
215 } else {
216 const std::string value = std::to_string(i);
217 RETURN_NOT_OK(builder.Append(reinterpret_cast<const RawType*>(value.data()),
218 static_cast<int32_t>(value.size())));
219 }
220 }
221 return builder.Finish(out);
222}
223
224Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out,
225 bool with_nulls = true) {
226 const int64_t length = 500;
227 auto string_type = utf8();
228 auto binary_type = binary();
229 auto f0 = field("f0", string_type);
230 auto f1 = field("f1", binary_type);
231 auto schema = ::arrow::schema({f0, f1});
232
233 std::shared_ptr<Array> a0, a1;
234 MemoryPool* pool = default_memory_pool();
235
236 // Quirk with RETURN_NOT_OK macro and templated functions
237 {
238 auto s = MakeBinaryArrayWithUniqueValues<StringBuilder, char>(length, with_nulls,
239 pool, &a0);
240 RETURN_NOT_OK(s);
241 }
242
243 {
244 auto s = MakeBinaryArrayWithUniqueValues<BinaryBuilder, uint8_t>(length, with_nulls,
245 pool, &a1);
246 RETURN_NOT_OK(s);
247 }
248 *out = RecordBatch::Make(schema, length, {a0, a1});
249 return Status::OK();
250}
251
252Status MakeStringTypesRecordBatchWithNulls(std::shared_ptr<RecordBatch>* out) {
253 return MakeStringTypesRecordBatch(out, true);
254}
255
256Status MakeNullRecordBatch(std::shared_ptr<RecordBatch>* out) {
257 const int64_t length = 500;
258 auto f0 = field("f0", null());
259 auto schema = ::arrow::schema({f0});
260 std::shared_ptr<Array> a0 = std::make_shared<NullArray>(length);
261 *out = RecordBatch::Make(schema, length, {a0});
262 return Status::OK();
263}
264
265Status MakeListRecordBatch(std::shared_ptr<RecordBatch>* out) {
266 // Make the schema
267 auto f0 = field("f0", kListInt32);
268 auto f1 = field("f1", kListListInt32);
269 auto f2 = field("f2", int32());
270 auto schema = ::arrow::schema({f0, f1, f2});
271
272 // Example data
273
274 MemoryPool* pool = default_memory_pool();
275 const int length = 200;
276 std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array;
277 const bool include_nulls = true;
278 RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool, &leaf_values));
279 RETURN_NOT_OK(
280 MakeRandomListArray(leaf_values, length, include_nulls, pool, &list_array));
281 RETURN_NOT_OK(
282 MakeRandomListArray(list_array, length, include_nulls, pool, &list_list_array));
283 RETURN_NOT_OK(MakeRandomInt32Array(length, include_nulls, pool, &flat_array));
284 *out = RecordBatch::Make(schema, length, {list_array, list_list_array, flat_array});
285 return Status::OK();
286}
287
288Status MakeZeroLengthRecordBatch(std::shared_ptr<RecordBatch>* out) {
289 // Make the schema
290 auto f0 = field("f0", kListInt32);
291 auto f1 = field("f1", kListListInt32);
292 auto f2 = field("f2", int32());
293 auto schema = ::arrow::schema({f0, f1, f2});
294
295 // Example data
296 MemoryPool* pool = default_memory_pool();
297 const bool include_nulls = true;
298 std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array;
299 RETURN_NOT_OK(MakeRandomInt32Array(0, include_nulls, pool, &leaf_values));
300 RETURN_NOT_OK(MakeRandomListArray(leaf_values, 0, include_nulls, pool, &list_array));
301 RETURN_NOT_OK(
302 MakeRandomListArray(list_array, 0, include_nulls, pool, &list_list_array));
303 RETURN_NOT_OK(MakeRandomInt32Array(0, include_nulls, pool, &flat_array));
304 *out = RecordBatch::Make(schema, 0, {list_array, list_list_array, flat_array});
305 return Status::OK();
306}
307
308Status MakeNonNullRecordBatch(std::shared_ptr<RecordBatch>* out) {
309 // Make the schema
310 auto f0 = field("f0", kListInt32);
311 auto f1 = field("f1", kListListInt32);
312 auto f2 = field("f2", int32());
313 auto schema = ::arrow::schema({f0, f1, f2});
314
315 // Example data
316 MemoryPool* pool = default_memory_pool();
317 const int length = 50;
318 std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array;
319
320 RETURN_NOT_OK(MakeRandomInt32Array(1000, true, pool, &leaf_values));
321 bool include_nulls = false;
322 RETURN_NOT_OK(
323 MakeRandomListArray(leaf_values, length, include_nulls, pool, &list_array));
324 RETURN_NOT_OK(
325 MakeRandomListArray(list_array, length, include_nulls, pool, &list_list_array));
326 RETURN_NOT_OK(MakeRandomInt32Array(length, include_nulls, pool, &flat_array));
327 *out = RecordBatch::Make(schema, length, {list_array, list_list_array, flat_array});
328 return Status::OK();
329}
330
331Status MakeDeeplyNestedList(std::shared_ptr<RecordBatch>* out) {
332 const int batch_length = 5;
333 auto type = int32();
334
335 MemoryPool* pool = default_memory_pool();
336 std::shared_ptr<Array> array;
337 const bool include_nulls = true;
338 RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool, &array));
339 for (int i = 0; i < 63; ++i) {
340 type = std::static_pointer_cast<DataType>(list(type));
341 RETURN_NOT_OK(MakeRandomListArray(array, batch_length, include_nulls, pool, &array));
342 }
343
344 auto f0 = field("f0", type);
345 auto schema = ::arrow::schema({f0});
346 std::vector<std::shared_ptr<Array>> arrays = {array};
347 *out = RecordBatch::Make(schema, batch_length, arrays);
348 return Status::OK();
349}
350
351Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
352 // reuse constructed list columns
353 std::shared_ptr<RecordBatch> list_batch;
354 RETURN_NOT_OK(MakeListRecordBatch(&list_batch));
355 std::vector<std::shared_ptr<Array>> columns = {
356 list_batch->column(0), list_batch->column(1), list_batch->column(2)};
357 auto list_schema = list_batch->schema();
358
359 // Define schema
360 std::shared_ptr<DataType> type(new StructType(
361 {list_schema->field(0), list_schema->field(1), list_schema->field(2)}));
362 auto f0 = field("non_null_struct", type);
363 auto f1 = field("null_struct", type);
364 auto schema = ::arrow::schema({f0, f1});
365
366 // construct individual nullable/non-nullable struct arrays
367 std::shared_ptr<Array> no_nulls(new StructArray(type, list_batch->num_rows(), columns));
368 std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
369 null_bytes[0] = 0;
370 std::shared_ptr<Buffer> null_bitmask;
371 RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, default_memory_pool(), &null_bitmask));
372 std::shared_ptr<Array> with_nulls(
373 new StructArray(type, list_batch->num_rows(), columns, null_bitmask, 1));
374
375 // construct batch
376 std::vector<std::shared_ptr<Array>> arrays = {no_nulls, with_nulls};
377 *out = RecordBatch::Make(schema, list_batch->num_rows(), arrays);
378 return Status::OK();
379}
380
381Status MakeUnion(std::shared_ptr<RecordBatch>* out) {
382 // Define schema
383 std::vector<std::shared_ptr<Field>> union_types(
384 {field("u0", int32()), field("u1", uint8())});
385
386 std::vector<uint8_t> type_codes = {5, 10};
387 auto sparse_type =
388 std::make_shared<UnionType>(union_types, type_codes, UnionMode::SPARSE);
389
390 auto dense_type =
391 std::make_shared<UnionType>(union_types, type_codes, UnionMode::DENSE);
392
393 auto f0 = field("sparse_nonnull", sparse_type, false);
394 auto f1 = field("sparse", sparse_type);
395 auto f2 = field("dense", dense_type);
396
397 auto schema = ::arrow::schema({f0, f1, f2});
398
399 // Create data
400 std::vector<std::shared_ptr<Array>> sparse_children(2);
401 std::vector<std::shared_ptr<Array>> dense_children(2);
402
403 const int64_t length = 7;
404
405 std::shared_ptr<Buffer> type_ids_buffer;
406 std::vector<uint8_t> type_ids = {5, 10, 5, 5, 10, 10, 5};
407 RETURN_NOT_OK(CopyBufferFromVector(type_ids, default_memory_pool(), &type_ids_buffer));
408
409 std::vector<int32_t> u0_values = {0, 1, 2, 3, 4, 5, 6};
410 ArrayFromVector<Int32Type, int32_t>(u0_values, &sparse_children[0]);
411
412 std::vector<uint8_t> u1_values = {10, 11, 12, 13, 14, 15, 16};
413 ArrayFromVector<UInt8Type, uint8_t>(u1_values, &sparse_children[1]);
414
415 // dense children
416 u0_values = {0, 2, 3, 7};
417 ArrayFromVector<Int32Type, int32_t>(u0_values, &dense_children[0]);
418
419 u1_values = {11, 14, 15};
420 ArrayFromVector<UInt8Type, uint8_t>(u1_values, &dense_children[1]);
421
422 std::shared_ptr<Buffer> offsets_buffer;
423 std::vector<int32_t> offsets = {0, 0, 1, 2, 1, 2, 3};
424 RETURN_NOT_OK(CopyBufferFromVector(offsets, default_memory_pool(), &offsets_buffer));
425
426 std::vector<uint8_t> null_bytes(length, 1);
427 null_bytes[2] = 0;
428 std::shared_ptr<Buffer> null_bitmask;
429 RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, default_memory_pool(), &null_bitmask));
430
431 // construct individual nullable/non-nullable struct arrays
432 auto sparse_no_nulls =
433 std::make_shared<UnionArray>(sparse_type, length, sparse_children, type_ids_buffer);
434 auto sparse = std::make_shared<UnionArray>(sparse_type, length, sparse_children,
435 type_ids_buffer, NULLPTR, null_bitmask, 1);
436
437 auto dense =
438 std::make_shared<UnionArray>(dense_type, length, dense_children, type_ids_buffer,
439 offsets_buffer, null_bitmask, 1);
440
441 // construct batch
442 std::vector<std::shared_ptr<Array>> arrays = {sparse_no_nulls, sparse, dense};
443 *out = RecordBatch::Make(schema, length, arrays);
444 return Status::OK();
445}
446
447Status MakeDictionary(std::shared_ptr<RecordBatch>* out) {
448 const int64_t length = 6;
449
450 std::vector<bool> is_valid = {true, true, false, true, true, true};
451 std::shared_ptr<Array> dict1, dict2;
452
453 std::vector<std::string> dict1_values = {"foo", "bar", "baz"};
454 std::vector<std::string> dict2_values = {"foo", "bar", "baz", "qux"};
455
456 ArrayFromVector<StringType, std::string>(dict1_values, &dict1);
457 ArrayFromVector<StringType, std::string>(dict2_values, &dict2);
458
459 auto f0_type = arrow::dictionary(arrow::int32(), dict1);
460 auto f1_type = arrow::dictionary(arrow::int8(), dict1, true);
461 auto f2_type = arrow::dictionary(arrow::int32(), dict2);
462
463 std::shared_ptr<Array> indices0, indices1, indices2;
464 std::vector<int32_t> indices0_values = {1, 2, -1, 0, 2, 0};
465 std::vector<int8_t> indices1_values = {0, 0, 2, 2, 1, 1};
466 std::vector<int32_t> indices2_values = {3, 0, 2, 1, 0, 2};
467
468 ArrayFromVector<Int32Type, int32_t>(is_valid, indices0_values, &indices0);
469 ArrayFromVector<Int8Type, int8_t>(is_valid, indices1_values, &indices1);
470 ArrayFromVector<Int32Type, int32_t>(is_valid, indices2_values, &indices2);
471
472 auto a0 = std::make_shared<DictionaryArray>(f0_type, indices0);
473 auto a1 = std::make_shared<DictionaryArray>(f1_type, indices1);
474 auto a2 = std::make_shared<DictionaryArray>(f2_type, indices2);
475
476 // List of dictionary-encoded string
477 auto f3_type = list(f1_type);
478
479 std::vector<int32_t> list_offsets = {0, 0, 2, 2, 5, 6, 9};
480 std::shared_ptr<Array> offsets, indices3;
481 ArrayFromVector<Int32Type, int32_t>(std::vector<bool>(list_offsets.size(), true),
482 list_offsets, &offsets);
483
484 std::vector<int8_t> indices3_values = {0, 1, 2, 0, 1, 2, 0, 1, 2};
485 std::vector<bool> is_valid3(9, true);
486 ArrayFromVector<Int8Type, int8_t>(is_valid3, indices3_values, &indices3);
487
488 std::shared_ptr<Buffer> null_bitmap;
489 RETURN_NOT_OK(GetBitmapFromVector(is_valid, &null_bitmap));
490
491 std::shared_ptr<Array> a3 = std::make_shared<ListArray>(
492 f3_type, length, std::static_pointer_cast<PrimitiveArray>(offsets)->values(),
493 std::make_shared<DictionaryArray>(f1_type, indices3), null_bitmap, 1);
494
495 // Dictionary-encoded list of integer
496 auto f4_value_type = list(int8());
497
498 std::shared_ptr<Array> offsets4, values4, indices4;
499
500 std::vector<int32_t> list_offsets4 = {0, 2, 2, 3};
501 ArrayFromVector<Int32Type, int32_t>(std::vector<bool>(4, true), list_offsets4,
502 &offsets4);
503
504 std::vector<int8_t> list_values4 = {0, 1, 2};
505 ArrayFromVector<Int8Type, int8_t>(std::vector<bool>(3, true), list_values4, &values4);
506
507 auto dict3 = std::make_shared<ListArray>(
508 f4_value_type, 3, std::static_pointer_cast<PrimitiveArray>(offsets4)->values(),
509 values4);
510
511 std::vector<int8_t> indices4_values = {0, 1, 2, 0, 1, 2};
512 ArrayFromVector<Int8Type, int8_t>(is_valid, indices4_values, &indices4);
513
514 auto f4_type = dictionary(int8(), dict3);
515 auto a4 = std::make_shared<DictionaryArray>(f4_type, indices4);
516
517 // construct batch
518 auto schema = ::arrow::schema(
519 {field("dict1", f0_type), field("sparse", f1_type), field("dense", f2_type),
520 field("list of encoded string", f3_type), field("encoded list<int8>", f4_type)});
521
522 std::vector<std::shared_ptr<Array>> arrays = {a0, a1, a2, a3, a4};
523
524 *out = RecordBatch::Make(schema, length, arrays);
525 return Status::OK();
526}
527
528Status MakeDictionaryFlat(std::shared_ptr<RecordBatch>* out) {
529 const int64_t length = 6;
530
531 std::vector<bool> is_valid = {true, true, false, true, true, true};
532 std::shared_ptr<Array> dict1, dict2;
533
534 std::vector<std::string> dict1_values = {"foo", "bar", "baz"};
535 std::vector<std::string> dict2_values = {"foo", "bar", "baz", "qux"};
536
537 ArrayFromVector<StringType, std::string>(dict1_values, &dict1);
538 ArrayFromVector<StringType, std::string>(dict2_values, &dict2);
539
540 auto f0_type = arrow::dictionary(arrow::int32(), dict1);
541 auto f1_type = arrow::dictionary(arrow::int8(), dict1);
542 auto f2_type = arrow::dictionary(arrow::int32(), dict2);
543
544 std::shared_ptr<Array> indices0, indices1, indices2;
545 std::vector<int32_t> indices0_values = {1, 2, -1, 0, 2, 0};
546 std::vector<int8_t> indices1_values = {0, 0, 2, 2, 1, 1};
547 std::vector<int32_t> indices2_values = {3, 0, 2, 1, 0, 2};
548
549 ArrayFromVector<Int32Type, int32_t>(is_valid, indices0_values, &indices0);
550 ArrayFromVector<Int8Type, int8_t>(is_valid, indices1_values, &indices1);
551 ArrayFromVector<Int32Type, int32_t>(is_valid, indices2_values, &indices2);
552
553 auto a0 = std::make_shared<DictionaryArray>(f0_type, indices0);
554 auto a1 = std::make_shared<DictionaryArray>(f1_type, indices1);
555 auto a2 = std::make_shared<DictionaryArray>(f2_type, indices2);
556
557 // construct batch
558 auto schema = ::arrow::schema(
559 {field("dict1", f0_type), field("sparse", f1_type), field("dense", f2_type)});
560
561 std::vector<std::shared_ptr<Array>> arrays = {a0, a1, a2};
562 *out = RecordBatch::Make(schema, length, arrays);
563 return Status::OK();
564}
565
566Status MakeDates(std::shared_ptr<RecordBatch>* out) {
567 std::vector<bool> is_valid = {true, true, true, false, true, true, true};
568 auto f0 = field("f0", date32());
569 auto f1 = field("f1", date64());
570 auto schema = ::arrow::schema({f0, f1});
571
572 std::vector<int32_t> date32_values = {0, 1, 2, 3, 4, 5, 6};
573 std::shared_ptr<Array> date32_array;
574 ArrayFromVector<Date32Type, int32_t>(is_valid, date32_values, &date32_array);
575
576 std::vector<int64_t> date64_values = {1489269000000, 1489270000000, 1489271000000,
577 1489272000000, 1489272000000, 1489273000000,
578 1489274000000};
579 std::shared_ptr<Array> date64_array;
580 ArrayFromVector<Date64Type, int64_t>(is_valid, date64_values, &date64_array);
581
582 *out = RecordBatch::Make(schema, date32_array->length(), {date32_array, date64_array});
583 return Status::OK();
584}
585
586Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) {
587 std::vector<bool> is_valid = {true, true, true, false, true, true, true};
588 auto f0 = field("f0", timestamp(TimeUnit::MILLI));
589 auto f1 = field("f1", timestamp(TimeUnit::NANO, "America/New_York"));
590 auto f2 = field("f2", timestamp(TimeUnit::SECOND));
591 auto schema = ::arrow::schema({f0, f1, f2});
592
593 std::vector<int64_t> ts_values = {1489269000000, 1489270000000, 1489271000000,
594 1489272000000, 1489272000000, 1489273000000};
595
596 std::shared_ptr<Array> a0, a1, a2;
597 ArrayFromVector<TimestampType, int64_t>(f0->type(), is_valid, ts_values, &a0);
598 ArrayFromVector<TimestampType, int64_t>(f1->type(), is_valid, ts_values, &a1);
599 ArrayFromVector<TimestampType, int64_t>(f2->type(), is_valid, ts_values, &a2);
600
601 *out = RecordBatch::Make(schema, a0->length(), {a0, a1, a2});
602 return Status::OK();
603}
604
605Status MakeTimes(std::shared_ptr<RecordBatch>* out) {
606 std::vector<bool> is_valid = {true, true, true, false, true, true, true};
607 auto f0 = field("f0", time32(TimeUnit::MILLI));
608 auto f1 = field("f1", time64(TimeUnit::NANO));
609 auto f2 = field("f2", time32(TimeUnit::SECOND));
610 auto f3 = field("f3", time64(TimeUnit::NANO));
611 auto schema = ::arrow::schema({f0, f1, f2, f3});
612
613 std::vector<int32_t> t32_values = {1489269000, 1489270000, 1489271000,
614 1489272000, 1489272000, 1489273000};
615 std::vector<int64_t> t64_values = {1489269000000, 1489270000000, 1489271000000,
616 1489272000000, 1489272000000, 1489273000000};
617
618 std::shared_ptr<Array> a0, a1, a2, a3;
619 ArrayFromVector<Time32Type, int32_t>(f0->type(), is_valid, t32_values, &a0);
620 ArrayFromVector<Time64Type, int64_t>(f1->type(), is_valid, t64_values, &a1);
621 ArrayFromVector<Time32Type, int32_t>(f2->type(), is_valid, t32_values, &a2);
622 ArrayFromVector<Time64Type, int64_t>(f3->type(), is_valid, t64_values, &a3);
623
624 *out = RecordBatch::Make(schema, a0->length(), {a0, a1, a2, a3});
625 return Status::OK();
626}
627
/// \brief Append `values` to `builder`, substituting a null wherever the
/// matching entry of `is_valid` is false.
///
/// Iterates over `values.size()` entries; `is_valid` is indexed with the same
/// positions and is not size-checked here. Each append is wrapped in ASSERT_OK.
template <typename BuilderType, typename T>
void AppendValues(const std::vector<bool>& is_valid, const std::vector<T>& values,
                  BuilderType* builder) {
  for (size_t pos = 0; pos < values.size(); ++pos) {
    if (!is_valid[pos]) {
      ASSERT_OK(builder->AppendNull());
      continue;
    }
    ASSERT_OK(builder->Append(values[pos]));
  }
}
639
640Status MakeFWBinary(std::shared_ptr<RecordBatch>* out) {
641 std::vector<bool> is_valid = {true, true, true, false};
642 auto f0 = field("f0", fixed_size_binary(4));
643 auto f1 = field("f1", fixed_size_binary(0));
644 auto schema = ::arrow::schema({f0, f1});
645
646 std::shared_ptr<Array> a1, a2;
647
648 FixedSizeBinaryBuilder b1(f0->type());
649 FixedSizeBinaryBuilder b2(f1->type());
650
651 std::vector<std::string> values1 = {"foo1", "foo2", "foo3", "foo4"};
652 AppendValues(is_valid, values1, &b1);
653
654 std::vector<std::string> values2 = {"", "", "", ""};
655 AppendValues(is_valid, values2, &b2);
656
657 RETURN_NOT_OK(b1.Finish(&a1));
658 RETURN_NOT_OK(b2.Finish(&a2));
659
660 *out = RecordBatch::Make(schema, a1->length(), {a1, a2});
661 return Status::OK();
662}
663
664Status MakeDecimal(std::shared_ptr<RecordBatch>* out) {
665 constexpr int kDecimalPrecision = 38;
666 auto type = decimal(kDecimalPrecision, 4);
667 auto f0 = field("f0", type);
668 auto f1 = field("f1", type);
669 auto schema = ::arrow::schema({f0, f1});
670
671 constexpr int kDecimalSize = 16;
672 constexpr int length = 10;
673
674 std::shared_ptr<Buffer> data, is_valid;
675 std::vector<uint8_t> is_valid_bytes(length);
676
677 RETURN_NOT_OK(AllocateBuffer(kDecimalSize * length, &data));
678
679 random_decimals(length, 1, kDecimalPrecision, data->mutable_data());
680 random_null_bytes(length, 0.1, is_valid_bytes.data());
681
682 RETURN_NOT_OK(BitUtil::BytesToBits(is_valid_bytes, default_memory_pool(), &is_valid));
683
684 auto a1 = std::make_shared<Decimal128Array>(f0->type(), length, data, is_valid,
685 kUnknownNullCount);
686
687 auto a2 = std::make_shared<Decimal128Array>(f1->type(), length, data);
688
689 *out = RecordBatch::Make(schema, length, {a1, a2});
690 return Status::OK();
691}
692
693Status MakeNull(std::shared_ptr<RecordBatch>* out) {
694 auto f0 = field("f0", null());
695
696 // Also put a non-null field to make sure we handle the null array buffers properly
697 auto f1 = field("f1", int64());
698
699 auto schema = ::arrow::schema({f0, f1});
700
701 auto a1 = std::make_shared<NullArray>(10);
702
703 std::vector<int64_t> int_values = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
704 std::vector<bool> is_valid = {true, true, true, false, false,
705 true, true, true, true, true};
706 std::shared_ptr<Array> a2;
707 ArrayFromVector<Int64Type, int64_t>(f1->type(), is_valid, int_values, &a2);
708
709 *out = RecordBatch::Make(schema, a1->length(), {a1, a2});
710 return Status::OK();
711}
712
713} // namespace ipc
714} // namespace arrow
715
716#endif // ARROW_IPC_TEST_COMMON_H
717