1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <memory>
20#include <vector>
21
22#include <gtest/gtest.h>
23
24#include "arrow/array.h"
25#include "arrow/record_batch.h"
26#include "arrow/status.h"
27#include "arrow/table.h"
28#include "arrow/test-common.h"
29#include "arrow/test-util.h"
30#include "arrow/type.h"
31
32using std::shared_ptr;
33using std::vector;
34
35namespace arrow {
36
37std::shared_ptr<Column> column(const std::shared_ptr<Field>& field,
38 const std::vector<std::shared_ptr<Array>>& arrays) {
39 return std::make_shared<Column>(field, arrays);
40}
41
42class TestChunkedArray : public TestBase {
43 protected:
44 virtual void Construct() {
45 one_ = std::make_shared<ChunkedArray>(arrays_one_);
46 if (!arrays_another_.empty()) {
47 another_ = std::make_shared<ChunkedArray>(arrays_another_);
48 }
49 }
50
51 ArrayVector arrays_one_;
52 ArrayVector arrays_another_;
53
54 std::shared_ptr<ChunkedArray> one_;
55 std::shared_ptr<ChunkedArray> another_;
56};
57
58TEST_F(TestChunkedArray, BasicEquals) {
59 std::vector<bool> null_bitmap(100, true);
60 std::vector<int32_t> data(100, 1);
61 std::shared_ptr<Array> array;
62 ArrayFromVector<Int32Type, int32_t>(null_bitmap, data, &array);
63 arrays_one_.push_back(array);
64 arrays_another_.push_back(array);
65
66 Construct();
67 ASSERT_TRUE(one_->Equals(one_));
68 ASSERT_FALSE(one_->Equals(nullptr));
69 ASSERT_TRUE(one_->Equals(another_));
70 ASSERT_TRUE(one_->Equals(*another_.get()));
71}
72
73TEST_F(TestChunkedArray, EqualsDifferingTypes) {
74 std::vector<bool> null_bitmap(100, true);
75 std::vector<int32_t> data32(100, 1);
76 std::vector<int64_t> data64(100, 1);
77 std::shared_ptr<Array> array;
78 ArrayFromVector<Int32Type, int32_t>(null_bitmap, data32, &array);
79 arrays_one_.push_back(array);
80 ArrayFromVector<Int64Type, int64_t>(null_bitmap, data64, &array);
81 arrays_another_.push_back(array);
82
83 Construct();
84 ASSERT_FALSE(one_->Equals(another_));
85 ASSERT_FALSE(one_->Equals(*another_.get()));
86}
87
88TEST_F(TestChunkedArray, EqualsDifferingLengths) {
89 std::vector<bool> null_bitmap100(100, true);
90 std::vector<bool> null_bitmap101(101, true);
91 std::vector<int32_t> data100(100, 1);
92 std::vector<int32_t> data101(101, 1);
93 std::shared_ptr<Array> array;
94 ArrayFromVector<Int32Type, int32_t>(null_bitmap100, data100, &array);
95 arrays_one_.push_back(array);
96 ArrayFromVector<Int32Type, int32_t>(null_bitmap101, data101, &array);
97 arrays_another_.push_back(array);
98
99 Construct();
100 ASSERT_FALSE(one_->Equals(another_));
101 ASSERT_FALSE(one_->Equals(*another_.get()));
102
103 std::vector<bool> null_bitmap1(1, true);
104 std::vector<int32_t> data1(1, 1);
105 ArrayFromVector<Int32Type, int32_t>(null_bitmap1, data1, &array);
106 arrays_one_.push_back(array);
107
108 Construct();
109 ASSERT_TRUE(one_->Equals(another_));
110 ASSERT_TRUE(one_->Equals(*another_.get()));
111}
112
113TEST_F(TestChunkedArray, SliceEquals) {
114 arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
115 arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
116 arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
117 Construct();
118
119 std::shared_ptr<ChunkedArray> slice = one_->Slice(125, 50);
120 ASSERT_EQ(slice->length(), 50);
121 AssertChunkedEqual(*one_->Slice(125, 50), *slice);
122
123 std::shared_ptr<ChunkedArray> slice2 = one_->Slice(75)->Slice(25)->Slice(25, 50);
124 ASSERT_EQ(slice2->length(), 50);
125 AssertChunkedEqual(*slice, *slice2);
126
127 // Making empty slices of a ChunkedArray
128 std::shared_ptr<ChunkedArray> slice3 = one_->Slice(one_->length(), 99);
129 ASSERT_EQ(slice3->length(), 0);
130 ASSERT_EQ(slice3->num_chunks(), 0);
131 ASSERT_TRUE(slice3->type()->Equals(one_->type()));
132
133 std::shared_ptr<ChunkedArray> slice4 = one_->Slice(10, 0);
134 ASSERT_EQ(slice4->length(), 0);
135 ASSERT_EQ(slice4->num_chunks(), 0);
136 ASSERT_TRUE(slice4->type()->Equals(one_->type()));
137
138 // Slicing an empty ChunkedArray
139 std::shared_ptr<ChunkedArray> slice5 = slice4->Slice(0, 10);
140 ASSERT_EQ(slice5->length(), 0);
141 ASSERT_EQ(slice5->num_chunks(), 0);
142 ASSERT_TRUE(slice5->type()->Equals(one_->type()));
143}
144
145class TestColumn : public TestChunkedArray {
146 protected:
147 void Construct() override {
148 TestChunkedArray::Construct();
149
150 one_col_ = std::make_shared<Column>(one_field_, one_);
151 another_col_ = std::make_shared<Column>(another_field_, another_);
152 }
153
154 std::shared_ptr<ChunkedArray> data_;
155 std::unique_ptr<Column> column_;
156
157 std::shared_ptr<Field> one_field_;
158 std::shared_ptr<Field> another_field_;
159
160 std::shared_ptr<Column> one_col_;
161 std::shared_ptr<Column> another_col_;
162};
163
164TEST_F(TestColumn, BasicAPI) {
165 ArrayVector arrays;
166 arrays.push_back(MakeRandomArray<Int32Array>(100));
167 arrays.push_back(MakeRandomArray<Int32Array>(100, 10));
168 arrays.push_back(MakeRandomArray<Int32Array>(100, 20));
169
170 auto f0 = field("c0", int32());
171 column_.reset(new Column(f0, arrays));
172
173 ASSERT_EQ("c0", column_->name());
174 ASSERT_TRUE(column_->type()->Equals(int32()));
175 ASSERT_EQ(300, column_->length());
176 ASSERT_EQ(30, column_->null_count());
177 ASSERT_EQ(3, column_->data()->num_chunks());
178}
179
180TEST_F(TestColumn, ChunksInhomogeneous) {
181 ArrayVector arrays;
182 arrays.push_back(MakeRandomArray<Int32Array>(100));
183 arrays.push_back(MakeRandomArray<Int32Array>(100, 10));
184
185 auto f0 = field("c0", int32());
186 column_.reset(new Column(f0, arrays));
187
188 ASSERT_OK(column_->ValidateData());
189
190 arrays.push_back(MakeRandomArray<Int16Array>(100, 10));
191 column_.reset(new Column(f0, arrays));
192 ASSERT_RAISES(Invalid, column_->ValidateData());
193}
194
195TEST_F(TestColumn, SliceEquals) {
196 arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
197 arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
198 arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
199 one_field_ = field("column", int32());
200 Construct();
201
202 std::shared_ptr<Column> slice = one_col_->Slice(125, 50);
203 ASSERT_EQ(slice->length(), 50);
204 ASSERT_TRUE(slice->Equals(one_col_->Slice(125, 50)));
205
206 std::shared_ptr<Column> slice2 = one_col_->Slice(75)->Slice(25)->Slice(25, 50);
207 ASSERT_EQ(slice2->length(), 50);
208 ASSERT_TRUE(slice2->Equals(slice));
209}
210
211TEST_F(TestColumn, Equals) {
212 std::vector<bool> null_bitmap(100, true);
213 std::vector<int32_t> data(100, 1);
214 std::shared_ptr<Array> array;
215 ArrayFromVector<Int32Type, int32_t>(null_bitmap, data, &array);
216 arrays_one_.push_back(array);
217 arrays_another_.push_back(array);
218
219 one_field_ = field("column", int32());
220 another_field_ = field("column", int32());
221
222 Construct();
223 ASSERT_TRUE(one_col_->Equals(one_col_));
224 ASSERT_FALSE(one_col_->Equals(nullptr));
225 ASSERT_TRUE(one_col_->Equals(another_col_));
226 ASSERT_TRUE(one_col_->Equals(*another_col_.get()));
227
228 // Field is different
229 another_field_ = field("two", int32());
230 Construct();
231 ASSERT_FALSE(one_col_->Equals(another_col_));
232 ASSERT_FALSE(one_col_->Equals(*another_col_.get()));
233
234 // ChunkedArray is different
235 another_field_ = field("column", int32());
236 arrays_another_.push_back(array);
237 Construct();
238 ASSERT_FALSE(one_col_->Equals(another_col_));
239 ASSERT_FALSE(one_col_->Equals(*another_col_.get()));
240}
241
242class TestTable : public TestBase {
243 public:
244 void MakeExample1(int length) {
245 auto f0 = field("f0", int32());
246 auto f1 = field("f1", uint8());
247 auto f2 = field("f2", int16());
248
249 vector<shared_ptr<Field>> fields = {f0, f1, f2};
250 schema_ = std::make_shared<Schema>(fields);
251
252 arrays_ = {MakeRandomArray<Int32Array>(length), MakeRandomArray<UInt8Array>(length),
253 MakeRandomArray<Int16Array>(length)};
254
255 columns_ = {std::make_shared<Column>(schema_->field(0), arrays_[0]),
256 std::make_shared<Column>(schema_->field(1), arrays_[1]),
257 std::make_shared<Column>(schema_->field(2), arrays_[2])};
258 }
259
260 protected:
261 std::shared_ptr<Table> table_;
262 shared_ptr<Schema> schema_;
263
264 std::vector<std::shared_ptr<Array>> arrays_;
265 std::vector<std::shared_ptr<Column>> columns_;
266};
267
268TEST_F(TestTable, EmptySchema) {
269 auto empty_schema = ::arrow::schema({});
270 table_ = Table::Make(empty_schema, columns_);
271 ASSERT_OK(table_->Validate());
272 ASSERT_EQ(0, table_->num_rows());
273 ASSERT_EQ(0, table_->num_columns());
274}
275
276TEST_F(TestTable, Ctors) {
277 const int length = 100;
278 MakeExample1(length);
279
280 table_ = Table::Make(schema_, columns_);
281 ASSERT_OK(table_->Validate());
282 ASSERT_EQ(length, table_->num_rows());
283 ASSERT_EQ(3, table_->num_columns());
284
285 auto array_ctor = Table::Make(schema_, arrays_);
286 ASSERT_TRUE(table_->Equals(*array_ctor));
287
288 table_ = Table::Make(schema_, columns_, length);
289 ASSERT_OK(table_->Validate());
290 ASSERT_EQ(length, table_->num_rows());
291
292 table_ = Table::Make(schema_, arrays_);
293 ASSERT_OK(table_->Validate());
294 ASSERT_EQ(length, table_->num_rows());
295 ASSERT_EQ(3, table_->num_columns());
296}
297
298TEST_F(TestTable, Metadata) {
299 const int length = 100;
300 MakeExample1(length);
301
302 table_ = Table::Make(schema_, columns_);
303
304 ASSERT_TRUE(table_->schema()->Equals(*schema_));
305
306 auto col = table_->column(0);
307 ASSERT_EQ(schema_->field(0)->name(), col->name());
308 ASSERT_EQ(schema_->field(0)->type(), col->type());
309}
310
311TEST_F(TestTable, InvalidColumns) {
312 // Check that columns are all the same length
313 const int length = 100;
314 MakeExample1(length);
315
316 table_ = Table::Make(schema_, columns_, length - 1);
317 ASSERT_RAISES(Invalid, table_->Validate());
318
319 columns_.clear();
320
321 // Wrong number of columns
322 table_ = Table::Make(schema_, columns_, length);
323 ASSERT_RAISES(Invalid, table_->Validate());
324
325 columns_ = {
326 std::make_shared<Column>(schema_->field(0), MakeRandomArray<Int32Array>(length)),
327 std::make_shared<Column>(schema_->field(1), MakeRandomArray<UInt8Array>(length)),
328 std::make_shared<Column>(schema_->field(2),
329 MakeRandomArray<Int16Array>(length - 1))};
330
331 table_ = Table::Make(schema_, columns_, length);
332 ASSERT_RAISES(Invalid, table_->Validate());
333}
334
335TEST_F(TestTable, Equals) {
336 const int length = 100;
337 MakeExample1(length);
338
339 table_ = Table::Make(schema_, columns_);
340
341 ASSERT_TRUE(table_->Equals(*table_));
342 // Differing schema
343 auto f0 = field("f3", int32());
344 auto f1 = field("f4", uint8());
345 auto f2 = field("f5", int16());
346 vector<shared_ptr<Field>> fields = {f0, f1, f2};
347 auto other_schema = std::make_shared<Schema>(fields);
348 auto other = Table::Make(other_schema, columns_);
349 ASSERT_FALSE(table_->Equals(*other));
350 // Differing columns
351 std::vector<std::shared_ptr<Column>> other_columns = {
352 std::make_shared<Column>(schema_->field(0),
353 MakeRandomArray<Int32Array>(length, 10)),
354 std::make_shared<Column>(schema_->field(1),
355 MakeRandomArray<UInt8Array>(length, 10)),
356 std::make_shared<Column>(schema_->field(2),
357 MakeRandomArray<Int16Array>(length, 10))};
358
359 other = Table::Make(schema_, other_columns);
360 ASSERT_FALSE(table_->Equals(*other));
361}
362
363TEST_F(TestTable, FromRecordBatches) {
364 const int64_t length = 10;
365 MakeExample1(length);
366
367 auto batch1 = RecordBatch::Make(schema_, length, arrays_);
368
369 std::shared_ptr<Table> result, expected;
370 ASSERT_OK(Table::FromRecordBatches({batch1}, &result));
371
372 expected = Table::Make(schema_, columns_);
373 ASSERT_TRUE(result->Equals(*expected));
374
375 std::vector<std::shared_ptr<Column>> other_columns;
376 for (int i = 0; i < schema_->num_fields(); ++i) {
377 std::vector<std::shared_ptr<Array>> col_arrays = {arrays_[i], arrays_[i]};
378 other_columns.push_back(std::make_shared<Column>(schema_->field(i), col_arrays));
379 }
380
381 ASSERT_OK(Table::FromRecordBatches({batch1, batch1}, &result));
382 expected = Table::Make(schema_, other_columns);
383 ASSERT_TRUE(result->Equals(*expected));
384
385 // Error states
386 std::vector<std::shared_ptr<RecordBatch>> empty_batches;
387 ASSERT_RAISES(Invalid, Table::FromRecordBatches(empty_batches, &result));
388
389 auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)});
390
391 std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]};
392 auto batch2 = RecordBatch::Make(other_schema, length, other_arrays);
393 ASSERT_RAISES(Invalid, Table::FromRecordBatches({batch1, batch2}, &result));
394}
395
396TEST_F(TestTable, FromRecordBatchesZeroLength) {
397 // ARROW-2307
398 MakeExample1(10);
399
400 std::shared_ptr<Table> result;
401 ASSERT_OK(Table::FromRecordBatches(schema_, {}, &result));
402
403 ASSERT_EQ(0, result->num_rows());
404 ASSERT_TRUE(result->schema()->Equals(*schema_));
405}
406
407TEST_F(TestTable, ConcatenateTables) {
408 const int64_t length = 10;
409
410 MakeExample1(length);
411 auto batch1 = RecordBatch::Make(schema_, length, arrays_);
412
413 // generate different data
414 MakeExample1(length);
415 auto batch2 = RecordBatch::Make(schema_, length, arrays_);
416
417 std::shared_ptr<Table> t1, t2, t3, result, expected;
418 ASSERT_OK(Table::FromRecordBatches({batch1}, &t1));
419 ASSERT_OK(Table::FromRecordBatches({batch2}, &t2));
420
421 ASSERT_OK(ConcatenateTables({t1, t2}, &result));
422 ASSERT_OK(Table::FromRecordBatches({batch1, batch2}, &expected));
423 AssertTablesEqual(*expected, *result);
424
425 // Error states
426 std::vector<std::shared_ptr<Table>> empty_tables;
427 ASSERT_RAISES(Invalid, ConcatenateTables(empty_tables, &result));
428
429 auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)});
430
431 std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]};
432 auto batch3 = RecordBatch::Make(other_schema, length, other_arrays);
433 ASSERT_OK(Table::FromRecordBatches({batch3}, &t3));
434
435 ASSERT_RAISES(Invalid, ConcatenateTables({t1, t3}, &result));
436}
437
438TEST_F(TestTable, RemoveColumn) {
439 const int64_t length = 10;
440 MakeExample1(length);
441
442 auto table_sp = Table::Make(schema_, columns_);
443 const Table& table = *table_sp;
444
445 std::shared_ptr<Table> result;
446 ASSERT_OK(table.RemoveColumn(0, &result));
447
448 auto ex_schema = ::arrow::schema({schema_->field(1), schema_->field(2)});
449 std::vector<std::shared_ptr<Column>> ex_columns = {table.column(1), table.column(2)};
450
451 auto expected = Table::Make(ex_schema, ex_columns);
452 ASSERT_TRUE(result->Equals(*expected));
453
454 ASSERT_OK(table.RemoveColumn(1, &result));
455 ex_schema = ::arrow::schema({schema_->field(0), schema_->field(2)});
456 ex_columns = {table.column(0), table.column(2)};
457
458 expected = Table::Make(ex_schema, ex_columns);
459 ASSERT_TRUE(result->Equals(*expected));
460
461 ASSERT_OK(table.RemoveColumn(2, &result));
462 ex_schema = ::arrow::schema({schema_->field(0), schema_->field(1)});
463 ex_columns = {table.column(0), table.column(1)};
464 expected = Table::Make(ex_schema, ex_columns);
465 ASSERT_TRUE(result->Equals(*expected));
466}
467
468TEST_F(TestTable, SetColumn) {
469 const int64_t length = 10;
470 MakeExample1(length);
471
472 auto table_sp = Table::Make(schema_, columns_);
473 const Table& table = *table_sp;
474
475 std::shared_ptr<Table> result;
476 ASSERT_OK(table.SetColumn(0, table.column(1), &result));
477
478 auto ex_schema =
479 ::arrow::schema({schema_->field(1), schema_->field(1), schema_->field(2)});
480 std::vector<std::shared_ptr<Column>> ex_columns = {table.column(1), table.column(1),
481 table.column(2)};
482
483 auto expected = Table::Make(ex_schema, ex_columns);
484 ASSERT_TRUE(result->Equals(*expected));
485}
486
487TEST_F(TestTable, RemoveColumnEmpty) {
488 // ARROW-1865
489 const int64_t length = 10;
490
491 auto f0 = field("f0", int32());
492 auto schema = ::arrow::schema({f0});
493 auto a0 = MakeRandomArray<Int32Array>(length);
494
495 auto table = Table::Make(schema, {std::make_shared<Column>(f0, a0)});
496
497 std::shared_ptr<Table> empty;
498 ASSERT_OK(table->RemoveColumn(0, &empty));
499
500 ASSERT_EQ(table->num_rows(), empty->num_rows());
501
502 std::shared_ptr<Table> added;
503 ASSERT_OK(empty->AddColumn(0, table->column(0), &added));
504 ASSERT_EQ(table->num_rows(), added->num_rows());
505}
506
507TEST_F(TestTable, AddColumn) {
508 const int64_t length = 10;
509 MakeExample1(length);
510
511 auto table_sp = Table::Make(schema_, columns_);
512 const Table& table = *table_sp;
513
514 std::shared_ptr<Table> result;
515 // Some negative tests with invalid index
516 Status status = table.AddColumn(10, columns_[0], &result);
517 ASSERT_TRUE(status.IsInvalid());
518 status = table.AddColumn(4, columns_[0], &result);
519 ASSERT_TRUE(status.IsInvalid());
520 status = table.AddColumn(-1, columns_[0], &result);
521 ASSERT_TRUE(status.IsInvalid());
522
523 // Add column with wrong length
524 auto longer_col = std::make_shared<Column>(schema_->field(0),
525 MakeRandomArray<Int32Array>(length + 1));
526 status = table.AddColumn(0, longer_col, &result);
527 ASSERT_TRUE(status.IsInvalid());
528
529 // Add column 0 in different places
530 ASSERT_OK(table.AddColumn(0, columns_[0], &result));
531 auto ex_schema = ::arrow::schema(
532 {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)});
533
534 auto expected = Table::Make(
535 ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)});
536 ASSERT_TRUE(result->Equals(*expected));
537
538 ASSERT_OK(table.AddColumn(1, columns_[0], &result));
539 ex_schema = ::arrow::schema(
540 {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)});
541
542 expected = Table::Make(
543 ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)});
544 ASSERT_TRUE(result->Equals(*expected));
545
546 ASSERT_OK(table.AddColumn(2, columns_[0], &result));
547 ex_schema = ::arrow::schema(
548 {schema_->field(0), schema_->field(1), schema_->field(0), schema_->field(2)});
549 expected = Table::Make(
550 ex_schema, {table.column(0), table.column(1), table.column(0), table.column(2)});
551 ASSERT_TRUE(result->Equals(*expected));
552
553 ASSERT_OK(table.AddColumn(3, columns_[0], &result));
554 ex_schema = ::arrow::schema(
555 {schema_->field(0), schema_->field(1), schema_->field(2), schema_->field(0)});
556 expected = Table::Make(
557 ex_schema, {table.column(0), table.column(1), table.column(2), table.column(0)});
558 ASSERT_TRUE(result->Equals(*expected));
559}
560
561class TestRecordBatch : public TestBase {};
562
563TEST_F(TestRecordBatch, Equals) {
564 const int length = 10;
565
566 auto f0 = field("f0", int32());
567 auto f1 = field("f1", uint8());
568 auto f2 = field("f2", int16());
569
570 vector<shared_ptr<Field>> fields = {f0, f1, f2};
571 auto schema = ::arrow::schema({f0, f1, f2});
572 auto schema2 = ::arrow::schema({f0, f1});
573
574 auto a0 = MakeRandomArray<Int32Array>(length);
575 auto a1 = MakeRandomArray<UInt8Array>(length);
576 auto a2 = MakeRandomArray<Int16Array>(length);
577
578 auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2});
579 auto b3 = RecordBatch::Make(schema2, length, {a0, a1});
580 auto b4 = RecordBatch::Make(schema, length, {a0, a1, a1});
581
582 ASSERT_TRUE(b1->Equals(*b1));
583 ASSERT_FALSE(b1->Equals(*b3));
584 ASSERT_FALSE(b1->Equals(*b4));
585}
586
587TEST_F(TestRecordBatch, Validate) {
588 const int length = 10;
589
590 auto f0 = field("f0", int32());
591 auto f1 = field("f1", uint8());
592 auto f2 = field("f2", int16());
593
594 auto schema = ::arrow::schema({f0, f1, f2});
595
596 auto a0 = MakeRandomArray<Int32Array>(length);
597 auto a1 = MakeRandomArray<UInt8Array>(length);
598 auto a2 = MakeRandomArray<Int16Array>(length);
599 auto a3 = MakeRandomArray<Int16Array>(5);
600
601 auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2});
602
603 ASSERT_OK(b1->Validate());
604
605 // Length mismatch
606 auto b2 = RecordBatch::Make(schema, length, {a0, a1, a3});
607 ASSERT_RAISES(Invalid, b2->Validate());
608
609 // Type mismatch
610 auto b3 = RecordBatch::Make(schema, length, {a0, a1, a0});
611 ASSERT_RAISES(Invalid, b3->Validate());
612}
613
614TEST_F(TestRecordBatch, Slice) {
615 const int length = 10;
616
617 auto f0 = field("f0", int32());
618 auto f1 = field("f1", uint8());
619
620 vector<shared_ptr<Field>> fields = {f0, f1};
621 auto schema = ::arrow::schema(fields);
622
623 auto a0 = MakeRandomArray<Int32Array>(length);
624 auto a1 = MakeRandomArray<UInt8Array>(length);
625
626 auto batch = RecordBatch::Make(schema, length, {a0, a1});
627
628 auto batch_slice = batch->Slice(2);
629 auto batch_slice2 = batch->Slice(1, 5);
630
631 ASSERT_EQ(batch_slice->num_rows(), batch->num_rows() - 2);
632
633 for (int i = 0; i < batch->num_columns(); ++i) {
634 ASSERT_EQ(2, batch_slice->column(i)->offset());
635 ASSERT_EQ(length - 2, batch_slice->column(i)->length());
636
637 ASSERT_EQ(1, batch_slice2->column(i)->offset());
638 ASSERT_EQ(5, batch_slice2->column(i)->length());
639 }
640}
641
642TEST_F(TestRecordBatch, AddColumn) {
643 const int length = 10;
644
645 auto field1 = field("f1", int32());
646 auto field2 = field("f2", uint8());
647 auto field3 = field("f3", int16());
648
649 auto schema1 = ::arrow::schema({field1, field2});
650 auto schema2 = ::arrow::schema({field2, field3});
651 auto schema3 = ::arrow::schema({field2});
652
653 auto array1 = MakeRandomArray<Int32Array>(length);
654 auto array2 = MakeRandomArray<UInt8Array>(length);
655 auto array3 = MakeRandomArray<Int16Array>(length);
656
657 auto batch1 = RecordBatch::Make(schema1, length, {array1, array2});
658 auto batch2 = RecordBatch::Make(schema2, length, {array2, array3});
659 auto batch3 = RecordBatch::Make(schema3, length, {array2});
660
661 const RecordBatch& batch = *batch3;
662 std::shared_ptr<RecordBatch> result;
663
664 // Negative tests with invalid index
665 Status status = batch.AddColumn(5, field1, array1, &result);
666 ASSERT_TRUE(status.IsInvalid());
667 status = batch.AddColumn(2, field1, array1, &result);
668 ASSERT_TRUE(status.IsInvalid());
669 status = batch.AddColumn(-1, field1, array1, &result);
670 ASSERT_TRUE(status.IsInvalid());
671
672 // Negative test with wrong length
673 auto longer_col = MakeRandomArray<Int32Array>(length + 1);
674 status = batch.AddColumn(0, field1, longer_col, &result);
675 ASSERT_TRUE(status.IsInvalid());
676
677 // Negative test with mismatch type
678 status = batch.AddColumn(0, field1, array2, &result);
679 ASSERT_TRUE(status.IsInvalid());
680
681 ASSERT_OK(batch.AddColumn(0, field1, array1, &result));
682 ASSERT_TRUE(result->Equals(*batch1));
683
684 ASSERT_OK(batch.AddColumn(1, field3, array3, &result));
685 ASSERT_TRUE(result->Equals(*batch2));
686
687 std::shared_ptr<RecordBatch> result2;
688 ASSERT_OK(batch.AddColumn(1, "f3", array3, &result2));
689 ASSERT_TRUE(result2->Equals(*result));
690
691 ASSERT_TRUE(result2->schema()->field(1)->nullable());
692}
693
694TEST_F(TestRecordBatch, RemoveColumn) {
695 const int length = 10;
696
697 auto field1 = field("f1", int32());
698 auto field2 = field("f2", uint8());
699 auto field3 = field("f3", int16());
700
701 auto schema1 = ::arrow::schema({field1, field2, field3});
702 auto schema2 = ::arrow::schema({field2, field3});
703 auto schema3 = ::arrow::schema({field1, field3});
704 auto schema4 = ::arrow::schema({field1, field2});
705
706 auto array1 = MakeRandomArray<Int32Array>(length);
707 auto array2 = MakeRandomArray<UInt8Array>(length);
708 auto array3 = MakeRandomArray<Int16Array>(length);
709
710 auto batch1 = RecordBatch::Make(schema1, length, {array1, array2, array3});
711 auto batch2 = RecordBatch::Make(schema2, length, {array2, array3});
712 auto batch3 = RecordBatch::Make(schema3, length, {array1, array3});
713 auto batch4 = RecordBatch::Make(schema4, length, {array1, array2});
714
715 const RecordBatch& batch = *batch1;
716 std::shared_ptr<RecordBatch> result;
717
718 // Negative tests with invalid index
719 Status status = batch.RemoveColumn(3, &result);
720 ASSERT_TRUE(status.IsInvalid());
721 status = batch.RemoveColumn(-1, &result);
722 ASSERT_TRUE(status.IsInvalid());
723
724 ASSERT_OK(batch.RemoveColumn(0, &result));
725 ASSERT_TRUE(result->Equals(*batch2));
726
727 ASSERT_OK(batch.RemoveColumn(1, &result));
728 ASSERT_TRUE(result->Equals(*batch3));
729
730 ASSERT_OK(batch.RemoveColumn(2, &result));
731 ASSERT_TRUE(result->Equals(*batch4));
732}
733
734TEST_F(TestRecordBatch, RemoveColumnEmpty) {
735 const int length = 10;
736
737 auto field1 = field("f1", int32());
738 auto schema1 = ::arrow::schema({field1});
739 auto array1 = MakeRandomArray<Int32Array>(length);
740 auto batch1 = RecordBatch::Make(schema1, length, {array1});
741
742 std::shared_ptr<RecordBatch> empty;
743 ASSERT_OK(batch1->RemoveColumn(0, &empty));
744 ASSERT_EQ(batch1->num_rows(), empty->num_rows());
745
746 std::shared_ptr<RecordBatch> added;
747 ASSERT_OK(empty->AddColumn(0, field1, array1, &added));
748 ASSERT_TRUE(added->Equals(*batch1));
749}
750
751class TestTableBatchReader : public TestBase {};
752
753TEST_F(TestTableBatchReader, ReadNext) {
754 ArrayVector c1, c2;
755
756 auto a1 = MakeRandomArray<Int32Array>(10);
757 auto a2 = MakeRandomArray<Int32Array>(20);
758 auto a3 = MakeRandomArray<Int32Array>(30);
759 auto a4 = MakeRandomArray<Int32Array>(10);
760
761 auto sch1 = arrow::schema({field("f1", int32()), field("f2", int32())});
762
763 std::vector<std::shared_ptr<Column>> columns;
764
765 std::shared_ptr<RecordBatch> batch;
766
767 columns = {column(sch1->field(0), {a1, a4, a2}), column(sch1->field(1), {a2, a2})};
768 auto t1 = Table::Make(sch1, columns);
769
770 TableBatchReader i1(*t1);
771
772 ASSERT_OK(i1.ReadNext(&batch));
773 ASSERT_EQ(10, batch->num_rows());
774
775 ASSERT_OK(i1.ReadNext(&batch));
776 ASSERT_EQ(10, batch->num_rows());
777
778 ASSERT_OK(i1.ReadNext(&batch));
779 ASSERT_EQ(20, batch->num_rows());
780
781 ASSERT_OK(i1.ReadNext(&batch));
782 ASSERT_EQ(nullptr, batch);
783
784 columns = {column(sch1->field(0), {a1}), column(sch1->field(1), {a4})};
785 auto t2 = Table::Make(sch1, columns);
786
787 TableBatchReader i2(*t2);
788
789 ASSERT_OK(i2.ReadNext(&batch));
790 ASSERT_EQ(10, batch->num_rows());
791
792 // Ensure non-sliced
793 ASSERT_EQ(a1->data().get(), batch->column_data(0).get());
794 ASSERT_EQ(a4->data().get(), batch->column_data(1).get());
795
796 ASSERT_OK(i1.ReadNext(&batch));
797 ASSERT_EQ(nullptr, batch);
798}
799
800TEST_F(TestTableBatchReader, Chunksize) {
801 auto a1 = MakeRandomArray<Int32Array>(10);
802 auto a2 = MakeRandomArray<Int32Array>(20);
803 auto a3 = MakeRandomArray<Int32Array>(10);
804
805 auto sch1 = arrow::schema({field("f1", int32())});
806 auto t1 = Table::Make(sch1, {column(sch1->field(0), {a1, a2, a3})});
807
808 TableBatchReader i1(*t1);
809
810 i1.set_chunksize(15);
811
812 std::shared_ptr<RecordBatch> batch;
813 ASSERT_OK(i1.ReadNext(&batch));
814 ASSERT_OK(batch->Validate());
815 ASSERT_EQ(10, batch->num_rows());
816
817 ASSERT_OK(i1.ReadNext(&batch));
818 ASSERT_OK(batch->Validate());
819 ASSERT_EQ(15, batch->num_rows());
820
821 ASSERT_OK(i1.ReadNext(&batch));
822 ASSERT_OK(batch->Validate());
823 ASSERT_EQ(5, batch->num_rows());
824
825 ASSERT_OK(i1.ReadNext(&batch));
826 ASSERT_OK(batch->Validate());
827 ASSERT_EQ(10, batch->num_rows());
828
829 ASSERT_OK(i1.ReadNext(&batch));
830 ASSERT_EQ(nullptr, batch);
831}
832
833} // namespace arrow
834