1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <cstdint> |
19 | #include <memory> |
20 | #include <vector> |
21 | |
22 | #include <gtest/gtest.h> |
23 | |
24 | #include "arrow/array.h" |
25 | #include "arrow/record_batch.h" |
26 | #include "arrow/status.h" |
27 | #include "arrow/table.h" |
28 | #include "arrow/test-common.h" |
29 | #include "arrow/test-util.h" |
30 | #include "arrow/type.h" |
31 | |
32 | using std::shared_ptr; |
33 | using std::vector; |
34 | |
35 | namespace arrow { |
36 | |
37 | std::shared_ptr<Column> column(const std::shared_ptr<Field>& field, |
38 | const std::vector<std::shared_ptr<Array>>& arrays) { |
39 | return std::make_shared<Column>(field, arrays); |
40 | } |
41 | |
42 | class TestChunkedArray : public TestBase { |
43 | protected: |
44 | virtual void Construct() { |
45 | one_ = std::make_shared<ChunkedArray>(arrays_one_); |
46 | if (!arrays_another_.empty()) { |
47 | another_ = std::make_shared<ChunkedArray>(arrays_another_); |
48 | } |
49 | } |
50 | |
51 | ArrayVector arrays_one_; |
52 | ArrayVector arrays_another_; |
53 | |
54 | std::shared_ptr<ChunkedArray> one_; |
55 | std::shared_ptr<ChunkedArray> another_; |
56 | }; |
57 | |
58 | TEST_F(TestChunkedArray, BasicEquals) { |
59 | std::vector<bool> null_bitmap(100, true); |
60 | std::vector<int32_t> data(100, 1); |
61 | std::shared_ptr<Array> array; |
62 | ArrayFromVector<Int32Type, int32_t>(null_bitmap, data, &array); |
63 | arrays_one_.push_back(array); |
64 | arrays_another_.push_back(array); |
65 | |
66 | Construct(); |
67 | ASSERT_TRUE(one_->Equals(one_)); |
68 | ASSERT_FALSE(one_->Equals(nullptr)); |
69 | ASSERT_TRUE(one_->Equals(another_)); |
70 | ASSERT_TRUE(one_->Equals(*another_.get())); |
71 | } |
72 | |
73 | TEST_F(TestChunkedArray, EqualsDifferingTypes) { |
74 | std::vector<bool> null_bitmap(100, true); |
75 | std::vector<int32_t> data32(100, 1); |
76 | std::vector<int64_t> data64(100, 1); |
77 | std::shared_ptr<Array> array; |
78 | ArrayFromVector<Int32Type, int32_t>(null_bitmap, data32, &array); |
79 | arrays_one_.push_back(array); |
80 | ArrayFromVector<Int64Type, int64_t>(null_bitmap, data64, &array); |
81 | arrays_another_.push_back(array); |
82 | |
83 | Construct(); |
84 | ASSERT_FALSE(one_->Equals(another_)); |
85 | ASSERT_FALSE(one_->Equals(*another_.get())); |
86 | } |
87 | |
88 | TEST_F(TestChunkedArray, EqualsDifferingLengths) { |
89 | std::vector<bool> null_bitmap100(100, true); |
90 | std::vector<bool> null_bitmap101(101, true); |
91 | std::vector<int32_t> data100(100, 1); |
92 | std::vector<int32_t> data101(101, 1); |
93 | std::shared_ptr<Array> array; |
94 | ArrayFromVector<Int32Type, int32_t>(null_bitmap100, data100, &array); |
95 | arrays_one_.push_back(array); |
96 | ArrayFromVector<Int32Type, int32_t>(null_bitmap101, data101, &array); |
97 | arrays_another_.push_back(array); |
98 | |
99 | Construct(); |
100 | ASSERT_FALSE(one_->Equals(another_)); |
101 | ASSERT_FALSE(one_->Equals(*another_.get())); |
102 | |
103 | std::vector<bool> null_bitmap1(1, true); |
104 | std::vector<int32_t> data1(1, 1); |
105 | ArrayFromVector<Int32Type, int32_t>(null_bitmap1, data1, &array); |
106 | arrays_one_.push_back(array); |
107 | |
108 | Construct(); |
109 | ASSERT_TRUE(one_->Equals(another_)); |
110 | ASSERT_TRUE(one_->Equals(*another_.get())); |
111 | } |
112 | |
113 | TEST_F(TestChunkedArray, SliceEquals) { |
114 | arrays_one_.push_back(MakeRandomArray<Int32Array>(100)); |
115 | arrays_one_.push_back(MakeRandomArray<Int32Array>(50)); |
116 | arrays_one_.push_back(MakeRandomArray<Int32Array>(50)); |
117 | Construct(); |
118 | |
119 | std::shared_ptr<ChunkedArray> slice = one_->Slice(125, 50); |
120 | ASSERT_EQ(slice->length(), 50); |
121 | AssertChunkedEqual(*one_->Slice(125, 50), *slice); |
122 | |
123 | std::shared_ptr<ChunkedArray> slice2 = one_->Slice(75)->Slice(25)->Slice(25, 50); |
124 | ASSERT_EQ(slice2->length(), 50); |
125 | AssertChunkedEqual(*slice, *slice2); |
126 | |
127 | // Making empty slices of a ChunkedArray |
128 | std::shared_ptr<ChunkedArray> slice3 = one_->Slice(one_->length(), 99); |
129 | ASSERT_EQ(slice3->length(), 0); |
130 | ASSERT_EQ(slice3->num_chunks(), 0); |
131 | ASSERT_TRUE(slice3->type()->Equals(one_->type())); |
132 | |
133 | std::shared_ptr<ChunkedArray> slice4 = one_->Slice(10, 0); |
134 | ASSERT_EQ(slice4->length(), 0); |
135 | ASSERT_EQ(slice4->num_chunks(), 0); |
136 | ASSERT_TRUE(slice4->type()->Equals(one_->type())); |
137 | |
138 | // Slicing an empty ChunkedArray |
139 | std::shared_ptr<ChunkedArray> slice5 = slice4->Slice(0, 10); |
140 | ASSERT_EQ(slice5->length(), 0); |
141 | ASSERT_EQ(slice5->num_chunks(), 0); |
142 | ASSERT_TRUE(slice5->type()->Equals(one_->type())); |
143 | } |
144 | |
145 | class TestColumn : public TestChunkedArray { |
146 | protected: |
147 | void Construct() override { |
148 | TestChunkedArray::Construct(); |
149 | |
150 | one_col_ = std::make_shared<Column>(one_field_, one_); |
151 | another_col_ = std::make_shared<Column>(another_field_, another_); |
152 | } |
153 | |
154 | std::shared_ptr<ChunkedArray> data_; |
155 | std::unique_ptr<Column> column_; |
156 | |
157 | std::shared_ptr<Field> one_field_; |
158 | std::shared_ptr<Field> another_field_; |
159 | |
160 | std::shared_ptr<Column> one_col_; |
161 | std::shared_ptr<Column> another_col_; |
162 | }; |
163 | |
164 | TEST_F(TestColumn, BasicAPI) { |
165 | ArrayVector arrays; |
166 | arrays.push_back(MakeRandomArray<Int32Array>(100)); |
167 | arrays.push_back(MakeRandomArray<Int32Array>(100, 10)); |
168 | arrays.push_back(MakeRandomArray<Int32Array>(100, 20)); |
169 | |
170 | auto f0 = field("c0" , int32()); |
171 | column_.reset(new Column(f0, arrays)); |
172 | |
173 | ASSERT_EQ("c0" , column_->name()); |
174 | ASSERT_TRUE(column_->type()->Equals(int32())); |
175 | ASSERT_EQ(300, column_->length()); |
176 | ASSERT_EQ(30, column_->null_count()); |
177 | ASSERT_EQ(3, column_->data()->num_chunks()); |
178 | } |
179 | |
180 | TEST_F(TestColumn, ChunksInhomogeneous) { |
181 | ArrayVector arrays; |
182 | arrays.push_back(MakeRandomArray<Int32Array>(100)); |
183 | arrays.push_back(MakeRandomArray<Int32Array>(100, 10)); |
184 | |
185 | auto f0 = field("c0" , int32()); |
186 | column_.reset(new Column(f0, arrays)); |
187 | |
188 | ASSERT_OK(column_->ValidateData()); |
189 | |
190 | arrays.push_back(MakeRandomArray<Int16Array>(100, 10)); |
191 | column_.reset(new Column(f0, arrays)); |
192 | ASSERT_RAISES(Invalid, column_->ValidateData()); |
193 | } |
194 | |
195 | TEST_F(TestColumn, SliceEquals) { |
196 | arrays_one_.push_back(MakeRandomArray<Int32Array>(100)); |
197 | arrays_one_.push_back(MakeRandomArray<Int32Array>(50)); |
198 | arrays_one_.push_back(MakeRandomArray<Int32Array>(50)); |
199 | one_field_ = field("column" , int32()); |
200 | Construct(); |
201 | |
202 | std::shared_ptr<Column> slice = one_col_->Slice(125, 50); |
203 | ASSERT_EQ(slice->length(), 50); |
204 | ASSERT_TRUE(slice->Equals(one_col_->Slice(125, 50))); |
205 | |
206 | std::shared_ptr<Column> slice2 = one_col_->Slice(75)->Slice(25)->Slice(25, 50); |
207 | ASSERT_EQ(slice2->length(), 50); |
208 | ASSERT_TRUE(slice2->Equals(slice)); |
209 | } |
210 | |
211 | TEST_F(TestColumn, Equals) { |
212 | std::vector<bool> null_bitmap(100, true); |
213 | std::vector<int32_t> data(100, 1); |
214 | std::shared_ptr<Array> array; |
215 | ArrayFromVector<Int32Type, int32_t>(null_bitmap, data, &array); |
216 | arrays_one_.push_back(array); |
217 | arrays_another_.push_back(array); |
218 | |
219 | one_field_ = field("column" , int32()); |
220 | another_field_ = field("column" , int32()); |
221 | |
222 | Construct(); |
223 | ASSERT_TRUE(one_col_->Equals(one_col_)); |
224 | ASSERT_FALSE(one_col_->Equals(nullptr)); |
225 | ASSERT_TRUE(one_col_->Equals(another_col_)); |
226 | ASSERT_TRUE(one_col_->Equals(*another_col_.get())); |
227 | |
228 | // Field is different |
229 | another_field_ = field("two" , int32()); |
230 | Construct(); |
231 | ASSERT_FALSE(one_col_->Equals(another_col_)); |
232 | ASSERT_FALSE(one_col_->Equals(*another_col_.get())); |
233 | |
234 | // ChunkedArray is different |
235 | another_field_ = field("column" , int32()); |
236 | arrays_another_.push_back(array); |
237 | Construct(); |
238 | ASSERT_FALSE(one_col_->Equals(another_col_)); |
239 | ASSERT_FALSE(one_col_->Equals(*another_col_.get())); |
240 | } |
241 | |
242 | class TestTable : public TestBase { |
243 | public: |
244 | void MakeExample1(int length) { |
245 | auto f0 = field("f0" , int32()); |
246 | auto f1 = field("f1" , uint8()); |
247 | auto f2 = field("f2" , int16()); |
248 | |
249 | vector<shared_ptr<Field>> fields = {f0, f1, f2}; |
250 | schema_ = std::make_shared<Schema>(fields); |
251 | |
252 | arrays_ = {MakeRandomArray<Int32Array>(length), MakeRandomArray<UInt8Array>(length), |
253 | MakeRandomArray<Int16Array>(length)}; |
254 | |
255 | columns_ = {std::make_shared<Column>(schema_->field(0), arrays_[0]), |
256 | std::make_shared<Column>(schema_->field(1), arrays_[1]), |
257 | std::make_shared<Column>(schema_->field(2), arrays_[2])}; |
258 | } |
259 | |
260 | protected: |
261 | std::shared_ptr<Table> table_; |
262 | shared_ptr<Schema> schema_; |
263 | |
264 | std::vector<std::shared_ptr<Array>> arrays_; |
265 | std::vector<std::shared_ptr<Column>> columns_; |
266 | }; |
267 | |
268 | TEST_F(TestTable, EmptySchema) { |
269 | auto empty_schema = ::arrow::schema({}); |
270 | table_ = Table::Make(empty_schema, columns_); |
271 | ASSERT_OK(table_->Validate()); |
272 | ASSERT_EQ(0, table_->num_rows()); |
273 | ASSERT_EQ(0, table_->num_columns()); |
274 | } |
275 | |
276 | TEST_F(TestTable, Ctors) { |
277 | const int length = 100; |
278 | MakeExample1(length); |
279 | |
280 | table_ = Table::Make(schema_, columns_); |
281 | ASSERT_OK(table_->Validate()); |
282 | ASSERT_EQ(length, table_->num_rows()); |
283 | ASSERT_EQ(3, table_->num_columns()); |
284 | |
285 | auto array_ctor = Table::Make(schema_, arrays_); |
286 | ASSERT_TRUE(table_->Equals(*array_ctor)); |
287 | |
288 | table_ = Table::Make(schema_, columns_, length); |
289 | ASSERT_OK(table_->Validate()); |
290 | ASSERT_EQ(length, table_->num_rows()); |
291 | |
292 | table_ = Table::Make(schema_, arrays_); |
293 | ASSERT_OK(table_->Validate()); |
294 | ASSERT_EQ(length, table_->num_rows()); |
295 | ASSERT_EQ(3, table_->num_columns()); |
296 | } |
297 | |
298 | TEST_F(TestTable, Metadata) { |
299 | const int length = 100; |
300 | MakeExample1(length); |
301 | |
302 | table_ = Table::Make(schema_, columns_); |
303 | |
304 | ASSERT_TRUE(table_->schema()->Equals(*schema_)); |
305 | |
306 | auto col = table_->column(0); |
307 | ASSERT_EQ(schema_->field(0)->name(), col->name()); |
308 | ASSERT_EQ(schema_->field(0)->type(), col->type()); |
309 | } |
310 | |
311 | TEST_F(TestTable, InvalidColumns) { |
312 | // Check that columns are all the same length |
313 | const int length = 100; |
314 | MakeExample1(length); |
315 | |
316 | table_ = Table::Make(schema_, columns_, length - 1); |
317 | ASSERT_RAISES(Invalid, table_->Validate()); |
318 | |
319 | columns_.clear(); |
320 | |
321 | // Wrong number of columns |
322 | table_ = Table::Make(schema_, columns_, length); |
323 | ASSERT_RAISES(Invalid, table_->Validate()); |
324 | |
325 | columns_ = { |
326 | std::make_shared<Column>(schema_->field(0), MakeRandomArray<Int32Array>(length)), |
327 | std::make_shared<Column>(schema_->field(1), MakeRandomArray<UInt8Array>(length)), |
328 | std::make_shared<Column>(schema_->field(2), |
329 | MakeRandomArray<Int16Array>(length - 1))}; |
330 | |
331 | table_ = Table::Make(schema_, columns_, length); |
332 | ASSERT_RAISES(Invalid, table_->Validate()); |
333 | } |
334 | |
335 | TEST_F(TestTable, Equals) { |
336 | const int length = 100; |
337 | MakeExample1(length); |
338 | |
339 | table_ = Table::Make(schema_, columns_); |
340 | |
341 | ASSERT_TRUE(table_->Equals(*table_)); |
342 | // Differing schema |
343 | auto f0 = field("f3" , int32()); |
344 | auto f1 = field("f4" , uint8()); |
345 | auto f2 = field("f5" , int16()); |
346 | vector<shared_ptr<Field>> fields = {f0, f1, f2}; |
347 | auto other_schema = std::make_shared<Schema>(fields); |
348 | auto other = Table::Make(other_schema, columns_); |
349 | ASSERT_FALSE(table_->Equals(*other)); |
350 | // Differing columns |
351 | std::vector<std::shared_ptr<Column>> other_columns = { |
352 | std::make_shared<Column>(schema_->field(0), |
353 | MakeRandomArray<Int32Array>(length, 10)), |
354 | std::make_shared<Column>(schema_->field(1), |
355 | MakeRandomArray<UInt8Array>(length, 10)), |
356 | std::make_shared<Column>(schema_->field(2), |
357 | MakeRandomArray<Int16Array>(length, 10))}; |
358 | |
359 | other = Table::Make(schema_, other_columns); |
360 | ASSERT_FALSE(table_->Equals(*other)); |
361 | } |
362 | |
363 | TEST_F(TestTable, FromRecordBatches) { |
364 | const int64_t length = 10; |
365 | MakeExample1(length); |
366 | |
367 | auto batch1 = RecordBatch::Make(schema_, length, arrays_); |
368 | |
369 | std::shared_ptr<Table> result, expected; |
370 | ASSERT_OK(Table::FromRecordBatches({batch1}, &result)); |
371 | |
372 | expected = Table::Make(schema_, columns_); |
373 | ASSERT_TRUE(result->Equals(*expected)); |
374 | |
375 | std::vector<std::shared_ptr<Column>> other_columns; |
376 | for (int i = 0; i < schema_->num_fields(); ++i) { |
377 | std::vector<std::shared_ptr<Array>> col_arrays = {arrays_[i], arrays_[i]}; |
378 | other_columns.push_back(std::make_shared<Column>(schema_->field(i), col_arrays)); |
379 | } |
380 | |
381 | ASSERT_OK(Table::FromRecordBatches({batch1, batch1}, &result)); |
382 | expected = Table::Make(schema_, other_columns); |
383 | ASSERT_TRUE(result->Equals(*expected)); |
384 | |
385 | // Error states |
386 | std::vector<std::shared_ptr<RecordBatch>> empty_batches; |
387 | ASSERT_RAISES(Invalid, Table::FromRecordBatches(empty_batches, &result)); |
388 | |
389 | auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)}); |
390 | |
391 | std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]}; |
392 | auto batch2 = RecordBatch::Make(other_schema, length, other_arrays); |
393 | ASSERT_RAISES(Invalid, Table::FromRecordBatches({batch1, batch2}, &result)); |
394 | } |
395 | |
396 | TEST_F(TestTable, FromRecordBatchesZeroLength) { |
397 | // ARROW-2307 |
398 | MakeExample1(10); |
399 | |
400 | std::shared_ptr<Table> result; |
401 | ASSERT_OK(Table::FromRecordBatches(schema_, {}, &result)); |
402 | |
403 | ASSERT_EQ(0, result->num_rows()); |
404 | ASSERT_TRUE(result->schema()->Equals(*schema_)); |
405 | } |
406 | |
407 | TEST_F(TestTable, ConcatenateTables) { |
408 | const int64_t length = 10; |
409 | |
410 | MakeExample1(length); |
411 | auto batch1 = RecordBatch::Make(schema_, length, arrays_); |
412 | |
413 | // generate different data |
414 | MakeExample1(length); |
415 | auto batch2 = RecordBatch::Make(schema_, length, arrays_); |
416 | |
417 | std::shared_ptr<Table> t1, t2, t3, result, expected; |
418 | ASSERT_OK(Table::FromRecordBatches({batch1}, &t1)); |
419 | ASSERT_OK(Table::FromRecordBatches({batch2}, &t2)); |
420 | |
421 | ASSERT_OK(ConcatenateTables({t1, t2}, &result)); |
422 | ASSERT_OK(Table::FromRecordBatches({batch1, batch2}, &expected)); |
423 | AssertTablesEqual(*expected, *result); |
424 | |
425 | // Error states |
426 | std::vector<std::shared_ptr<Table>> empty_tables; |
427 | ASSERT_RAISES(Invalid, ConcatenateTables(empty_tables, &result)); |
428 | |
429 | auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)}); |
430 | |
431 | std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]}; |
432 | auto batch3 = RecordBatch::Make(other_schema, length, other_arrays); |
433 | ASSERT_OK(Table::FromRecordBatches({batch3}, &t3)); |
434 | |
435 | ASSERT_RAISES(Invalid, ConcatenateTables({t1, t3}, &result)); |
436 | } |
437 | |
438 | TEST_F(TestTable, RemoveColumn) { |
439 | const int64_t length = 10; |
440 | MakeExample1(length); |
441 | |
442 | auto table_sp = Table::Make(schema_, columns_); |
443 | const Table& table = *table_sp; |
444 | |
445 | std::shared_ptr<Table> result; |
446 | ASSERT_OK(table.RemoveColumn(0, &result)); |
447 | |
448 | auto ex_schema = ::arrow::schema({schema_->field(1), schema_->field(2)}); |
449 | std::vector<std::shared_ptr<Column>> ex_columns = {table.column(1), table.column(2)}; |
450 | |
451 | auto expected = Table::Make(ex_schema, ex_columns); |
452 | ASSERT_TRUE(result->Equals(*expected)); |
453 | |
454 | ASSERT_OK(table.RemoveColumn(1, &result)); |
455 | ex_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); |
456 | ex_columns = {table.column(0), table.column(2)}; |
457 | |
458 | expected = Table::Make(ex_schema, ex_columns); |
459 | ASSERT_TRUE(result->Equals(*expected)); |
460 | |
461 | ASSERT_OK(table.RemoveColumn(2, &result)); |
462 | ex_schema = ::arrow::schema({schema_->field(0), schema_->field(1)}); |
463 | ex_columns = {table.column(0), table.column(1)}; |
464 | expected = Table::Make(ex_schema, ex_columns); |
465 | ASSERT_TRUE(result->Equals(*expected)); |
466 | } |
467 | |
468 | TEST_F(TestTable, SetColumn) { |
469 | const int64_t length = 10; |
470 | MakeExample1(length); |
471 | |
472 | auto table_sp = Table::Make(schema_, columns_); |
473 | const Table& table = *table_sp; |
474 | |
475 | std::shared_ptr<Table> result; |
476 | ASSERT_OK(table.SetColumn(0, table.column(1), &result)); |
477 | |
478 | auto ex_schema = |
479 | ::arrow::schema({schema_->field(1), schema_->field(1), schema_->field(2)}); |
480 | std::vector<std::shared_ptr<Column>> ex_columns = {table.column(1), table.column(1), |
481 | table.column(2)}; |
482 | |
483 | auto expected = Table::Make(ex_schema, ex_columns); |
484 | ASSERT_TRUE(result->Equals(*expected)); |
485 | } |
486 | |
487 | TEST_F(TestTable, RemoveColumnEmpty) { |
488 | // ARROW-1865 |
489 | const int64_t length = 10; |
490 | |
491 | auto f0 = field("f0" , int32()); |
492 | auto schema = ::arrow::schema({f0}); |
493 | auto a0 = MakeRandomArray<Int32Array>(length); |
494 | |
495 | auto table = Table::Make(schema, {std::make_shared<Column>(f0, a0)}); |
496 | |
497 | std::shared_ptr<Table> empty; |
498 | ASSERT_OK(table->RemoveColumn(0, &empty)); |
499 | |
500 | ASSERT_EQ(table->num_rows(), empty->num_rows()); |
501 | |
502 | std::shared_ptr<Table> added; |
503 | ASSERT_OK(empty->AddColumn(0, table->column(0), &added)); |
504 | ASSERT_EQ(table->num_rows(), added->num_rows()); |
505 | } |
506 | |
507 | TEST_F(TestTable, AddColumn) { |
508 | const int64_t length = 10; |
509 | MakeExample1(length); |
510 | |
511 | auto table_sp = Table::Make(schema_, columns_); |
512 | const Table& table = *table_sp; |
513 | |
514 | std::shared_ptr<Table> result; |
515 | // Some negative tests with invalid index |
516 | Status status = table.AddColumn(10, columns_[0], &result); |
517 | ASSERT_TRUE(status.IsInvalid()); |
518 | status = table.AddColumn(4, columns_[0], &result); |
519 | ASSERT_TRUE(status.IsInvalid()); |
520 | status = table.AddColumn(-1, columns_[0], &result); |
521 | ASSERT_TRUE(status.IsInvalid()); |
522 | |
523 | // Add column with wrong length |
524 | auto longer_col = std::make_shared<Column>(schema_->field(0), |
525 | MakeRandomArray<Int32Array>(length + 1)); |
526 | status = table.AddColumn(0, longer_col, &result); |
527 | ASSERT_TRUE(status.IsInvalid()); |
528 | |
529 | // Add column 0 in different places |
530 | ASSERT_OK(table.AddColumn(0, columns_[0], &result)); |
531 | auto ex_schema = ::arrow::schema( |
532 | {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}); |
533 | |
534 | auto expected = Table::Make( |
535 | ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)}); |
536 | ASSERT_TRUE(result->Equals(*expected)); |
537 | |
538 | ASSERT_OK(table.AddColumn(1, columns_[0], &result)); |
539 | ex_schema = ::arrow::schema( |
540 | {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}); |
541 | |
542 | expected = Table::Make( |
543 | ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)}); |
544 | ASSERT_TRUE(result->Equals(*expected)); |
545 | |
546 | ASSERT_OK(table.AddColumn(2, columns_[0], &result)); |
547 | ex_schema = ::arrow::schema( |
548 | {schema_->field(0), schema_->field(1), schema_->field(0), schema_->field(2)}); |
549 | expected = Table::Make( |
550 | ex_schema, {table.column(0), table.column(1), table.column(0), table.column(2)}); |
551 | ASSERT_TRUE(result->Equals(*expected)); |
552 | |
553 | ASSERT_OK(table.AddColumn(3, columns_[0], &result)); |
554 | ex_schema = ::arrow::schema( |
555 | {schema_->field(0), schema_->field(1), schema_->field(2), schema_->field(0)}); |
556 | expected = Table::Make( |
557 | ex_schema, {table.column(0), table.column(1), table.column(2), table.column(0)}); |
558 | ASSERT_TRUE(result->Equals(*expected)); |
559 | } |
560 | |
561 | class TestRecordBatch : public TestBase {}; |
562 | |
563 | TEST_F(TestRecordBatch, Equals) { |
564 | const int length = 10; |
565 | |
566 | auto f0 = field("f0" , int32()); |
567 | auto f1 = field("f1" , uint8()); |
568 | auto f2 = field("f2" , int16()); |
569 | |
570 | vector<shared_ptr<Field>> fields = {f0, f1, f2}; |
571 | auto schema = ::arrow::schema({f0, f1, f2}); |
572 | auto schema2 = ::arrow::schema({f0, f1}); |
573 | |
574 | auto a0 = MakeRandomArray<Int32Array>(length); |
575 | auto a1 = MakeRandomArray<UInt8Array>(length); |
576 | auto a2 = MakeRandomArray<Int16Array>(length); |
577 | |
578 | auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2}); |
579 | auto b3 = RecordBatch::Make(schema2, length, {a0, a1}); |
580 | auto b4 = RecordBatch::Make(schema, length, {a0, a1, a1}); |
581 | |
582 | ASSERT_TRUE(b1->Equals(*b1)); |
583 | ASSERT_FALSE(b1->Equals(*b3)); |
584 | ASSERT_FALSE(b1->Equals(*b4)); |
585 | } |
586 | |
587 | TEST_F(TestRecordBatch, Validate) { |
588 | const int length = 10; |
589 | |
590 | auto f0 = field("f0" , int32()); |
591 | auto f1 = field("f1" , uint8()); |
592 | auto f2 = field("f2" , int16()); |
593 | |
594 | auto schema = ::arrow::schema({f0, f1, f2}); |
595 | |
596 | auto a0 = MakeRandomArray<Int32Array>(length); |
597 | auto a1 = MakeRandomArray<UInt8Array>(length); |
598 | auto a2 = MakeRandomArray<Int16Array>(length); |
599 | auto a3 = MakeRandomArray<Int16Array>(5); |
600 | |
601 | auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2}); |
602 | |
603 | ASSERT_OK(b1->Validate()); |
604 | |
605 | // Length mismatch |
606 | auto b2 = RecordBatch::Make(schema, length, {a0, a1, a3}); |
607 | ASSERT_RAISES(Invalid, b2->Validate()); |
608 | |
609 | // Type mismatch |
610 | auto b3 = RecordBatch::Make(schema, length, {a0, a1, a0}); |
611 | ASSERT_RAISES(Invalid, b3->Validate()); |
612 | } |
613 | |
614 | TEST_F(TestRecordBatch, Slice) { |
615 | const int length = 10; |
616 | |
617 | auto f0 = field("f0" , int32()); |
618 | auto f1 = field("f1" , uint8()); |
619 | |
620 | vector<shared_ptr<Field>> fields = {f0, f1}; |
621 | auto schema = ::arrow::schema(fields); |
622 | |
623 | auto a0 = MakeRandomArray<Int32Array>(length); |
624 | auto a1 = MakeRandomArray<UInt8Array>(length); |
625 | |
626 | auto batch = RecordBatch::Make(schema, length, {a0, a1}); |
627 | |
628 | auto batch_slice = batch->Slice(2); |
629 | auto batch_slice2 = batch->Slice(1, 5); |
630 | |
631 | ASSERT_EQ(batch_slice->num_rows(), batch->num_rows() - 2); |
632 | |
633 | for (int i = 0; i < batch->num_columns(); ++i) { |
634 | ASSERT_EQ(2, batch_slice->column(i)->offset()); |
635 | ASSERT_EQ(length - 2, batch_slice->column(i)->length()); |
636 | |
637 | ASSERT_EQ(1, batch_slice2->column(i)->offset()); |
638 | ASSERT_EQ(5, batch_slice2->column(i)->length()); |
639 | } |
640 | } |
641 | |
642 | TEST_F(TestRecordBatch, AddColumn) { |
643 | const int length = 10; |
644 | |
645 | auto field1 = field("f1" , int32()); |
646 | auto field2 = field("f2" , uint8()); |
647 | auto field3 = field("f3" , int16()); |
648 | |
649 | auto schema1 = ::arrow::schema({field1, field2}); |
650 | auto schema2 = ::arrow::schema({field2, field3}); |
651 | auto schema3 = ::arrow::schema({field2}); |
652 | |
653 | auto array1 = MakeRandomArray<Int32Array>(length); |
654 | auto array2 = MakeRandomArray<UInt8Array>(length); |
655 | auto array3 = MakeRandomArray<Int16Array>(length); |
656 | |
657 | auto batch1 = RecordBatch::Make(schema1, length, {array1, array2}); |
658 | auto batch2 = RecordBatch::Make(schema2, length, {array2, array3}); |
659 | auto batch3 = RecordBatch::Make(schema3, length, {array2}); |
660 | |
661 | const RecordBatch& batch = *batch3; |
662 | std::shared_ptr<RecordBatch> result; |
663 | |
664 | // Negative tests with invalid index |
665 | Status status = batch.AddColumn(5, field1, array1, &result); |
666 | ASSERT_TRUE(status.IsInvalid()); |
667 | status = batch.AddColumn(2, field1, array1, &result); |
668 | ASSERT_TRUE(status.IsInvalid()); |
669 | status = batch.AddColumn(-1, field1, array1, &result); |
670 | ASSERT_TRUE(status.IsInvalid()); |
671 | |
672 | // Negative test with wrong length |
673 | auto longer_col = MakeRandomArray<Int32Array>(length + 1); |
674 | status = batch.AddColumn(0, field1, longer_col, &result); |
675 | ASSERT_TRUE(status.IsInvalid()); |
676 | |
677 | // Negative test with mismatch type |
678 | status = batch.AddColumn(0, field1, array2, &result); |
679 | ASSERT_TRUE(status.IsInvalid()); |
680 | |
681 | ASSERT_OK(batch.AddColumn(0, field1, array1, &result)); |
682 | ASSERT_TRUE(result->Equals(*batch1)); |
683 | |
684 | ASSERT_OK(batch.AddColumn(1, field3, array3, &result)); |
685 | ASSERT_TRUE(result->Equals(*batch2)); |
686 | |
687 | std::shared_ptr<RecordBatch> result2; |
688 | ASSERT_OK(batch.AddColumn(1, "f3" , array3, &result2)); |
689 | ASSERT_TRUE(result2->Equals(*result)); |
690 | |
691 | ASSERT_TRUE(result2->schema()->field(1)->nullable()); |
692 | } |
693 | |
694 | TEST_F(TestRecordBatch, RemoveColumn) { |
695 | const int length = 10; |
696 | |
697 | auto field1 = field("f1" , int32()); |
698 | auto field2 = field("f2" , uint8()); |
699 | auto field3 = field("f3" , int16()); |
700 | |
701 | auto schema1 = ::arrow::schema({field1, field2, field3}); |
702 | auto schema2 = ::arrow::schema({field2, field3}); |
703 | auto schema3 = ::arrow::schema({field1, field3}); |
704 | auto schema4 = ::arrow::schema({field1, field2}); |
705 | |
706 | auto array1 = MakeRandomArray<Int32Array>(length); |
707 | auto array2 = MakeRandomArray<UInt8Array>(length); |
708 | auto array3 = MakeRandomArray<Int16Array>(length); |
709 | |
710 | auto batch1 = RecordBatch::Make(schema1, length, {array1, array2, array3}); |
711 | auto batch2 = RecordBatch::Make(schema2, length, {array2, array3}); |
712 | auto batch3 = RecordBatch::Make(schema3, length, {array1, array3}); |
713 | auto batch4 = RecordBatch::Make(schema4, length, {array1, array2}); |
714 | |
715 | const RecordBatch& batch = *batch1; |
716 | std::shared_ptr<RecordBatch> result; |
717 | |
718 | // Negative tests with invalid index |
719 | Status status = batch.RemoveColumn(3, &result); |
720 | ASSERT_TRUE(status.IsInvalid()); |
721 | status = batch.RemoveColumn(-1, &result); |
722 | ASSERT_TRUE(status.IsInvalid()); |
723 | |
724 | ASSERT_OK(batch.RemoveColumn(0, &result)); |
725 | ASSERT_TRUE(result->Equals(*batch2)); |
726 | |
727 | ASSERT_OK(batch.RemoveColumn(1, &result)); |
728 | ASSERT_TRUE(result->Equals(*batch3)); |
729 | |
730 | ASSERT_OK(batch.RemoveColumn(2, &result)); |
731 | ASSERT_TRUE(result->Equals(*batch4)); |
732 | } |
733 | |
734 | TEST_F(TestRecordBatch, RemoveColumnEmpty) { |
735 | const int length = 10; |
736 | |
737 | auto field1 = field("f1" , int32()); |
738 | auto schema1 = ::arrow::schema({field1}); |
739 | auto array1 = MakeRandomArray<Int32Array>(length); |
740 | auto batch1 = RecordBatch::Make(schema1, length, {array1}); |
741 | |
742 | std::shared_ptr<RecordBatch> empty; |
743 | ASSERT_OK(batch1->RemoveColumn(0, &empty)); |
744 | ASSERT_EQ(batch1->num_rows(), empty->num_rows()); |
745 | |
746 | std::shared_ptr<RecordBatch> added; |
747 | ASSERT_OK(empty->AddColumn(0, field1, array1, &added)); |
748 | ASSERT_TRUE(added->Equals(*batch1)); |
749 | } |
750 | |
751 | class TestTableBatchReader : public TestBase {}; |
752 | |
753 | TEST_F(TestTableBatchReader, ReadNext) { |
754 | ArrayVector c1, c2; |
755 | |
756 | auto a1 = MakeRandomArray<Int32Array>(10); |
757 | auto a2 = MakeRandomArray<Int32Array>(20); |
758 | auto a3 = MakeRandomArray<Int32Array>(30); |
759 | auto a4 = MakeRandomArray<Int32Array>(10); |
760 | |
761 | auto sch1 = arrow::schema({field("f1" , int32()), field("f2" , int32())}); |
762 | |
763 | std::vector<std::shared_ptr<Column>> columns; |
764 | |
765 | std::shared_ptr<RecordBatch> batch; |
766 | |
767 | columns = {column(sch1->field(0), {a1, a4, a2}), column(sch1->field(1), {a2, a2})}; |
768 | auto t1 = Table::Make(sch1, columns); |
769 | |
770 | TableBatchReader i1(*t1); |
771 | |
772 | ASSERT_OK(i1.ReadNext(&batch)); |
773 | ASSERT_EQ(10, batch->num_rows()); |
774 | |
775 | ASSERT_OK(i1.ReadNext(&batch)); |
776 | ASSERT_EQ(10, batch->num_rows()); |
777 | |
778 | ASSERT_OK(i1.ReadNext(&batch)); |
779 | ASSERT_EQ(20, batch->num_rows()); |
780 | |
781 | ASSERT_OK(i1.ReadNext(&batch)); |
782 | ASSERT_EQ(nullptr, batch); |
783 | |
784 | columns = {column(sch1->field(0), {a1}), column(sch1->field(1), {a4})}; |
785 | auto t2 = Table::Make(sch1, columns); |
786 | |
787 | TableBatchReader i2(*t2); |
788 | |
789 | ASSERT_OK(i2.ReadNext(&batch)); |
790 | ASSERT_EQ(10, batch->num_rows()); |
791 | |
792 | // Ensure non-sliced |
793 | ASSERT_EQ(a1->data().get(), batch->column_data(0).get()); |
794 | ASSERT_EQ(a4->data().get(), batch->column_data(1).get()); |
795 | |
796 | ASSERT_OK(i1.ReadNext(&batch)); |
797 | ASSERT_EQ(nullptr, batch); |
798 | } |
799 | |
800 | TEST_F(TestTableBatchReader, Chunksize) { |
801 | auto a1 = MakeRandomArray<Int32Array>(10); |
802 | auto a2 = MakeRandomArray<Int32Array>(20); |
803 | auto a3 = MakeRandomArray<Int32Array>(10); |
804 | |
805 | auto sch1 = arrow::schema({field("f1" , int32())}); |
806 | auto t1 = Table::Make(sch1, {column(sch1->field(0), {a1, a2, a3})}); |
807 | |
808 | TableBatchReader i1(*t1); |
809 | |
810 | i1.set_chunksize(15); |
811 | |
812 | std::shared_ptr<RecordBatch> batch; |
813 | ASSERT_OK(i1.ReadNext(&batch)); |
814 | ASSERT_OK(batch->Validate()); |
815 | ASSERT_EQ(10, batch->num_rows()); |
816 | |
817 | ASSERT_OK(i1.ReadNext(&batch)); |
818 | ASSERT_OK(batch->Validate()); |
819 | ASSERT_EQ(15, batch->num_rows()); |
820 | |
821 | ASSERT_OK(i1.ReadNext(&batch)); |
822 | ASSERT_OK(batch->Validate()); |
823 | ASSERT_EQ(5, batch->num_rows()); |
824 | |
825 | ASSERT_OK(i1.ReadNext(&batch)); |
826 | ASSERT_OK(batch->Validate()); |
827 | ASSERT_EQ(10, batch->num_rows()); |
828 | |
829 | ASSERT_OK(i1.ReadNext(&batch)); |
830 | ASSERT_EQ(nullptr, batch); |
831 | } |
832 | |
833 | } // namespace arrow |
834 | |