1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <cstddef> |
19 | #include <cstdint> |
20 | #include <memory> |
21 | #include <string> |
22 | #include <vector> |
23 | |
24 | #include <gtest/gtest.h> |
25 | |
26 | #include "arrow/builder.h" |
27 | #include "arrow/record_batch.h" |
28 | #include "arrow/status.h" |
29 | #include "arrow/table_builder.h" |
30 | #include "arrow/test-common.h" |
31 | #include "arrow/test-util.h" |
32 | #include "arrow/type.h" |
33 | #include "arrow/util/checked_cast.h" |
34 | |
35 | namespace arrow { |
36 | |
37 | class Array; |
38 | |
39 | using internal::checked_cast; |
40 | |
41 | class TestRecordBatchBuilder : public TestBase { |
42 | public: |
43 | }; |
44 | |
45 | std::shared_ptr<Schema> ExampleSchema1() { |
46 | auto f0 = field("f0" , int32()); |
47 | auto f1 = field("f1" , utf8()); |
48 | auto f2 = field("f1" , list(int8())); |
49 | return ::arrow::schema({f0, f1, f2}); |
50 | } |
51 | |
// Appends every element of `values` to `builder`, in order.
//
// An empty `is_valid` means "no mask": every element is appended with
// Append(). Otherwise element i is appended with Append() when is_valid[i] is
// true and replaced by AppendNull() when it is false. `is_valid` is indexed by
// the position in `values` without a bounds check, so a non-empty mask must be
// at least as long as `values`.
//
// The status of each append call is checked with ASSERT_OK.
template <typename BuilderType, typename T>
void AppendValues(BuilderType* builder, const std::vector<T>& values,
                  const std::vector<bool>& is_valid) {
  const bool has_mask = !is_valid.empty();
  for (size_t i = 0; i < values.size(); ++i) {
    if (has_mask && !is_valid[i]) {
      ASSERT_OK(builder->AppendNull());
      continue;
    }
    ASSERT_OK(builder->Append(values[i]));
  }
}
63 | |
64 | template <typename ValueType, typename T> |
65 | void AppendList(ListBuilder* builder, const std::vector<std::vector<T>>& values, |
66 | const std::vector<bool>& is_valid) { |
67 | auto values_builder = checked_cast<ValueType*>(builder->value_builder()); |
68 | |
69 | for (size_t i = 0; i < values.size(); ++i) { |
70 | if (is_valid.size() == 0 || is_valid[i]) { |
71 | ASSERT_OK(builder->Append()); |
72 | AppendValues<ValueType, T>(values_builder, values[i], {}); |
73 | } else { |
74 | ASSERT_OK(builder->AppendNull()); |
75 | } |
76 | } |
77 | } |
78 | |
79 | TEST_F(TestRecordBatchBuilder, Basics) { |
80 | auto schema = ExampleSchema1(); |
81 | |
82 | std::unique_ptr<RecordBatchBuilder> builder; |
83 | ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder)); |
84 | |
85 | std::vector<bool> is_valid = {false, true, true, true}; |
86 | std::vector<int32_t> f0_values = {0, 1, 2, 3}; |
87 | std::vector<std::string> f1_values = {"a" , "bb" , "ccc" , "dddd" }; |
88 | std::vector<std::vector<int8_t>> f2_values = {{}, {0, 1}, {}, {2}}; |
89 | |
90 | std::shared_ptr<Array> a0, a1, a2; |
91 | |
92 | // Make the expected record batch |
93 | auto AppendData = [&](Int32Builder* b0, StringBuilder* b1, ListBuilder* b2) { |
94 | AppendValues<Int32Builder, int32_t>(b0, f0_values, is_valid); |
95 | AppendValues<StringBuilder, std::string>(b1, f1_values, is_valid); |
96 | AppendList<Int8Builder, int8_t>(b2, f2_values, is_valid); |
97 | }; |
98 | |
99 | Int32Builder ex_b0; |
100 | StringBuilder ex_b1; |
101 | ListBuilder ex_b2(pool_, std::unique_ptr<Int8Builder>(new Int8Builder(pool_))); |
102 | |
103 | AppendData(&ex_b0, &ex_b1, &ex_b2); |
104 | ASSERT_OK(ex_b0.Finish(&a0)); |
105 | ASSERT_OK(ex_b1.Finish(&a1)); |
106 | ASSERT_OK(ex_b2.Finish(&a2)); |
107 | |
108 | auto expected = RecordBatch::Make(schema, 4, {a0, a1, a2}); |
109 | |
110 | // Builder attributes |
111 | ASSERT_EQ(3, builder->num_fields()); |
112 | ASSERT_EQ(schema.get(), builder->schema().get()); |
113 | |
114 | const int kIter = 3; |
115 | for (int i = 0; i < kIter; ++i) { |
116 | AppendData(builder->GetFieldAs<Int32Builder>(0), |
117 | checked_cast<StringBuilder*>(builder->GetField(1)), |
118 | builder->GetFieldAs<ListBuilder>(2)); |
119 | |
120 | std::shared_ptr<RecordBatch> batch; |
121 | |
122 | if (i == kIter - 1) { |
123 | // Do not flush in last iteration |
124 | ASSERT_OK(builder->Flush(false, &batch)); |
125 | } else { |
126 | ASSERT_OK(builder->Flush(&batch)); |
127 | } |
128 | |
129 | ASSERT_BATCHES_EQUAL(*expected, *batch); |
130 | } |
131 | |
132 | // Test setting initial capacity |
133 | builder->SetInitialCapacity(4096); |
134 | ASSERT_EQ(4096, builder->initial_capacity()); |
135 | } |
136 | |
137 | TEST_F(TestRecordBatchBuilder, InvalidFieldLength) { |
138 | auto schema = ExampleSchema1(); |
139 | |
140 | std::unique_ptr<RecordBatchBuilder> builder; |
141 | ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder)); |
142 | |
143 | std::vector<bool> is_valid = {false, true, true, true}; |
144 | std::vector<int32_t> f0_values = {0, 1, 2, 3}; |
145 | |
146 | AppendValues<Int32Builder, int32_t>(builder->GetFieldAs<Int32Builder>(0), f0_values, |
147 | is_valid); |
148 | |
149 | std::shared_ptr<RecordBatch> dummy; |
150 | ASSERT_RAISES(Invalid, builder->Flush(&dummy)); |
151 | } |
152 | |
153 | } // namespace arrow |
154 | |