1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include "arrow/array/builder_nested.h" |
19 | |
20 | #include <algorithm> |
21 | #include <cstddef> |
22 | #include <cstdint> |
23 | #include <cstring> |
24 | #include <sstream> |
25 | #include <utility> |
26 | #include <vector> |
27 | |
28 | #include "arrow/array.h" |
29 | #include "arrow/buffer.h" |
30 | #include "arrow/status.h" |
31 | #include "arrow/type.h" |
32 | #include "arrow/type_traits.h" |
33 | #include "arrow/util/bit-util.h" |
34 | #include "arrow/util/int-util.h" |
35 | #include "arrow/util/logging.h" |
36 | |
37 | namespace arrow { |
38 | |
39 | // ---------------------------------------------------------------------- |
40 | // ListBuilder |
41 | |
42 | ListBuilder::ListBuilder(MemoryPool* pool, |
43 | std::shared_ptr<ArrayBuilder> const& value_builder, |
44 | const std::shared_ptr<DataType>& type) |
45 | : ArrayBuilder(type ? type |
46 | : std::static_pointer_cast<DataType>( |
47 | std::make_shared<ListType>(value_builder->type())), |
48 | pool), |
49 | offsets_builder_(pool), |
50 | value_builder_(value_builder) {} |
51 | |
52 | Status ListBuilder::AppendValues(const int32_t* offsets, int64_t length, |
53 | const uint8_t* valid_bytes) { |
54 | RETURN_NOT_OK(Reserve(length)); |
55 | UnsafeAppendToBitmap(valid_bytes, length); |
56 | offsets_builder_.UnsafeAppend(offsets, length); |
57 | return Status::OK(); |
58 | } |
59 | |
60 | Status ListBuilder::AppendNextOffset() { |
61 | const int64_t num_values = value_builder_->length(); |
62 | ARROW_RETURN_IF( |
63 | num_values > kListMaximumElements, |
64 | Status::CapacityError("ListArray cannot contain more then 2^31 - 1 child elements," , |
65 | " have " , num_values)); |
66 | return offsets_builder_.Append(static_cast<int32_t>(num_values)); |
67 | } |
68 | |
// Starts a new list slot.
//
// is_valid: validity flag recorded for the new slot via UnsafeAppendToBitmap.
//
// Returns the error from Reserve(1) or from AppendNextOffset(); OK otherwise.
// Note that the validity bit is recorded before the offset is appended, so
// if AppendNextOffset() fails the bitmap has already been updated.
Status ListBuilder::Append(bool is_valid) {
  // Reserve room for one more slot before the unchecked bitmap append.
  RETURN_NOT_OK(Reserve(1));
  UnsafeAppendToBitmap(is_valid);
  // The slot's starting offset is the current length of the value builder.
  return AppendNextOffset();
}
74 | |
75 | Status ListBuilder::Resize(int64_t capacity) { |
76 | DCHECK_LE(capacity, kListMaximumElements); |
77 | RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); |
78 | |
79 | // one more then requested for offsets |
80 | RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1)); |
81 | return ArrayBuilder::Resize(capacity); |
82 | } |
83 | |
84 | Status ListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) { |
85 | RETURN_NOT_OK(AppendNextOffset()); |
86 | |
87 | // Offset padding zeroed by BufferBuilder |
88 | std::shared_ptr<Buffer> offsets; |
89 | RETURN_NOT_OK(offsets_builder_.Finish(&offsets)); |
90 | |
91 | std::shared_ptr<ArrayData> items; |
92 | if (values_) { |
93 | items = values_->data(); |
94 | } else { |
95 | if (value_builder_->length() == 0) { |
96 | // Try to make sure we get a non-null values buffer (ARROW-2744) |
97 | RETURN_NOT_OK(value_builder_->Resize(0)); |
98 | } |
99 | RETURN_NOT_OK(value_builder_->FinishInternal(&items)); |
100 | } |
101 | |
102 | std::shared_ptr<Buffer> null_bitmap; |
103 | RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); |
104 | *out = ArrayData::Make(type_, length_, {null_bitmap, offsets}, null_count_); |
105 | (*out)->child_data.emplace_back(std::move(items)); |
106 | Reset(); |
107 | return Status::OK(); |
108 | } |
109 | |
110 | void ListBuilder::Reset() { |
111 | ArrayBuilder::Reset(); |
112 | values_.reset(); |
113 | offsets_builder_.Reset(); |
114 | value_builder_->Reset(); |
115 | } |
116 | |
117 | ArrayBuilder* ListBuilder::value_builder() const { |
118 | DCHECK(!values_) << "Using value builder is pointless when values_ is set" ; |
119 | return value_builder_.get(); |
120 | } |
121 | |
122 | // ---------------------------------------------------------------------- |
123 | // Struct |
124 | |
125 | StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool, |
126 | std::vector<std::shared_ptr<ArrayBuilder>>&& field_builders) |
127 | : ArrayBuilder(type, pool) { |
128 | children_ = std::move(field_builders); |
129 | } |
130 | |
131 | void StructBuilder::Reset() { |
132 | ArrayBuilder::Reset(); |
133 | for (const auto& field_builder : children_) { |
134 | field_builder->Reset(); |
135 | } |
136 | } |
137 | |
138 | Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) { |
139 | std::shared_ptr<Buffer> null_bitmap; |
140 | RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); |
141 | *out = ArrayData::Make(type_, length_, {null_bitmap}, null_count_); |
142 | |
143 | (*out)->child_data.resize(children_.size()); |
144 | for (size_t i = 0; i < children_.size(); ++i) { |
145 | if (length_ == 0) { |
146 | // Try to make sure the child buffers are initialized |
147 | RETURN_NOT_OK(children_[i]->Resize(0)); |
148 | } |
149 | RETURN_NOT_OK(children_[i]->FinishInternal(&(*out)->child_data[i])); |
150 | } |
151 | |
152 | capacity_ = length_ = null_count_ = 0; |
153 | return Status::OK(); |
154 | } |
155 | |
156 | } // namespace arrow |
157 | |