1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #pragma once |
19 | |
20 | #include <memory> |
21 | |
22 | #include "arrow/array/builder_base.h" |
23 | |
24 | namespace arrow { |
25 | |
26 | namespace internal { |
27 | |
28 | class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder { |
29 | public: |
30 | explicit AdaptiveIntBuilderBase(MemoryPool* pool); |
31 | |
32 | /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory |
33 | Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { |
34 | ARROW_RETURN_NOT_OK(CommitPendingData()); |
35 | ARROW_RETURN_NOT_OK(Reserve(length)); |
36 | memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length); |
37 | UnsafeAppendToBitmap(valid_bytes, length); |
38 | return Status::OK(); |
39 | } |
40 | |
41 | Status AppendNull() { |
42 | pending_data_[pending_pos_] = 0; |
43 | pending_valid_[pending_pos_] = 0; |
44 | pending_has_nulls_ = true; |
45 | ++pending_pos_; |
46 | |
47 | if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { |
48 | return CommitPendingData(); |
49 | } |
50 | return Status::OK(); |
51 | } |
52 | |
53 | void Reset() override; |
54 | Status Resize(int64_t capacity) override; |
55 | |
56 | protected: |
57 | virtual Status CommitPendingData() = 0; |
58 | |
59 | std::shared_ptr<ResizableBuffer> data_; |
60 | uint8_t* raw_data_; |
61 | uint8_t int_size_; |
62 | |
63 | static constexpr int32_t pending_size_ = 1024; |
64 | uint8_t pending_valid_[pending_size_]; |
65 | uint64_t pending_data_[pending_size_]; |
66 | int32_t pending_pos_; |
67 | bool pending_has_nulls_; |
68 | }; |
69 | |
70 | } // namespace internal |
71 | |
72 | class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase { |
73 | public: |
74 | explicit AdaptiveUIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); |
75 | |
76 | using ArrayBuilder::Advance; |
77 | using internal::AdaptiveIntBuilderBase::Reset; |
78 | |
79 | /// Scalar append |
80 | Status Append(const uint64_t val) { |
81 | pending_data_[pending_pos_] = val; |
82 | pending_valid_[pending_pos_] = 1; |
83 | ++pending_pos_; |
84 | |
85 | if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { |
86 | return CommitPendingData(); |
87 | } |
88 | return Status::OK(); |
89 | } |
90 | |
91 | /// \brief Append a sequence of elements in one shot |
92 | /// \param[in] values a contiguous C array of values |
93 | /// \param[in] length the number of values to append |
94 | /// \param[in] valid_bytes an optional sequence of bytes where non-zero |
95 | /// indicates a valid (non-null) value |
96 | /// \return Status |
97 | Status AppendValues(const uint64_t* values, int64_t length, |
98 | const uint8_t* valid_bytes = NULLPTR); |
99 | |
100 | Status FinishInternal(std::shared_ptr<ArrayData>* out) override; |
101 | |
102 | protected: |
103 | Status CommitPendingData() override; |
104 | Status ExpandIntSize(uint8_t new_int_size); |
105 | |
106 | Status AppendValuesInternal(const uint64_t* values, int64_t length, |
107 | const uint8_t* valid_bytes); |
108 | |
109 | template <typename new_type, typename old_type> |
110 | typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type |
111 | ExpandIntSizeInternal(); |
112 | #define __LESS(a, b) (a) < (b) |
113 | template <typename new_type, typename old_type> |
114 | typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type |
115 | ExpandIntSizeInternal(); |
116 | #undef __LESS |
117 | |
118 | template <typename new_type> |
119 | Status ExpandIntSizeN(); |
120 | }; |
121 | |
122 | class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase { |
123 | public: |
124 | explicit AdaptiveIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); |
125 | |
126 | using ArrayBuilder::Advance; |
127 | using internal::AdaptiveIntBuilderBase::Reset; |
128 | |
129 | /// Scalar append |
130 | Status Append(const int64_t val) { |
131 | auto v = static_cast<uint64_t>(val); |
132 | |
133 | pending_data_[pending_pos_] = v; |
134 | pending_valid_[pending_pos_] = 1; |
135 | ++pending_pos_; |
136 | |
137 | if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { |
138 | return CommitPendingData(); |
139 | } |
140 | return Status::OK(); |
141 | } |
142 | |
143 | /// \brief Append a sequence of elements in one shot |
144 | /// \param[in] values a contiguous C array of values |
145 | /// \param[in] length the number of values to append |
146 | /// \param[in] valid_bytes an optional sequence of bytes where non-zero |
147 | /// indicates a valid (non-null) value |
148 | /// \return Status |
149 | Status AppendValues(const int64_t* values, int64_t length, |
150 | const uint8_t* valid_bytes = NULLPTR); |
151 | |
152 | Status FinishInternal(std::shared_ptr<ArrayData>* out) override; |
153 | |
154 | protected: |
155 | Status CommitPendingData() override; |
156 | Status ExpandIntSize(uint8_t new_int_size); |
157 | |
158 | Status AppendValuesInternal(const int64_t* values, int64_t length, |
159 | const uint8_t* valid_bytes); |
160 | |
161 | template <typename new_type, typename old_type> |
162 | typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type |
163 | ExpandIntSizeInternal(); |
164 | #define __LESS(a, b) (a) < (b) |
165 | template <typename new_type, typename old_type> |
166 | typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type |
167 | ExpandIntSizeInternal(); |
168 | #undef __LESS |
169 | |
170 | template <typename new_type> |
171 | Status ExpandIntSizeN(); |
172 | }; |
173 | |
174 | } // namespace arrow |
175 | |