1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | // Private header, not to be exported |
19 | |
20 | #ifndef ARROW_VISITOR_INLINE_H |
21 | #define ARROW_VISITOR_INLINE_H |
22 | |
23 | #include "arrow/array.h" |
24 | #include "arrow/status.h" |
25 | #include "arrow/tensor.h" |
26 | #include "arrow/type.h" |
27 | #include "arrow/util/bit-util.h" |
28 | #include "arrow/util/checked_cast.h" |
29 | #include "arrow/util/string_view.h" |
30 | |
31 | namespace arrow { |
32 | |
33 | #define TYPE_VISIT_INLINE(TYPE_CLASS) \ |
34 | case TYPE_CLASS::type_id: \ |
35 | return visitor->Visit(internal::checked_cast<const TYPE_CLASS&>(type)); |
36 | |
37 | template <typename VISITOR> |
38 | inline Status VisitTypeInline(const DataType& type, VISITOR* visitor) { |
39 | switch (type.id()) { |
40 | TYPE_VISIT_INLINE(NullType); |
41 | TYPE_VISIT_INLINE(BooleanType); |
42 | TYPE_VISIT_INLINE(Int8Type); |
43 | TYPE_VISIT_INLINE(UInt8Type); |
44 | TYPE_VISIT_INLINE(Int16Type); |
45 | TYPE_VISIT_INLINE(UInt16Type); |
46 | TYPE_VISIT_INLINE(Int32Type); |
47 | TYPE_VISIT_INLINE(UInt32Type); |
48 | TYPE_VISIT_INLINE(Int64Type); |
49 | TYPE_VISIT_INLINE(UInt64Type); |
50 | TYPE_VISIT_INLINE(HalfFloatType); |
51 | TYPE_VISIT_INLINE(FloatType); |
52 | TYPE_VISIT_INLINE(DoubleType); |
53 | TYPE_VISIT_INLINE(StringType); |
54 | TYPE_VISIT_INLINE(BinaryType); |
55 | TYPE_VISIT_INLINE(FixedSizeBinaryType); |
56 | TYPE_VISIT_INLINE(Date32Type); |
57 | TYPE_VISIT_INLINE(Date64Type); |
58 | TYPE_VISIT_INLINE(TimestampType); |
59 | TYPE_VISIT_INLINE(Time32Type); |
60 | TYPE_VISIT_INLINE(Time64Type); |
61 | TYPE_VISIT_INLINE(Decimal128Type); |
62 | TYPE_VISIT_INLINE(ListType); |
63 | TYPE_VISIT_INLINE(StructType); |
64 | TYPE_VISIT_INLINE(UnionType); |
65 | TYPE_VISIT_INLINE(DictionaryType); |
66 | default: |
67 | break; |
68 | } |
69 | return Status::NotImplemented("Type not implemented" ); |
70 | } |
71 | |
72 | #undef TYPE_VISIT_INLINE |
73 | |
74 | #define ARRAY_VISIT_INLINE(TYPE_CLASS) \ |
75 | case TYPE_CLASS::type_id: \ |
76 | return visitor->Visit( \ |
77 | internal::checked_cast<const typename TypeTraits<TYPE_CLASS>::ArrayType&>( \ |
78 | array)); |
79 | |
80 | template <typename VISITOR> |
81 | inline Status VisitArrayInline(const Array& array, VISITOR* visitor) { |
82 | switch (array.type_id()) { |
83 | ARRAY_VISIT_INLINE(NullType); |
84 | ARRAY_VISIT_INLINE(BooleanType); |
85 | ARRAY_VISIT_INLINE(Int8Type); |
86 | ARRAY_VISIT_INLINE(UInt8Type); |
87 | ARRAY_VISIT_INLINE(Int16Type); |
88 | ARRAY_VISIT_INLINE(UInt16Type); |
89 | ARRAY_VISIT_INLINE(Int32Type); |
90 | ARRAY_VISIT_INLINE(UInt32Type); |
91 | ARRAY_VISIT_INLINE(Int64Type); |
92 | ARRAY_VISIT_INLINE(UInt64Type); |
93 | ARRAY_VISIT_INLINE(HalfFloatType); |
94 | ARRAY_VISIT_INLINE(FloatType); |
95 | ARRAY_VISIT_INLINE(DoubleType); |
96 | ARRAY_VISIT_INLINE(StringType); |
97 | ARRAY_VISIT_INLINE(BinaryType); |
98 | ARRAY_VISIT_INLINE(FixedSizeBinaryType); |
99 | ARRAY_VISIT_INLINE(Date32Type); |
100 | ARRAY_VISIT_INLINE(Date64Type); |
101 | ARRAY_VISIT_INLINE(TimestampType); |
102 | ARRAY_VISIT_INLINE(Time32Type); |
103 | ARRAY_VISIT_INLINE(Time64Type); |
104 | ARRAY_VISIT_INLINE(Decimal128Type); |
105 | ARRAY_VISIT_INLINE(ListType); |
106 | ARRAY_VISIT_INLINE(StructType); |
107 | ARRAY_VISIT_INLINE(UnionType); |
108 | ARRAY_VISIT_INLINE(DictionaryType); |
109 | default: |
110 | break; |
111 | } |
112 | return Status::NotImplemented("Type not implemented" ); |
113 | } |
114 | |
115 | // Visit an array's data values, in order, without overhead. |
116 | // |
117 | // The Visit function's `visitor` argument should define two public methods: |
118 | // - Status VisitNull() |
119 | // - Status VisitValue(<scalar>) |
120 | // |
121 | // The scalar value's type depends on the array data type: |
122 | // - the type's `c_type`, if any |
123 | // - for boolean arrays, a `bool` |
124 | // - for binary, string and fixed-size binary arrays, a `util::string_view` |
125 | |
// Primary template, intentionally empty: it provides no Visit() member.
// Usable behaviour comes from the specializations of this template, which
// select themselves either by the exact type or through the second
// (SFINAE) parameter.
template <class ValueType, class Enabler = void>
struct ArrayDataVisitor {
};
128 | |
129 | template <> |
130 | struct ArrayDataVisitor<BooleanType> { |
131 | template <typename Visitor> |
132 | static Status Visit(const ArrayData& arr, Visitor* visitor) { |
133 | if (arr.null_count != 0) { |
134 | internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); |
135 | internal::BitmapReader value_reader(arr.buffers[1]->data(), arr.offset, arr.length); |
136 | for (int64_t i = 0; i < arr.length; ++i) { |
137 | const bool is_null = valid_reader.IsNotSet(); |
138 | if (is_null) { |
139 | ARROW_RETURN_NOT_OK(visitor->VisitNull()); |
140 | } else { |
141 | ARROW_RETURN_NOT_OK(visitor->VisitValue(value_reader.IsSet())); |
142 | } |
143 | valid_reader.Next(); |
144 | value_reader.Next(); |
145 | } |
146 | } else { |
147 | internal::BitmapReader value_reader(arr.buffers[1]->data(), arr.offset, arr.length); |
148 | for (int64_t i = 0; i < arr.length; ++i) { |
149 | ARROW_RETURN_NOT_OK(visitor->VisitValue(value_reader.IsSet())); |
150 | value_reader.Next(); |
151 | } |
152 | } |
153 | return Status::OK(); |
154 | } |
155 | }; |
156 | |
157 | template <typename T> |
158 | struct ArrayDataVisitor<T, enable_if_has_c_type<T>> { |
159 | template <typename Visitor> |
160 | static Status Visit(const ArrayData& arr, Visitor* visitor) { |
161 | using c_type = typename T::c_type; |
162 | const c_type* data = arr.GetValues<c_type>(1); |
163 | |
164 | if (arr.null_count != 0) { |
165 | internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); |
166 | for (int64_t i = 0; i < arr.length; ++i) { |
167 | const bool is_null = valid_reader.IsNotSet(); |
168 | if (is_null) { |
169 | ARROW_RETURN_NOT_OK(visitor->VisitNull()); |
170 | } else { |
171 | ARROW_RETURN_NOT_OK(visitor->VisitValue(data[i])); |
172 | } |
173 | valid_reader.Next(); |
174 | } |
175 | } else { |
176 | for (int64_t i = 0; i < arr.length; ++i) { |
177 | ARROW_RETURN_NOT_OK(visitor->VisitValue(data[i])); |
178 | } |
179 | } |
180 | return Status::OK(); |
181 | } |
182 | }; |
183 | |
184 | template <typename T> |
185 | struct ArrayDataVisitor<T, enable_if_binary<T>> { |
186 | template <typename Visitor> |
187 | static Status Visit(const ArrayData& arr, Visitor* visitor) { |
188 | constexpr uint8_t empty_value = 0; |
189 | |
190 | const int32_t* offsets = arr.GetValues<int32_t>(1); |
191 | const uint8_t* data; |
192 | if (!arr.buffers[2]) { |
193 | data = &empty_value; |
194 | } else { |
195 | data = arr.GetValues<uint8_t>(2); |
196 | } |
197 | |
198 | if (arr.null_count != 0) { |
199 | internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); |
200 | for (int64_t i = 0; i < arr.length; ++i) { |
201 | const bool is_null = valid_reader.IsNotSet(); |
202 | valid_reader.Next(); |
203 | if (is_null) { |
204 | ARROW_RETURN_NOT_OK(visitor->VisitNull()); |
205 | } else { |
206 | auto value = util::string_view(reinterpret_cast<const char*>(data + offsets[i]), |
207 | offsets[i + 1] - offsets[i]); |
208 | ARROW_RETURN_NOT_OK(visitor->VisitValue(value)); |
209 | } |
210 | } |
211 | } else { |
212 | for (int64_t i = 0; i < arr.length; ++i) { |
213 | auto value = util::string_view(reinterpret_cast<const char*>(data + offsets[i]), |
214 | offsets[i + 1] - offsets[i]); |
215 | ARROW_RETURN_NOT_OK(visitor->VisitValue(value)); |
216 | } |
217 | } |
218 | return Status::OK(); |
219 | } |
220 | }; |
221 | |
222 | template <typename T> |
223 | struct ArrayDataVisitor<T, enable_if_fixed_size_binary<T>> { |
224 | template <typename Visitor> |
225 | static Status Visit(const ArrayData& arr, Visitor* visitor) { |
226 | const auto& fw_type = internal::checked_cast<const FixedSizeBinaryType&>(*arr.type); |
227 | |
228 | const int32_t byte_width = fw_type.byte_width(); |
229 | const uint8_t* data = arr.GetValues<uint8_t>(1); |
230 | |
231 | if (arr.null_count != 0) { |
232 | internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); |
233 | for (int64_t i = 0; i < arr.length; ++i) { |
234 | const bool is_null = valid_reader.IsNotSet(); |
235 | valid_reader.Next(); |
236 | if (is_null) { |
237 | ARROW_RETURN_NOT_OK(visitor->VisitNull()); |
238 | } else { |
239 | auto value = util::string_view(reinterpret_cast<const char*>(data), byte_width); |
240 | ARROW_RETURN_NOT_OK(visitor->VisitValue(value)); |
241 | } |
242 | data += byte_width; |
243 | } |
244 | } else { |
245 | for (int64_t i = 0; i < arr.length; ++i) { |
246 | auto value = util::string_view(reinterpret_cast<const char*>(data), byte_width); |
247 | ARROW_RETURN_NOT_OK(visitor->VisitValue(value)); |
248 | data += byte_width; |
249 | } |
250 | } |
251 | return Status::OK(); |
252 | } |
253 | }; |
254 | |
255 | } // namespace arrow |
256 | |
257 | #endif // ARROW_VISITOR_INLINE_H |
258 | |