1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <cstdint> |
19 | #include <sstream> |
20 | #include <type_traits> |
21 | #include <utility> |
22 | #include <vector> |
23 | |
24 | #include "arrow/array.h" |
25 | #include "arrow/builder.h" |
26 | #include "arrow/ipc/json-internal.h" |
27 | #include "arrow/ipc/json-simple.h" |
28 | #include "arrow/memory_pool.h" |
29 | #include "arrow/util/checked_cast.h" |
30 | #include "arrow/util/decimal.h" |
31 | #include "arrow/util/logging.h" |
32 | #include "arrow/util/string_view.h" |
33 | |
34 | namespace arrow { |
35 | namespace ipc { |
36 | namespace internal { |
37 | namespace json { |
38 | |
39 | using ::arrow::internal::checked_cast; |
40 | |
// RapidJSON parse flags passed to rj::Document::Parse() in ArrayFromJSON().
// NOTE(review): judging by the flag names, these request full-precision number
// parsing and acceptance of NaN / Infinity literals -- confirm against the
// RapidJSON documentation.
static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
42 | |
43 | static Status JSONTypeError(const char* expected_type, rj::Type json_type) { |
44 | return Status::Invalid("Expected " , expected_type, " or null, got JSON type " , |
45 | json_type); |
46 | } |
47 | |
48 | class Converter { |
49 | public: |
50 | virtual ~Converter() = default; |
51 | |
52 | virtual Status Init() { return Status::OK(); } |
53 | |
54 | virtual Status AppendValue(const rj::Value& json_obj) = 0; |
55 | |
56 | virtual Status AppendNull() = 0; |
57 | |
58 | virtual Status AppendValues(const rj::Value& json_array) = 0; |
59 | |
60 | virtual std::shared_ptr<ArrayBuilder> builder() = 0; |
61 | |
62 | virtual Status Finish(std::shared_ptr<Array>* out) { |
63 | auto builder = this->builder(); |
64 | if (builder->length() == 0) { |
65 | // Make sure the builder was initialized |
66 | RETURN_NOT_OK(builder->Resize(1)); |
67 | } |
68 | return builder->Finish(out); |
69 | } |
70 | |
71 | protected: |
72 | std::shared_ptr<DataType> type_; |
73 | }; |
74 | |
// Forward declaration: the list and struct converters defined below call
// GetConverter() to create the converters for their child types.
Status GetConverter(const std::shared_ptr<DataType>&, std::shared_ptr<Converter>* out);
76 | |
77 | // CRTP |
78 | template <class Derived> |
79 | class ConcreteConverter : public Converter { |
80 | public: |
81 | Status AppendValues(const rj::Value& json_array) override { |
82 | auto self = static_cast<Derived*>(this); |
83 | if (!json_array.IsArray()) { |
84 | return JSONTypeError("array" , json_array.GetType()); |
85 | } |
86 | auto size = json_array.Size(); |
87 | for (uint32_t i = 0; i < size; ++i) { |
88 | RETURN_NOT_OK(self->AppendValue(json_array[i])); |
89 | } |
90 | return Status::OK(); |
91 | } |
92 | }; |
93 | |
94 | // TODO : dates and times? |
95 | |
96 | // ------------------------------------------------------------------------ |
97 | // Converter for null arrays |
98 | |
99 | class NullConverter final : public ConcreteConverter<NullConverter> { |
100 | public: |
101 | explicit NullConverter(const std::shared_ptr<DataType>& type) { |
102 | type_ = type; |
103 | builder_ = std::make_shared<NullBuilder>(); |
104 | } |
105 | |
106 | Status AppendNull() override { return builder_->AppendNull(); } |
107 | |
108 | Status AppendValue(const rj::Value& json_obj) override { |
109 | if (json_obj.IsNull()) { |
110 | return AppendNull(); |
111 | } |
112 | return JSONTypeError("null" , json_obj.GetType()); |
113 | } |
114 | |
115 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
116 | |
117 | protected: |
118 | std::shared_ptr<NullBuilder> builder_; |
119 | }; |
120 | |
121 | // ------------------------------------------------------------------------ |
122 | // Converter for boolean arrays |
123 | |
124 | class BooleanConverter final : public ConcreteConverter<BooleanConverter> { |
125 | public: |
126 | explicit BooleanConverter(const std::shared_ptr<DataType>& type) { |
127 | type_ = type; |
128 | builder_ = std::make_shared<BooleanBuilder>(); |
129 | } |
130 | |
131 | Status AppendNull() override { return builder_->AppendNull(); } |
132 | |
133 | Status AppendValue(const rj::Value& json_obj) override { |
134 | if (json_obj.IsNull()) { |
135 | return AppendNull(); |
136 | } |
137 | if (json_obj.IsBool()) { |
138 | return builder_->Append(json_obj.GetBool()); |
139 | } |
140 | return JSONTypeError("boolean" , json_obj.GetType()); |
141 | } |
142 | |
143 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
144 | |
145 | protected: |
146 | std::shared_ptr<BooleanBuilder> builder_; |
147 | }; |
148 | |
149 | // ------------------------------------------------------------------------ |
150 | // Converter for int arrays |
151 | |
152 | template <typename Type> |
153 | class IntegerConverter final : public ConcreteConverter<IntegerConverter<Type>> { |
154 | using c_type = typename Type::c_type; |
155 | static constexpr auto is_signed = std::is_signed<c_type>::value; |
156 | |
157 | public: |
158 | explicit IntegerConverter(const std::shared_ptr<DataType>& type) { |
159 | this->type_ = type; |
160 | builder_ = std::make_shared<NumericBuilder<Type>>(); |
161 | } |
162 | |
163 | Status AppendNull() override { return builder_->AppendNull(); } |
164 | |
165 | Status AppendValue(const rj::Value& json_obj) override { |
166 | if (json_obj.IsNull()) { |
167 | return AppendNull(); |
168 | } |
169 | return AppendNumber(json_obj); |
170 | } |
171 | |
172 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
173 | |
174 | protected: |
175 | // Append signed integer value |
176 | template <typename Integer = c_type> |
177 | typename std::enable_if<std::is_signed<Integer>::value, Status>::type AppendNumber( |
178 | const rj::Value& json_obj) { |
179 | if (json_obj.IsInt64()) { |
180 | int64_t v64 = json_obj.GetInt64(); |
181 | c_type v = static_cast<c_type>(v64); |
182 | if (v == v64) { |
183 | return builder_->Append(v); |
184 | } else { |
185 | return Status::Invalid("Value " , v64, " out of bounds for " , |
186 | this->type_->ToString()); |
187 | } |
188 | } else { |
189 | return JSONTypeError("signed int" , json_obj.GetType()); |
190 | } |
191 | } |
192 | |
193 | // Append unsigned integer value |
194 | template <typename Integer = c_type> |
195 | typename std::enable_if<std::is_unsigned<Integer>::value, Status>::type AppendNumber( |
196 | const rj::Value& json_obj) { |
197 | if (json_obj.IsUint64()) { |
198 | uint64_t v64 = json_obj.GetUint64(); |
199 | c_type v = static_cast<c_type>(v64); |
200 | if (v == v64) { |
201 | return builder_->Append(v); |
202 | } else { |
203 | return Status::Invalid("Value " , v64, " out of bounds for " , |
204 | this->type_->ToString()); |
205 | } |
206 | return builder_->Append(v); |
207 | } else { |
208 | return JSONTypeError("unsigned int" , json_obj.GetType()); |
209 | } |
210 | } |
211 | |
212 | std::shared_ptr<NumericBuilder<Type>> builder_; |
213 | }; |
214 | |
215 | // ------------------------------------------------------------------------ |
216 | // Converter for float arrays |
217 | |
218 | template <typename Type> |
219 | class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> { |
220 | using c_type = typename Type::c_type; |
221 | |
222 | public: |
223 | explicit FloatConverter(const std::shared_ptr<DataType>& type) { |
224 | this->type_ = type; |
225 | builder_ = std::make_shared<NumericBuilder<Type>>(); |
226 | } |
227 | |
228 | Status AppendNull() override { return builder_->AppendNull(); } |
229 | |
230 | Status AppendValue(const rj::Value& json_obj) override { |
231 | if (json_obj.IsNull()) { |
232 | return AppendNull(); |
233 | } |
234 | if (json_obj.IsNumber()) { |
235 | c_type v = static_cast<c_type>(json_obj.GetDouble()); |
236 | return builder_->Append(v); |
237 | } else { |
238 | return JSONTypeError("number" , json_obj.GetType()); |
239 | } |
240 | } |
241 | |
242 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
243 | |
244 | protected: |
245 | std::shared_ptr<NumericBuilder<Type>> builder_; |
246 | }; |
247 | |
248 | // ------------------------------------------------------------------------ |
249 | // Converter for decimal arrays |
250 | |
251 | class DecimalConverter final : public ConcreteConverter<DecimalConverter> { |
252 | public: |
253 | explicit DecimalConverter(const std::shared_ptr<DataType>& type) { |
254 | this->type_ = type; |
255 | decimal_type_ = checked_cast<Decimal128Type*>(type.get()); |
256 | builder_ = std::make_shared<DecimalBuilder>(type); |
257 | } |
258 | |
259 | Status AppendNull() override { return builder_->AppendNull(); } |
260 | |
261 | Status AppendValue(const rj::Value& json_obj) override { |
262 | if (json_obj.IsNull()) { |
263 | return AppendNull(); |
264 | } |
265 | if (json_obj.IsString()) { |
266 | int32_t precision, scale; |
267 | Decimal128 d; |
268 | auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); |
269 | RETURN_NOT_OK(Decimal128::FromString(view, &d, &precision, &scale)); |
270 | if (scale != decimal_type_->scale()) { |
271 | return Status::Invalid("Invalid scale for decimal: expected " , |
272 | decimal_type_->scale(), ", got " , scale); |
273 | } |
274 | return builder_->Append(d); |
275 | } |
276 | return JSONTypeError("decimal string" , json_obj.GetType()); |
277 | } |
278 | |
279 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
280 | |
281 | protected: |
282 | std::shared_ptr<DecimalBuilder> builder_; |
283 | Decimal128Type* decimal_type_; |
284 | }; |
285 | |
286 | // ------------------------------------------------------------------------ |
287 | // Converter for binary and string arrays |
288 | |
289 | class StringConverter final : public ConcreteConverter<StringConverter> { |
290 | public: |
291 | explicit StringConverter(const std::shared_ptr<DataType>& type) { |
292 | this->type_ = type; |
293 | builder_ = std::make_shared<BinaryBuilder>(type, default_memory_pool()); |
294 | } |
295 | |
296 | Status AppendNull() override { return builder_->AppendNull(); } |
297 | |
298 | Status AppendValue(const rj::Value& json_obj) override { |
299 | if (json_obj.IsNull()) { |
300 | return AppendNull(); |
301 | } |
302 | if (json_obj.IsString()) { |
303 | auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); |
304 | return builder_->Append(view); |
305 | } else { |
306 | return JSONTypeError("string" , json_obj.GetType()); |
307 | } |
308 | } |
309 | |
310 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
311 | |
312 | protected: |
313 | std::shared_ptr<BinaryBuilder> builder_; |
314 | }; |
315 | |
316 | // ------------------------------------------------------------------------ |
317 | // Converter for fixed-size binary arrays |
318 | |
319 | class FixedSizeBinaryConverter final |
320 | : public ConcreteConverter<FixedSizeBinaryConverter> { |
321 | public: |
322 | explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) { |
323 | this->type_ = type; |
324 | builder_ = std::make_shared<FixedSizeBinaryBuilder>(type, default_memory_pool()); |
325 | } |
326 | |
327 | Status AppendNull() override { return builder_->AppendNull(); } |
328 | |
329 | Status AppendValue(const rj::Value& json_obj) override { |
330 | if (json_obj.IsNull()) { |
331 | return AppendNull(); |
332 | } |
333 | if (json_obj.IsString()) { |
334 | auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); |
335 | if (view.length() != static_cast<size_t>(builder_->byte_width())) { |
336 | std::stringstream ss; |
337 | ss << "Invalid string length " << view.length() << " in JSON input for " |
338 | << this->type_->ToString(); |
339 | return Status::Invalid(ss.str()); |
340 | } |
341 | return builder_->Append(view); |
342 | } else { |
343 | return JSONTypeError("string" , json_obj.GetType()); |
344 | } |
345 | } |
346 | |
347 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
348 | |
349 | protected: |
350 | std::shared_ptr<FixedSizeBinaryBuilder> builder_; |
351 | }; |
352 | |
353 | // ------------------------------------------------------------------------ |
354 | // Converter for list arrays |
355 | |
356 | class ListConverter final : public ConcreteConverter<ListConverter> { |
357 | public: |
358 | explicit ListConverter(const std::shared_ptr<DataType>& type) { type_ = type; } |
359 | |
360 | Status Init() override { |
361 | const auto& list_type = checked_cast<const ListType&>(*type_); |
362 | RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_)); |
363 | auto child_builder = child_converter_->builder(); |
364 | builder_ = std::make_shared<ListBuilder>(default_memory_pool(), child_builder, type_); |
365 | return Status::OK(); |
366 | } |
367 | |
368 | Status AppendNull() override { return builder_->AppendNull(); } |
369 | |
370 | Status AppendValue(const rj::Value& json_obj) override { |
371 | if (json_obj.IsNull()) { |
372 | return AppendNull(); |
373 | } |
374 | RETURN_NOT_OK(builder_->Append()); |
375 | // Extend the child converter with this JSON array |
376 | return child_converter_->AppendValues(json_obj); |
377 | } |
378 | |
379 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
380 | |
381 | protected: |
382 | std::shared_ptr<ListBuilder> builder_; |
383 | std::shared_ptr<Converter> child_converter_; |
384 | }; |
385 | |
386 | // ------------------------------------------------------------------------ |
387 | // Converter for struct arrays |
388 | |
389 | class StructConverter final : public ConcreteConverter<StructConverter> { |
390 | public: |
391 | explicit StructConverter(const std::shared_ptr<DataType>& type) { type_ = type; } |
392 | |
393 | Status Init() override { |
394 | std::vector<std::shared_ptr<ArrayBuilder>> child_builders; |
395 | for (const auto& field : type_->children()) { |
396 | std::shared_ptr<Converter> child_converter; |
397 | RETURN_NOT_OK(GetConverter(field->type(), &child_converter)); |
398 | child_converters_.push_back(child_converter); |
399 | child_builders.push_back(child_converter->builder()); |
400 | } |
401 | builder_ = std::make_shared<StructBuilder>(type_, default_memory_pool(), |
402 | std::move(child_builders)); |
403 | return Status::OK(); |
404 | } |
405 | |
406 | Status AppendNull() override { |
407 | for (auto& converter : child_converters_) { |
408 | RETURN_NOT_OK(converter->AppendNull()); |
409 | } |
410 | return builder_->AppendNull(); |
411 | } |
412 | |
413 | // Append a JSON value that is either an array of N elements in order |
414 | // or an object mapping struct names to values (omitted struct members |
415 | // are mapped to null). |
416 | Status AppendValue(const rj::Value& json_obj) override { |
417 | if (json_obj.IsNull()) { |
418 | return AppendNull(); |
419 | } |
420 | if (json_obj.IsArray()) { |
421 | auto size = json_obj.Size(); |
422 | auto expected_size = static_cast<uint32_t>(type_->num_children()); |
423 | if (size != expected_size) { |
424 | return Status::Invalid("Expected array of size " , expected_size, |
425 | ", got array of size " , size); |
426 | } |
427 | for (uint32_t i = 0; i < size; ++i) { |
428 | RETURN_NOT_OK(child_converters_[i]->AppendValue(json_obj[i])); |
429 | } |
430 | return builder_->Append(); |
431 | } |
432 | if (json_obj.IsObject()) { |
433 | auto remaining = json_obj.MemberCount(); |
434 | auto num_children = type_->num_children(); |
435 | for (int32_t i = 0; i < num_children; ++i) { |
436 | const auto& field = type_->child(i); |
437 | auto it = json_obj.FindMember(field->name()); |
438 | if (it != json_obj.MemberEnd()) { |
439 | --remaining; |
440 | RETURN_NOT_OK(child_converters_[i]->AppendValue(it->value)); |
441 | } else { |
442 | RETURN_NOT_OK(child_converters_[i]->AppendNull()); |
443 | } |
444 | } |
445 | if (remaining > 0) { |
446 | return Status::Invalid("Unexpected members in JSON object for type " , |
447 | type_->ToString()); |
448 | } |
449 | return builder_->Append(); |
450 | } |
451 | return JSONTypeError("array or object" , json_obj.GetType()); |
452 | } |
453 | |
454 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } |
455 | |
456 | protected: |
457 | std::shared_ptr<StructBuilder> builder_; |
458 | std::vector<std::shared_ptr<Converter>> child_converters_; |
459 | }; |
460 | |
461 | // ------------------------------------------------------------------------ |
462 | // General conversion functions |
463 | |
464 | Status GetConverter(const std::shared_ptr<DataType>& type, |
465 | std::shared_ptr<Converter>* out) { |
466 | std::shared_ptr<Converter> res; |
467 | |
468 | #define SIMPLE_CONVERTER_CASE(ID, CLASS) \ |
469 | case ID: \ |
470 | res = std::make_shared<CLASS>(type); \ |
471 | break; |
472 | |
473 | switch (type->id()) { |
474 | SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter<Int8Type>) |
475 | SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter<Int16Type>) |
476 | SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter<Int32Type>) |
477 | SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter<Int32Type>) |
478 | SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter<Date32Type>) |
479 | SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter<Int64Type>) |
480 | SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter<Int64Type>) |
481 | SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, IntegerConverter<Int64Type>) |
482 | SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter<Date64Type>) |
483 | SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter<UInt8Type>) |
484 | SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter<UInt16Type>) |
485 | SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter<UInt32Type>) |
486 | SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter<UInt64Type>) |
487 | SIMPLE_CONVERTER_CASE(Type::NA, NullConverter) |
488 | SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter) |
489 | SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter<FloatType>) |
490 | SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter<DoubleType>) |
491 | SIMPLE_CONVERTER_CASE(Type::LIST, ListConverter) |
492 | SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter) |
493 | SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter) |
494 | SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter) |
495 | SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter) |
496 | SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter) |
497 | default: { |
498 | return Status::NotImplemented("JSON conversion to " , type->ToString(), |
499 | " not implemented" ); |
500 | } |
501 | } |
502 | |
503 | #undef SIMPLE_CONVERTER_CASE |
504 | |
505 | RETURN_NOT_OK(res->Init()); |
506 | *out = res; |
507 | return Status::OK(); |
508 | } |
509 | |
510 | Status ArrayFromJSON(const std::shared_ptr<DataType>& type, |
511 | const util::string_view& json_string, std::shared_ptr<Array>* out) { |
512 | std::shared_ptr<Converter> converter; |
513 | RETURN_NOT_OK(GetConverter(type, &converter)); |
514 | |
515 | rj::Document json_doc; |
516 | json_doc.Parse<kParseFlags>(json_string.data(), json_string.length()); |
517 | if (json_doc.HasParseError()) { |
518 | return Status::Invalid("JSON parse error at offset " , json_doc.GetErrorOffset(), ": " , |
519 | GetParseError_En(json_doc.GetParseError())); |
520 | } |
521 | |
522 | // The JSON document should be an array, append it |
523 | RETURN_NOT_OK(converter->AppendValues(json_doc)); |
524 | return converter->Finish(out); |
525 | } |
526 | |
527 | Status ArrayFromJSON(const std::shared_ptr<DataType>& type, |
528 | const std::string& json_string, std::shared_ptr<Array>* out) { |
529 | return ArrayFromJSON(type, util::string_view(json_string), out); |
530 | } |
531 | |
532 | Status ArrayFromJSON(const std::shared_ptr<DataType>& type, const char* json_string, |
533 | std::shared_ptr<Array>* out) { |
534 | return ArrayFromJSON(type, util::string_view(json_string), out); |
535 | } |
536 | |
537 | } // namespace json |
538 | } // namespace internal |
539 | } // namespace ipc |
540 | } // namespace arrow |
541 | |