1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <sstream>
20#include <type_traits>
21#include <utility>
22#include <vector>
23
24#include "arrow/array.h"
25#include "arrow/builder.h"
26#include "arrow/ipc/json-internal.h"
27#include "arrow/ipc/json-simple.h"
28#include "arrow/memory_pool.h"
29#include "arrow/util/checked_cast.h"
30#include "arrow/util/decimal.h"
31#include "arrow/util/logging.h"
32#include "arrow/util/string_view.h"
33
34namespace arrow {
35namespace ipc {
36namespace internal {
37namespace json {
38
39using ::arrow::internal::checked_cast;
40
// RapidJSON parse flags passed to rj::Document::Parse in ArrayFromJSON():
// numbers are parsed with full precision, and NaN / Inf literals are accepted.
static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
42
43static Status JSONTypeError(const char* expected_type, rj::Type json_type) {
44 return Status::Invalid("Expected ", expected_type, " or null, got JSON type ",
45 json_type);
46}
47
48class Converter {
49 public:
50 virtual ~Converter() = default;
51
52 virtual Status Init() { return Status::OK(); }
53
54 virtual Status AppendValue(const rj::Value& json_obj) = 0;
55
56 virtual Status AppendNull() = 0;
57
58 virtual Status AppendValues(const rj::Value& json_array) = 0;
59
60 virtual std::shared_ptr<ArrayBuilder> builder() = 0;
61
62 virtual Status Finish(std::shared_ptr<Array>* out) {
63 auto builder = this->builder();
64 if (builder->length() == 0) {
65 // Make sure the builder was initialized
66 RETURN_NOT_OK(builder->Resize(1));
67 }
68 return builder->Finish(out);
69 }
70
71 protected:
72 std::shared_ptr<DataType> type_;
73};
74
// Forward declaration: the nested converters below (list, struct) call
// GetConverter() to obtain converters for their child types.  The definition
// appears later in this file.
Status GetConverter(const std::shared_ptr<DataType>&, std::shared_ptr<Converter>* out);
76
77// CRTP
78template <class Derived>
79class ConcreteConverter : public Converter {
80 public:
81 Status AppendValues(const rj::Value& json_array) override {
82 auto self = static_cast<Derived*>(this);
83 if (!json_array.IsArray()) {
84 return JSONTypeError("array", json_array.GetType());
85 }
86 auto size = json_array.Size();
87 for (uint32_t i = 0; i < size; ++i) {
88 RETURN_NOT_OK(self->AppendValue(json_array[i]));
89 }
90 return Status::OK();
91 }
92};
93
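// TODO: dates and times are currently only accepted as raw integers
// (see GetConverter); should a richer representation be supported?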
95
96// ------------------------------------------------------------------------
97// Converter for null arrays
98
99class NullConverter final : public ConcreteConverter<NullConverter> {
100 public:
101 explicit NullConverter(const std::shared_ptr<DataType>& type) {
102 type_ = type;
103 builder_ = std::make_shared<NullBuilder>();
104 }
105
106 Status AppendNull() override { return builder_->AppendNull(); }
107
108 Status AppendValue(const rj::Value& json_obj) override {
109 if (json_obj.IsNull()) {
110 return AppendNull();
111 }
112 return JSONTypeError("null", json_obj.GetType());
113 }
114
115 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
116
117 protected:
118 std::shared_ptr<NullBuilder> builder_;
119};
120
121// ------------------------------------------------------------------------
122// Converter for boolean arrays
123
124class BooleanConverter final : public ConcreteConverter<BooleanConverter> {
125 public:
126 explicit BooleanConverter(const std::shared_ptr<DataType>& type) {
127 type_ = type;
128 builder_ = std::make_shared<BooleanBuilder>();
129 }
130
131 Status AppendNull() override { return builder_->AppendNull(); }
132
133 Status AppendValue(const rj::Value& json_obj) override {
134 if (json_obj.IsNull()) {
135 return AppendNull();
136 }
137 if (json_obj.IsBool()) {
138 return builder_->Append(json_obj.GetBool());
139 }
140 return JSONTypeError("boolean", json_obj.GetType());
141 }
142
143 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
144
145 protected:
146 std::shared_ptr<BooleanBuilder> builder_;
147};
148
149// ------------------------------------------------------------------------
150// Converter for int arrays
151
152template <typename Type>
153class IntegerConverter final : public ConcreteConverter<IntegerConverter<Type>> {
154 using c_type = typename Type::c_type;
155 static constexpr auto is_signed = std::is_signed<c_type>::value;
156
157 public:
158 explicit IntegerConverter(const std::shared_ptr<DataType>& type) {
159 this->type_ = type;
160 builder_ = std::make_shared<NumericBuilder<Type>>();
161 }
162
163 Status AppendNull() override { return builder_->AppendNull(); }
164
165 Status AppendValue(const rj::Value& json_obj) override {
166 if (json_obj.IsNull()) {
167 return AppendNull();
168 }
169 return AppendNumber(json_obj);
170 }
171
172 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
173
174 protected:
175 // Append signed integer value
176 template <typename Integer = c_type>
177 typename std::enable_if<std::is_signed<Integer>::value, Status>::type AppendNumber(
178 const rj::Value& json_obj) {
179 if (json_obj.IsInt64()) {
180 int64_t v64 = json_obj.GetInt64();
181 c_type v = static_cast<c_type>(v64);
182 if (v == v64) {
183 return builder_->Append(v);
184 } else {
185 return Status::Invalid("Value ", v64, " out of bounds for ",
186 this->type_->ToString());
187 }
188 } else {
189 return JSONTypeError("signed int", json_obj.GetType());
190 }
191 }
192
193 // Append unsigned integer value
194 template <typename Integer = c_type>
195 typename std::enable_if<std::is_unsigned<Integer>::value, Status>::type AppendNumber(
196 const rj::Value& json_obj) {
197 if (json_obj.IsUint64()) {
198 uint64_t v64 = json_obj.GetUint64();
199 c_type v = static_cast<c_type>(v64);
200 if (v == v64) {
201 return builder_->Append(v);
202 } else {
203 return Status::Invalid("Value ", v64, " out of bounds for ",
204 this->type_->ToString());
205 }
206 return builder_->Append(v);
207 } else {
208 return JSONTypeError("unsigned int", json_obj.GetType());
209 }
210 }
211
212 std::shared_ptr<NumericBuilder<Type>> builder_;
213};
214
215// ------------------------------------------------------------------------
216// Converter for float arrays
217
218template <typename Type>
219class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> {
220 using c_type = typename Type::c_type;
221
222 public:
223 explicit FloatConverter(const std::shared_ptr<DataType>& type) {
224 this->type_ = type;
225 builder_ = std::make_shared<NumericBuilder<Type>>();
226 }
227
228 Status AppendNull() override { return builder_->AppendNull(); }
229
230 Status AppendValue(const rj::Value& json_obj) override {
231 if (json_obj.IsNull()) {
232 return AppendNull();
233 }
234 if (json_obj.IsNumber()) {
235 c_type v = static_cast<c_type>(json_obj.GetDouble());
236 return builder_->Append(v);
237 } else {
238 return JSONTypeError("number", json_obj.GetType());
239 }
240 }
241
242 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
243
244 protected:
245 std::shared_ptr<NumericBuilder<Type>> builder_;
246};
247
248// ------------------------------------------------------------------------
249// Converter for decimal arrays
250
251class DecimalConverter final : public ConcreteConverter<DecimalConverter> {
252 public:
253 explicit DecimalConverter(const std::shared_ptr<DataType>& type) {
254 this->type_ = type;
255 decimal_type_ = checked_cast<Decimal128Type*>(type.get());
256 builder_ = std::make_shared<DecimalBuilder>(type);
257 }
258
259 Status AppendNull() override { return builder_->AppendNull(); }
260
261 Status AppendValue(const rj::Value& json_obj) override {
262 if (json_obj.IsNull()) {
263 return AppendNull();
264 }
265 if (json_obj.IsString()) {
266 int32_t precision, scale;
267 Decimal128 d;
268 auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
269 RETURN_NOT_OK(Decimal128::FromString(view, &d, &precision, &scale));
270 if (scale != decimal_type_->scale()) {
271 return Status::Invalid("Invalid scale for decimal: expected ",
272 decimal_type_->scale(), ", got ", scale);
273 }
274 return builder_->Append(d);
275 }
276 return JSONTypeError("decimal string", json_obj.GetType());
277 }
278
279 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
280
281 protected:
282 std::shared_ptr<DecimalBuilder> builder_;
283 Decimal128Type* decimal_type_;
284};
285
286// ------------------------------------------------------------------------
287// Converter for binary and string arrays
288
289class StringConverter final : public ConcreteConverter<StringConverter> {
290 public:
291 explicit StringConverter(const std::shared_ptr<DataType>& type) {
292 this->type_ = type;
293 builder_ = std::make_shared<BinaryBuilder>(type, default_memory_pool());
294 }
295
296 Status AppendNull() override { return builder_->AppendNull(); }
297
298 Status AppendValue(const rj::Value& json_obj) override {
299 if (json_obj.IsNull()) {
300 return AppendNull();
301 }
302 if (json_obj.IsString()) {
303 auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
304 return builder_->Append(view);
305 } else {
306 return JSONTypeError("string", json_obj.GetType());
307 }
308 }
309
310 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
311
312 protected:
313 std::shared_ptr<BinaryBuilder> builder_;
314};
315
316// ------------------------------------------------------------------------
317// Converter for fixed-size binary arrays
318
319class FixedSizeBinaryConverter final
320 : public ConcreteConverter<FixedSizeBinaryConverter> {
321 public:
322 explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) {
323 this->type_ = type;
324 builder_ = std::make_shared<FixedSizeBinaryBuilder>(type, default_memory_pool());
325 }
326
327 Status AppendNull() override { return builder_->AppendNull(); }
328
329 Status AppendValue(const rj::Value& json_obj) override {
330 if (json_obj.IsNull()) {
331 return AppendNull();
332 }
333 if (json_obj.IsString()) {
334 auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
335 if (view.length() != static_cast<size_t>(builder_->byte_width())) {
336 std::stringstream ss;
337 ss << "Invalid string length " << view.length() << " in JSON input for "
338 << this->type_->ToString();
339 return Status::Invalid(ss.str());
340 }
341 return builder_->Append(view);
342 } else {
343 return JSONTypeError("string", json_obj.GetType());
344 }
345 }
346
347 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
348
349 protected:
350 std::shared_ptr<FixedSizeBinaryBuilder> builder_;
351};
352
353// ------------------------------------------------------------------------
354// Converter for list arrays
355
356class ListConverter final : public ConcreteConverter<ListConverter> {
357 public:
358 explicit ListConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
359
360 Status Init() override {
361 const auto& list_type = checked_cast<const ListType&>(*type_);
362 RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_));
363 auto child_builder = child_converter_->builder();
364 builder_ = std::make_shared<ListBuilder>(default_memory_pool(), child_builder, type_);
365 return Status::OK();
366 }
367
368 Status AppendNull() override { return builder_->AppendNull(); }
369
370 Status AppendValue(const rj::Value& json_obj) override {
371 if (json_obj.IsNull()) {
372 return AppendNull();
373 }
374 RETURN_NOT_OK(builder_->Append());
375 // Extend the child converter with this JSON array
376 return child_converter_->AppendValues(json_obj);
377 }
378
379 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
380
381 protected:
382 std::shared_ptr<ListBuilder> builder_;
383 std::shared_ptr<Converter> child_converter_;
384};
385
386// ------------------------------------------------------------------------
387// Converter for struct arrays
388
389class StructConverter final : public ConcreteConverter<StructConverter> {
390 public:
391 explicit StructConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
392
393 Status Init() override {
394 std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
395 for (const auto& field : type_->children()) {
396 std::shared_ptr<Converter> child_converter;
397 RETURN_NOT_OK(GetConverter(field->type(), &child_converter));
398 child_converters_.push_back(child_converter);
399 child_builders.push_back(child_converter->builder());
400 }
401 builder_ = std::make_shared<StructBuilder>(type_, default_memory_pool(),
402 std::move(child_builders));
403 return Status::OK();
404 }
405
406 Status AppendNull() override {
407 for (auto& converter : child_converters_) {
408 RETURN_NOT_OK(converter->AppendNull());
409 }
410 return builder_->AppendNull();
411 }
412
413 // Append a JSON value that is either an array of N elements in order
414 // or an object mapping struct names to values (omitted struct members
415 // are mapped to null).
416 Status AppendValue(const rj::Value& json_obj) override {
417 if (json_obj.IsNull()) {
418 return AppendNull();
419 }
420 if (json_obj.IsArray()) {
421 auto size = json_obj.Size();
422 auto expected_size = static_cast<uint32_t>(type_->num_children());
423 if (size != expected_size) {
424 return Status::Invalid("Expected array of size ", expected_size,
425 ", got array of size ", size);
426 }
427 for (uint32_t i = 0; i < size; ++i) {
428 RETURN_NOT_OK(child_converters_[i]->AppendValue(json_obj[i]));
429 }
430 return builder_->Append();
431 }
432 if (json_obj.IsObject()) {
433 auto remaining = json_obj.MemberCount();
434 auto num_children = type_->num_children();
435 for (int32_t i = 0; i < num_children; ++i) {
436 const auto& field = type_->child(i);
437 auto it = json_obj.FindMember(field->name());
438 if (it != json_obj.MemberEnd()) {
439 --remaining;
440 RETURN_NOT_OK(child_converters_[i]->AppendValue(it->value));
441 } else {
442 RETURN_NOT_OK(child_converters_[i]->AppendNull());
443 }
444 }
445 if (remaining > 0) {
446 return Status::Invalid("Unexpected members in JSON object for type ",
447 type_->ToString());
448 }
449 return builder_->Append();
450 }
451 return JSONTypeError("array or object", json_obj.GetType());
452 }
453
454 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
455
456 protected:
457 std::shared_ptr<StructBuilder> builder_;
458 std::vector<std::shared_ptr<Converter>> child_converters_;
459};
460
461// ------------------------------------------------------------------------
462// General conversion functions
463
464Status GetConverter(const std::shared_ptr<DataType>& type,
465 std::shared_ptr<Converter>* out) {
466 std::shared_ptr<Converter> res;
467
468#define SIMPLE_CONVERTER_CASE(ID, CLASS) \
469 case ID: \
470 res = std::make_shared<CLASS>(type); \
471 break;
472
473 switch (type->id()) {
474 SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter<Int8Type>)
475 SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter<Int16Type>)
476 SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter<Int32Type>)
477 SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter<Int32Type>)
478 SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter<Date32Type>)
479 SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter<Int64Type>)
480 SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter<Int64Type>)
481 SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, IntegerConverter<Int64Type>)
482 SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter<Date64Type>)
483 SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter<UInt8Type>)
484 SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter<UInt16Type>)
485 SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter<UInt32Type>)
486 SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter<UInt64Type>)
487 SIMPLE_CONVERTER_CASE(Type::NA, NullConverter)
488 SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter)
489 SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter<FloatType>)
490 SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter<DoubleType>)
491 SIMPLE_CONVERTER_CASE(Type::LIST, ListConverter)
492 SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter)
493 SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter)
494 SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter)
495 SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
496 SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
497 default: {
498 return Status::NotImplemented("JSON conversion to ", type->ToString(),
499 " not implemented");
500 }
501 }
502
503#undef SIMPLE_CONVERTER_CASE
504
505 RETURN_NOT_OK(res->Init());
506 *out = res;
507 return Status::OK();
508}
509
510Status ArrayFromJSON(const std::shared_ptr<DataType>& type,
511 const util::string_view& json_string, std::shared_ptr<Array>* out) {
512 std::shared_ptr<Converter> converter;
513 RETURN_NOT_OK(GetConverter(type, &converter));
514
515 rj::Document json_doc;
516 json_doc.Parse<kParseFlags>(json_string.data(), json_string.length());
517 if (json_doc.HasParseError()) {
518 return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
519 GetParseError_En(json_doc.GetParseError()));
520 }
521
522 // The JSON document should be an array, append it
523 RETURN_NOT_OK(converter->AppendValues(json_doc));
524 return converter->Finish(out);
525}
526
527Status ArrayFromJSON(const std::shared_ptr<DataType>& type,
528 const std::string& json_string, std::shared_ptr<Array>* out) {
529 return ArrayFromJSON(type, util::string_view(json_string), out);
530}
531
532Status ArrayFromJSON(const std::shared_ptr<DataType>& type, const char* json_string,
533 std::shared_ptr<Array>* out) {
534 return ArrayFromJSON(type, util::string_view(json_string), out);
535}
536
537} // namespace json
538} // namespace internal
539} // namespace ipc
540} // namespace arrow
541