1#pragma once
2
3#include <Functions/IFunctionImpl.h>
4#include <Core/AccurateComparison.h>
5#include <Functions/DummyJSONParser.h>
6#include <Functions/SimdJSONParser.h>
7#include <Functions/RapidJSONParser.h>
8#include "config_functions.h"
9#include <Common/CpuId.h>
10#include <Common/typeid_cast.h>
11#include <Common/assert_cast.h>
12#include <Core/Settings.h>
13#include <Columns/ColumnConst.h>
14#include <Columns/ColumnString.h>
15#include <Columns/ColumnVector.h>
16#include <Columns/ColumnFixedString.h>
17#include <Columns/ColumnNullable.h>
18#include <Columns/ColumnArray.h>
19#include <Columns/ColumnTuple.h>
20#include <DataTypes/DataTypesNumber.h>
21#include <DataTypes/DataTypeString.h>
22#include <DataTypes/DataTypeEnum.h>
23#include <DataTypes/DataTypeFactory.h>
24#include <DataTypes/DataTypeNullable.h>
25#include <DataTypes/DataTypeArray.h>
26#include <DataTypes/DataTypeTuple.h>
27#include <Interpreters/Context.h>
28#include <ext/range.h>
29
30
31namespace DB
32{
33namespace ErrorCodes
34{
35 extern const int ILLEGAL_COLUMN;
36 extern const int ILLEGAL_TYPE_OF_ARGUMENT;
37 extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
38}
39
40
/// Functions to parse JSON and extract values from it.
/// The first argument of each of these functions is a string containing JSON;
/// it is followed by any number of arguments specifying the path from the JSON's root to the desired part.
/// For example,
/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100
46template <typename Name, template<typename> typename Impl>
47class FunctionJSON : public IFunction
48{
49public:
50 static FunctionPtr create(const Context & context_) { return std::make_shared<FunctionJSON>(context_); }
51 FunctionJSON(const Context & context_) : context(context_) {}
52
53 static constexpr auto name = Name::name;
54 String getName() const override { return Name::name; }
55 bool isVariadic() const override { return true; }
56 size_t getNumberOfArguments() const override { return 0; }
57 bool useDefaultImplementationForConstants() const override { return false; }
58
59 DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
60 {
61 return Impl<DummyJSONParser>::getType(Name::name, arguments);
62 }
63
64 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count) override
65 {
66 /// Choose JSONParser.
67#if USE_SIMDJSON
68 if (context.getSettingsRef().allow_simdjson && Cpu::CpuFlagsCache::have_SSE42 && Cpu::CpuFlagsCache::have_PCLMUL)
69 {
70 Executor<SimdJSONParser>::run(block, arguments, result_pos, input_rows_count);
71 return;
72 }
73#endif
74#if USE_RAPIDJSON
75 Executor<RapidJSONParser>::run(block, arguments, result_pos, input_rows_count);
76#else
77 Executor<DummyJSONParser>::run(block, arguments, result_pos, input_rows_count);
78#endif
79 }
80
81private:
82 const Context & context;
83
84 template <typename JSONParser>
85 class Executor
86 {
87 public:
88 static void run(Block & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count)
89 {
90 MutableColumnPtr to{block.getByPosition(result_pos).type->createColumn()};
91 to->reserve(input_rows_count);
92
93 if (arguments.size() < 1)
94 throw Exception{"Function " + String(Name::name) + " requires at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
95
96 const auto & first_column = block.getByPosition(arguments[0]);
97 if (!isString(first_column.type))
98 throw Exception{"The first argument of function " + String(Name::name) + " should be a string containing JSON, illegal type: " + first_column.type->getName(),
99 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
100
101 const ColumnPtr & arg_json = first_column.column;
102 auto col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
103 auto col_json_string
104 = typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());
105
106 if (!col_json_string)
107 throw Exception{"Illegal column " + arg_json->getName(), ErrorCodes::ILLEGAL_COLUMN};
108
109 const ColumnString::Chars & chars = col_json_string->getChars();
110 const ColumnString::Offsets & offsets = col_json_string->getOffsets();
111
112 std::vector<Move> moves = prepareListOfMoves(block, arguments);
113
114 /// Preallocate memory in parser if necessary.
115 JSONParser parser;
116 if (parser.need_preallocate)
117 parser.preallocate(calculateMaxSize(offsets));
118
119 Impl<JSONParser> impl;
120
121 /// prepare() does Impl-specific preparation before handling each row.
122 impl.prepare(Name::name, block, arguments, result_pos);
123
124 bool json_parsed_ok = false;
125 if (col_json_const)
126 {
127 StringRef json{reinterpret_cast<const char *>(&chars[0]), offsets[0] - 1};
128 json_parsed_ok = parser.parse(json);
129 }
130
131 for (const auto i : ext::range(0, input_rows_count))
132 {
133 if (!col_json_const)
134 {
135 StringRef json{reinterpret_cast<const char *>(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1};
136 json_parsed_ok = parser.parse(json);
137 }
138
139 bool ok = json_parsed_ok;
140 if (ok)
141 {
142 auto it = parser.getRoot();
143
144 /// Perform moves.
145 for (size_t j = 0; (j != moves.size()) && ok; ++j)
146 {
147 switch (moves[j].type)
148 {
149 case MoveType::ConstIndex:
150 ok = moveIteratorToElementByIndex(it, moves[j].index);
151 break;
152 case MoveType::ConstKey:
153 ok = moveIteratorToElementByKey(it, moves[j].key);
154 break;
155 case MoveType::Index:
156 {
157 const Field field = (*block.getByPosition(arguments[j + 1]).column)[i];
158 ok = moveIteratorToElementByIndex(it, field.get<Int64>());
159 break;
160 }
161 case MoveType::Key:
162 {
163 const Field field = (*block.getByPosition(arguments[j + 1]).column)[i];
164 ok = moveIteratorToElementByKey(it, field.get<String>().data());
165 break;
166 }
167 }
168 }
169
170 if (ok)
171 ok = impl.addValueToColumn(*to, it);
172 }
173
174 /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions.
175 if (!ok)
176 to->insertDefault();
177 }
178 block.getByPosition(result_pos).column = std::move(to);
179 }
180
181 private:
182 /// Represents a move of a JSON iterator described by a single argument passed to a JSON function.
183 /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1)
184 /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}.
185 /// Keys and indices can be nonconst, in this case they are calculated for each row.
186 enum class MoveType
187 {
188 Key,
189 Index,
190 ConstKey,
191 ConstIndex,
192 };
193
194 struct Move
195 {
196 Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {}
197 Move(MoveType type_, const String & key_) : type(type_), key(key_) {}
198 MoveType type;
199 size_t index = 0;
200 String key;
201 };
202
203 static std::vector<Move> prepareListOfMoves(Block & block, const ColumnNumbers & arguments)
204 {
205 constexpr size_t num_extra_arguments = Impl<JSONParser>::num_extra_arguments;
206 const size_t num_moves = arguments.size() - num_extra_arguments - 1;
207 std::vector<Move> moves;
208 moves.reserve(num_moves);
209 for (const auto i : ext::range(0, num_moves))
210 {
211 const auto & column = block.getByPosition(arguments[i + 1]);
212 if (!isString(column.type) && !isInteger(column.type))
213 throw Exception{"The argument " + std::to_string(i + 2) + " of function " + String(Name::name)
214 + " should be a string specifying key or an integer specifying index, illegal type: " + column.type->getName(),
215 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
216
217 if (isColumnConst(*column.column))
218 {
219 const auto & column_const = assert_cast<const ColumnConst &>(*column.column);
220 if (isString(column.type))
221 moves.emplace_back(MoveType::ConstKey, column_const.getField().get<String>());
222 else
223 moves.emplace_back(MoveType::ConstIndex, column_const.getField().get<Int64>());
224 }
225 else
226 {
227 if (isString(column.type))
228 moves.emplace_back(MoveType::Key, "");
229 else
230 moves.emplace_back(MoveType::Index, 0);
231 }
232 }
233 return moves;
234 }
235
236 using Iterator = typename JSONParser::Iterator;
237
238 /// Performs moves of types MoveType::Index and MoveType::ConstIndex.
239 static bool moveIteratorToElementByIndex(Iterator & it, int index)
240 {
241 if (JSONParser::isArray(it))
242 {
243 if (index > 0)
244 return JSONParser::arrayElementByIndex(it, index - 1);
245 else
246 return JSONParser::arrayElementByIndex(it, JSONParser::sizeOfArray(it) + index);
247 }
248 if (JSONParser::isObject(it))
249 {
250 if (index > 0)
251 return JSONParser::objectMemberByIndex(it, index - 1);
252 else
253 return JSONParser::objectMemberByIndex(it, JSONParser::sizeOfObject(it) + index);
254 }
255 return false;
256 }
257
258 /// Performs moves of types MoveType::Key and MoveType::ConstKey.
259 static bool moveIteratorToElementByKey(Iterator & it, const String & key)
260 {
261 if (JSONParser::isObject(it))
262 return JSONParser::objectMemberByName(it, key);
263 return false;
264 }
265
266 static size_t calculateMaxSize(const ColumnString::Offsets & offsets)
267 {
268 size_t max_size = 0;
269 for (const auto i : ext::range(0, offsets.size()))
270 if (max_size < offsets[i] - offsets[i - 1])
271 max_size = offsets[i] - offsets[i - 1];
272
273 if (max_size < 1)
274 max_size = 1;
275 return max_size;
276 }
277 };
278};
279
280
/// Name tags for the JSON functions. Each struct exposes a static `name`,
/// which is what FunctionJSON reads from its `Name` template parameter.
struct NameJSONHas
{
    static constexpr auto name = "JSONHas";
};

struct NameIsValidJSON
{
    static constexpr auto name = "isValidJSON";
};

struct NameJSONLength
{
    static constexpr auto name = "JSONLength";
};

struct NameJSONKey
{
    static constexpr auto name = "JSONKey";
};

struct NameJSONType
{
    static constexpr auto name = "JSONType";
};

struct NameJSONExtractInt
{
    static constexpr auto name = "JSONExtractInt";
};

struct NameJSONExtractUInt
{
    static constexpr auto name = "JSONExtractUInt";
};

struct NameJSONExtractFloat
{
    static constexpr auto name = "JSONExtractFloat";
};

struct NameJSONExtractBool
{
    static constexpr auto name = "JSONExtractBool";
};

struct NameJSONExtractString
{
    static constexpr auto name = "JSONExtractString";
};

struct NameJSONExtract
{
    static constexpr auto name = "JSONExtract";
};

struct NameJSONExtractKeysAndValues
{
    static constexpr auto name = "JSONExtractKeysAndValues";
};

struct NameJSONExtractRaw
{
    static constexpr auto name = "JSONExtractRaw";
};

struct NameJSONExtractArrayRaw
{
    static constexpr auto name = "JSONExtractArrayRaw";
};
295
296
297template <typename JSONParser>
298class JSONHasImpl
299{
300public:
301 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeUInt8>(); }
302
303 using Iterator = typename JSONParser::Iterator;
304 static bool addValueToColumn(IColumn & dest, const Iterator &)
305 {
306 ColumnVector<UInt8> & col_vec = assert_cast<ColumnVector<UInt8> &>(dest);
307 col_vec.insertValue(1);
308 return true;
309 }
310
311 static constexpr size_t num_extra_arguments = 0;
312 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
313};
314
315
316template <typename JSONParser>
317class IsValidJSONImpl
318{
319public:
320 static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments)
321 {
322 if (arguments.size() != 1)
323 {
324 /// IsValidJSON() shouldn't get parameters other than JSON.
325 throw Exception{"Function " + String(function_name) + " needs exactly one argument",
326 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
327 }
328 return std::make_shared<DataTypeUInt8>();
329 }
330
331 using Iterator = typename JSONParser::Iterator;
332 static bool addValueToColumn(IColumn & dest, const Iterator &)
333 {
334 /// This function is called only if JSON is valid.
335 /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function.
336 ColumnVector<UInt8> & col_vec = assert_cast<ColumnVector<UInt8> &>(dest);
337 col_vec.insertValue(1);
338 return true;
339 }
340
341 static constexpr size_t num_extra_arguments = 0;
342 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
343};
344
345
346template <typename JSONParser>
347class JSONLengthImpl
348{
349public:
350 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
351 {
352 return std::make_shared<DataTypeUInt64>();
353 }
354
355 using Iterator = typename JSONParser::Iterator;
356 static bool addValueToColumn(IColumn & dest, const Iterator & it)
357 {
358 size_t size;
359 if (JSONParser::isArray(it))
360 size = JSONParser::sizeOfArray(it);
361 else if (JSONParser::isObject(it))
362 size = JSONParser::sizeOfObject(it);
363 else
364 return false;
365
366 ColumnVector<UInt64> & col_vec = assert_cast<ColumnVector<UInt64> &>(dest);
367 col_vec.insertValue(size);
368 return true;
369 }
370
371 static constexpr size_t num_extra_arguments = 0;
372 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
373};
374
375
376template <typename JSONParser>
377class JSONKeyImpl
378{
379public:
380 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
381 {
382 return std::make_shared<DataTypeString>();
383 }
384
385 using Iterator = typename JSONParser::Iterator;
386 static bool addValueToColumn(IColumn & dest, const Iterator & it)
387 {
388 if (!JSONParser::isObjectMember(it))
389 return false;
390 StringRef key = JSONParser::getKey(it);
391 ColumnString & col_str = assert_cast<ColumnString &>(dest);
392 col_str.insertData(key.data, key.size);
393 return true;
394 }
395
396 static constexpr size_t num_extra_arguments = 0;
397 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
398};
399
400
401template <typename JSONParser>
402class JSONTypeImpl
403{
404public:
405 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
406 {
407 static const std::vector<std::pair<String, Int8>> values = {
408 {"Array", '['},
409 {"Object", '{'},
410 {"String", '"'},
411 {"Int64", 'i'},
412 {"UInt64", 'u'},
413 {"Double", 'd'},
414 {"Bool", 'b'},
415 {"Null", 0}, /// the default value for the column.
416 };
417 return std::make_shared<DataTypeEnum<Int8>>(values);
418 }
419
420 using Iterator = typename JSONParser::Iterator;
421 static bool addValueToColumn(IColumn & dest, const Iterator & it)
422 {
423 UInt8 type;
424 if (JSONParser::isInt64(it))
425 type = 'i';
426 else if (JSONParser::isUInt64(it))
427 type = 'u';
428 else if (JSONParser::isDouble(it))
429 type = 'd';
430 else if (JSONParser::isBool(it))
431 type = 'b';
432 else if (JSONParser::isString(it))
433 type = '"';
434 else if (JSONParser::isArray(it))
435 type = '[';
436 else if (JSONParser::isObject(it))
437 type = '{';
438 else if (JSONParser::isNull(it))
439 type = 0;
440 else
441 return false;
442
443 ColumnVector<Int8> & col_vec = assert_cast<ColumnVector<Int8> &>(dest);
444 col_vec.insertValue(type);
445 return true;
446 }
447
448 static constexpr size_t num_extra_arguments = 0;
449 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
450};
451
452
453template <typename JSONParser, typename NumberType, bool convert_bool_to_integer = false>
454class JSONExtractNumericImpl
455{
456public:
457 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
458 {
459 return std::make_shared<DataTypeNumber<NumberType>>();
460 }
461
462 using Iterator = typename JSONParser::Iterator;
463 static bool addValueToColumn(IColumn & dest, const Iterator & it)
464 {
465 NumberType value;
466
467 if (JSONParser::isInt64(it))
468 {
469 if (!accurate::convertNumeric(JSONParser::getInt64(it), value))
470 return false;
471 }
472 else if (JSONParser::isUInt64(it))
473 {
474 if (!accurate::convertNumeric(JSONParser::getUInt64(it), value))
475 return false;
476 }
477 else if (JSONParser::isDouble(it))
478 {
479 if (!accurate::convertNumeric(JSONParser::getDouble(it), value))
480 return false;
481 }
482 else if (JSONParser::isBool(it) && is_integral_v<NumberType> && convert_bool_to_integer)
483 value = static_cast<NumberType>(JSONParser::getBool(it));
484 else
485 return false;
486
487 auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
488 col_vec.insertValue(value);
489 return true;
490 }
491
492 static constexpr size_t num_extra_arguments = 0;
493 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
494};
495
496template <typename JSONParser>
497using JSONExtractInt8Impl = JSONExtractNumericImpl<JSONParser, Int8>;
498template <typename JSONParser>
499using JSONExtractUInt8Impl = JSONExtractNumericImpl<JSONParser, UInt8>;
500template <typename JSONParser>
501using JSONExtractInt16Impl = JSONExtractNumericImpl<JSONParser, Int16>;
502template <typename JSONParser>
503using JSONExtractUInt16Impl = JSONExtractNumericImpl<JSONParser, UInt16>;
504template <typename JSONParser>
505using JSONExtractInt32Impl = JSONExtractNumericImpl<JSONParser, Int32>;
506template <typename JSONParser>
507using JSONExtractUInt32Impl = JSONExtractNumericImpl<JSONParser, UInt32>;
508template <typename JSONParser>
509using JSONExtractInt64Impl = JSONExtractNumericImpl<JSONParser, Int64>;
510template <typename JSONParser>
511using JSONExtractUInt64Impl = JSONExtractNumericImpl<JSONParser, UInt64>;
512template <typename JSONParser>
513using JSONExtractFloat32Impl = JSONExtractNumericImpl<JSONParser, Float32>;
514template <typename JSONParser>
515using JSONExtractFloat64Impl = JSONExtractNumericImpl<JSONParser, Float64>;
516
517
518template <typename JSONParser>
519class JSONExtractBoolImpl
520{
521public:
522 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
523 {
524 return std::make_shared<DataTypeUInt8>();
525 }
526
527 using Iterator = typename JSONParser::Iterator;
528 static bool addValueToColumn(IColumn & dest, const Iterator & it)
529 {
530 if (!JSONParser::isBool(it))
531 return false;
532
533 auto & col_vec = assert_cast<ColumnVector<UInt8> &>(dest);
534 col_vec.insertValue(static_cast<UInt8>(JSONParser::getBool(it)));
535 return true;
536 }
537
538 static constexpr size_t num_extra_arguments = 0;
539 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
540};
541
542
543template <typename JSONParser>
544class JSONExtractStringImpl
545{
546public:
547 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
548 {
549 return std::make_shared<DataTypeString>();
550 }
551
552 using Iterator = typename JSONParser::Iterator;
553 static bool addValueToColumn(IColumn & dest, const Iterator & it)
554 {
555 if (!JSONParser::isString(it))
556 return false;
557
558 StringRef str = JSONParser::getString(it);
559 ColumnString & col_str = assert_cast<ColumnString &>(dest);
560 col_str.insertData(str.data, str.size);
561 return true;
562 }
563
564 static constexpr size_t num_extra_arguments = 0;
565 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
566};
567
568
569/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables.
570template <typename JSONParser>
571struct JSONExtractTree
572{
573 using Iterator = typename JSONParser::Iterator;
574
575 class Node
576 {
577 public:
578 Node() {}
579 virtual ~Node() {}
580 virtual bool addValueToColumn(IColumn &, const Iterator &) = 0;
581 };
582
583 template <typename NumberType>
584 class NumericNode : public Node
585 {
586 public:
587 bool addValueToColumn(IColumn & dest, const Iterator & it) override
588 {
589 return JSONExtractNumericImpl<JSONParser, NumberType, true>::addValueToColumn(dest, it);
590 }
591 };
592
593 class StringNode : public Node
594 {
595 public:
596 bool addValueToColumn(IColumn & dest, const Iterator & it) override
597 {
598 return JSONExtractStringImpl<JSONParser>::addValueToColumn(dest, it);
599 }
600 };
601
602 class FixedStringNode : public Node
603 {
604 public:
605 bool addValueToColumn(IColumn & dest, const Iterator & it) override
606 {
607 if (!JSONParser::isString(it))
608 return false;
609 auto & col_str = assert_cast<ColumnFixedString &>(dest);
610 StringRef str = JSONParser::getString(it);
611 if (str.size > col_str.getN())
612 return false;
613 col_str.insertData(str.data, str.size);
614 return true;
615 }
616 };
617
618 template <typename Type>
619 class EnumNode : public Node
620 {
621 public:
622 EnumNode(const std::vector<std::pair<String, Type>> & name_value_pairs_) : name_value_pairs(name_value_pairs_)
623 {
624 for (const auto & name_value_pair : name_value_pairs)
625 {
626 name_to_value_map.emplace(name_value_pair.first, name_value_pair.second);
627 only_values.emplace(name_value_pair.second);
628 }
629 }
630
631 bool addValueToColumn(IColumn & dest, const Iterator & it) override
632 {
633 auto & col_vec = assert_cast<ColumnVector<Type> &>(dest);
634
635 if (JSONParser::isInt64(it))
636 {
637 Type value;
638 if (!accurate::convertNumeric(JSONParser::getInt64(it), value) || !only_values.count(value))
639 return false;
640 col_vec.insertValue(value);
641 return true;
642 }
643
644 if (JSONParser::isUInt64(it))
645 {
646 Type value;
647 if (!accurate::convertNumeric(JSONParser::getUInt64(it), value) || !only_values.count(value))
648 return false;
649 col_vec.insertValue(value);
650 return true;
651 }
652
653 if (JSONParser::isString(it))
654 {
655 auto value = name_to_value_map.find(JSONParser::getString(it));
656 if (value == name_to_value_map.end())
657 return false;
658 col_vec.insertValue(value->second);
659 return true;
660 }
661
662 return false;
663 }
664
665 private:
666 std::vector<std::pair<String, Type>> name_value_pairs;
667 std::unordered_map<StringRef, Type> name_to_value_map;
668 std::unordered_set<Type> only_values;
669 };
670
671 class NullableNode : public Node
672 {
673 public:
674 NullableNode(std::unique_ptr<Node> nested_) : nested(std::move(nested_)) {}
675
676 bool addValueToColumn(IColumn & dest, const Iterator & it) override
677 {
678 ColumnNullable & col_null = assert_cast<ColumnNullable &>(dest);
679 if (!nested->addValueToColumn(col_null.getNestedColumn(), it))
680 return false;
681 col_null.getNullMapColumn().insertValue(0);
682 return true;
683 }
684
685 private:
686 std::unique_ptr<Node> nested;
687 };
688
689 class ArrayNode : public Node
690 {
691 public:
692 ArrayNode(std::unique_ptr<Node> nested_) : nested(std::move(nested_)) {}
693
694 bool addValueToColumn(IColumn & dest, const Iterator & it) override
695 {
696 if (!JSONParser::isArray(it))
697 return false;
698
699 Iterator it2 = it;
700 if (!JSONParser::firstArrayElement(it2))
701 return false;
702
703 ColumnArray & col_arr = assert_cast<ColumnArray &>(dest);
704 auto & data = col_arr.getData();
705 size_t old_size = data.size();
706 bool were_valid_elements = false;
707
708 do
709 {
710 if (nested->addValueToColumn(data, it2))
711 were_valid_elements = true;
712 else
713 data.insertDefault();
714 }
715 while (JSONParser::nextArrayElement(it2));
716
717 if (!were_valid_elements)
718 {
719 data.popBack(data.size() - old_size);
720 return false;
721 }
722
723 col_arr.getOffsets().push_back(data.size());
724 return true;
725 }
726
727 private:
728 std::unique_ptr<Node> nested;
729 };
730
731 class TupleNode : public Node
732 {
733 public:
734 TupleNode(std::vector<std::unique_ptr<Node>> nested_, const std::vector<String> & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_)
735 {
736 for (size_t i = 0; i != explicit_names.size(); ++i)
737 name_to_index_map.emplace(explicit_names[i], i);
738 }
739
740 bool addValueToColumn(IColumn & dest, const Iterator & it) override
741 {
742 ColumnTuple & tuple = assert_cast<ColumnTuple &>(dest);
743 size_t old_size = dest.size();
744 bool were_valid_elements = false;
745
746 auto set_size = [&](size_t size)
747 {
748 for (size_t i = 0; i != tuple.tupleSize(); ++i)
749 {
750 auto & col = tuple.getColumn(i);
751 if (col.size() != size)
752 {
753 if (col.size() > size)
754 col.popBack(col.size() - size);
755 else
756 while (col.size() < size)
757 col.insertDefault();
758 }
759 }
760 };
761
762 if (JSONParser::isArray(it))
763 {
764 Iterator it2 = it;
765 if (!JSONParser::firstArrayElement(it2))
766 return false;
767
768 for (size_t index = 0; index != nested.size(); ++index)
769 {
770 if (nested[index]->addValueToColumn(tuple.getColumn(index), it2))
771 were_valid_elements = true;
772 else
773 tuple.getColumn(index).insertDefault();
774 if (!JSONParser::nextArrayElement(it2))
775 break;
776 }
777
778 set_size(old_size + static_cast<size_t>(were_valid_elements));
779 return were_valid_elements;
780 }
781
782 if (JSONParser::isObject(it))
783 {
784 if (name_to_index_map.empty())
785 {
786 Iterator it2 = it;
787 if (!JSONParser::firstObjectMember(it2))
788 return false;
789
790 for (size_t index = 0; index != nested.size(); ++index)
791 {
792 if (nested[index]->addValueToColumn(tuple.getColumn(index), it2))
793 were_valid_elements = true;
794 else
795 tuple.getColumn(index).insertDefault();
796 if (!JSONParser::nextObjectMember(it2))
797 break;
798 }
799 }
800 else
801 {
802 Iterator it2 = it;
803 StringRef key;
804 if (!JSONParser::firstObjectMember(it2, key))
805 return false;
806
807 do
808 {
809 auto index = name_to_index_map.find(key);
810 if (index != name_to_index_map.end())
811 {
812 if (nested[index->second]->addValueToColumn(tuple.getColumn(index->second), it2))
813 were_valid_elements = true;
814 }
815 }
816 while (JSONParser::nextObjectMember(it2, key));
817 }
818
819 set_size(old_size + static_cast<size_t>(were_valid_elements));
820 return were_valid_elements;
821 }
822
823 return false;
824 }
825
826 private:
827 std::vector<std::unique_ptr<Node>> nested;
828 std::vector<String> explicit_names;
829 std::unordered_map<StringRef, size_t> name_to_index_map;
830 };
831
832 static std::unique_ptr<Node> build(const char * function_name, const DataTypePtr & type)
833 {
834 switch (type->getTypeId())
835 {
836 case TypeIndex::UInt8: return std::make_unique<NumericNode<UInt8>>();
837 case TypeIndex::UInt16: return std::make_unique<NumericNode<UInt16>>();
838 case TypeIndex::UInt32: return std::make_unique<NumericNode<UInt32>>();
839 case TypeIndex::UInt64: return std::make_unique<NumericNode<UInt64>>();
840 case TypeIndex::Int8: return std::make_unique<NumericNode<Int8>>();
841 case TypeIndex::Int16: return std::make_unique<NumericNode<Int16>>();
842 case TypeIndex::Int32: return std::make_unique<NumericNode<Int32>>();
843 case TypeIndex::Int64: return std::make_unique<NumericNode<Int64>>();
844 case TypeIndex::Float32: return std::make_unique<NumericNode<Float32>>();
845 case TypeIndex::Float64: return std::make_unique<NumericNode<Float64>>();
846 case TypeIndex::String: return std::make_unique<StringNode>();
847 case TypeIndex::FixedString: return std::make_unique<FixedStringNode>();
848 case TypeIndex::Enum8:
849 return std::make_unique<EnumNode<Int8>>(static_cast<const DataTypeEnum8 &>(*type).getValues());
850 case TypeIndex::Enum16:
851 return std::make_unique<EnumNode<Int16>>(static_cast<const DataTypeEnum16 &>(*type).getValues());
852 case TypeIndex::Nullable:
853 {
854 return std::make_unique<NullableNode>(build(function_name, static_cast<const DataTypeNullable &>(*type).getNestedType()));
855 }
856 case TypeIndex::Array:
857 {
858 return std::make_unique<ArrayNode>(build(function_name, static_cast<const DataTypeArray &>(*type).getNestedType()));
859 }
860 case TypeIndex::Tuple:
861 {
862 const auto & tuple = static_cast<const DataTypeTuple &>(*type);
863 const auto & tuple_elements = tuple.getElements();
864 std::vector<std::unique_ptr<Node>> elements;
865 for (const auto & tuple_element : tuple_elements)
866 elements.emplace_back(build(function_name, tuple_element));
867 return std::make_unique<TupleNode>(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{});
868 }
869 default:
870 throw Exception{"Function " + String(function_name) + " doesn't support the return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
871 }
872 }
873};
874
875template <typename JSONParser>
876class JSONExtractImpl
877{
878public:
879 static constexpr size_t num_extra_arguments = 1;
880
881 static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments)
882 {
883 if (arguments.size() < 2)
884 throw Exception{"Function " + String(function_name) + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
885
886 const auto & col = arguments.back();
887 auto col_type_const = typeid_cast<const ColumnConst *>(col.column.get());
888 if (!col_type_const || !isString(col.type))
889 throw Exception{"The last argument of function " + String(function_name)
890 + " should be a constant string specifying the return data type, illegal value: " + col.column->getName(),
891 ErrorCodes::ILLEGAL_COLUMN};
892
893 return DataTypeFactory::instance().get(col_type_const->getValue<String>());
894 }
895
896 void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos)
897 {
898 extract_tree = JSONExtractTree<JSONParser>::build(function_name, block.getByPosition(result_pos).type);
899 }
900
901 using Iterator = typename JSONParser::Iterator;
902 bool addValueToColumn(IColumn & dest, const Iterator & it)
903 {
904 return extract_tree->addValueToColumn(dest, it);
905 }
906
907protected:
908 std::unique_ptr<typename JSONExtractTree<JSONParser>::Node> extract_tree;
909};
910
911
912template <typename JSONParser>
913class JSONExtractKeysAndValuesImpl
914{
915public:
916 static constexpr size_t num_extra_arguments = 1;
917
918 static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments)
919 {
920 if (arguments.size() < 2)
921 throw Exception{"Function " + String(function_name) + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
922
923 const auto & col = arguments.back();
924 auto col_type_const = typeid_cast<const ColumnConst *>(col.column.get());
925 if (!col_type_const || !isString(col.type))
926 throw Exception{"The last argument of function " + String(function_name)
927 + " should be a constant string specifying the values' data type, illegal value: " + col.column->getName(),
928 ErrorCodes::ILLEGAL_COLUMN};
929
930 DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue<String>());
931 DataTypePtr key_type = std::make_unique<DataTypeString>();
932 DataTypePtr tuple_type = std::make_unique<DataTypeTuple>(DataTypes{key_type, value_type});
933 return std::make_unique<DataTypeArray>(tuple_type);
934 }
935
936 void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos)
937 {
938 const auto & result_type = block.getByPosition(result_pos).type;
939 const auto tuple_type = typeid_cast<const DataTypeArray *>(result_type.get())->getNestedType();
940 const auto value_type = typeid_cast<const DataTypeTuple *>(tuple_type.get())->getElements()[1];
941 extract_tree = JSONExtractTree<JSONParser>::build(function_name, value_type);
942 }
943
944 using Iterator = typename JSONParser::Iterator;
945 bool addValueToColumn(IColumn & dest, const Iterator & it)
946 {
947 if (!JSONParser::isObject(it))
948 return false;
949
950 auto & col_arr = assert_cast<ColumnArray &>(dest);
951 auto & col_tuple = assert_cast<ColumnTuple &>(col_arr.getData());
952 size_t old_size = col_tuple.size();
953 auto & col_key = assert_cast<ColumnString &>(col_tuple.getColumn(0));
954 auto & col_value = col_tuple.getColumn(1);
955
956 StringRef key;
957 Iterator it2 = it;
958 if (!JSONParser::firstObjectMember(it2, key))
959 return false;
960
961 do
962 {
963 if (extract_tree->addValueToColumn(col_value, it2))
964 col_key.insertData(key.data, key.size);
965 }
966 while (JSONParser::nextObjectMember(it2, key));
967
968 if (col_tuple.size() == old_size)
969 return false;
970
971 col_arr.getOffsets().push_back(col_tuple.size());
972 return true;
973 }
974
975private:
976 std::unique_ptr<typename JSONExtractTree<JSONParser>::Node> extract_tree;
977};
978
979
980template <typename JSONParser>
981class JSONExtractRawImpl
982{
983public:
984 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
985 {
986 return std::make_shared<DataTypeString>();
987 }
988
989 using Iterator = typename JSONParser::Iterator;
990 static bool addValueToColumn(IColumn & dest, const Iterator & it)
991 {
992 ColumnString & col_str = assert_cast<ColumnString &>(dest);
993 auto & chars = col_str.getChars();
994 WriteBufferFromVector<ColumnString::Chars> buf(chars, WriteBufferFromVector<ColumnString::Chars>::AppendModeTag());
995 traverse(it, buf);
996 buf.finish();
997 chars.push_back(0);
998 col_str.getOffsets().push_back(chars.size());
999 return true;
1000 }
1001
1002 static constexpr size_t num_extra_arguments = 0;
1003 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
1004
1005private:
1006 static void traverse(const Iterator & it, WriteBuffer & buf)
1007 {
1008 if (JSONParser::isInt64(it))
1009 {
1010 writeIntText(JSONParser::getInt64(it), buf);
1011 return;
1012 }
1013 if (JSONParser::isUInt64(it))
1014 {
1015 writeIntText(JSONParser::getUInt64(it), buf);
1016 return;
1017 }
1018 if (JSONParser::isDouble(it))
1019 {
1020 writeFloatText(JSONParser::getDouble(it), buf);
1021 return;
1022 }
1023 if (JSONParser::isBool(it))
1024 {
1025 if (JSONParser::getBool(it))
1026 writeCString("true", buf);
1027 else
1028 writeCString("false", buf);
1029 return;
1030 }
1031 if (JSONParser::isString(it))
1032 {
1033 writeJSONString(JSONParser::getString(it), buf, format_settings());
1034 return;
1035 }
1036 if (JSONParser::isArray(it))
1037 {
1038 writeChar('[', buf);
1039 Iterator it2 = it;
1040 if (JSONParser::firstArrayElement(it2))
1041 {
1042 traverse(it2, buf);
1043 while (JSONParser::nextArrayElement(it2))
1044 {
1045 writeChar(',', buf);
1046 traverse(it2, buf);
1047 }
1048 }
1049 writeChar(']', buf);
1050 return;
1051 }
1052 if (JSONParser::isObject(it))
1053 {
1054 writeChar('{', buf);
1055 Iterator it2 = it;
1056 StringRef key;
1057 if (JSONParser::firstObjectMember(it2, key))
1058 {
1059 writeJSONString(key, buf, format_settings());
1060 writeChar(':', buf);
1061 traverse(it2, buf);
1062 while (JSONParser::nextObjectMember(it2, key))
1063 {
1064 writeChar(',', buf);
1065 writeJSONString(key, buf, format_settings());
1066 writeChar(':', buf);
1067 traverse(it2, buf);
1068 }
1069 }
1070 writeChar('}', buf);
1071 return;
1072 }
1073 if (JSONParser::isNull(it))
1074 {
1075 writeCString("null", buf);
1076 return;
1077 }
1078 }
1079
1080 static const FormatSettings & format_settings()
1081 {
1082 static const FormatSettings the_instance = []
1083 {
1084 FormatSettings settings;
1085 settings.json.escape_forward_slashes = false;
1086 return settings;
1087 }();
1088 return the_instance;
1089 }
1090};
1091
1092template <typename JSONParser>
1093class JSONExtractArrayRawImpl
1094{
1095public:
1096 static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &)
1097 {
1098 return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
1099 }
1100
1101 using Iterator = typename JSONParser::Iterator;
1102 static bool addValueToColumn(IColumn & dest, const Iterator & it)
1103 {
1104 if (!JSONParser::isArray(it))
1105 {
1106 return false;
1107 }
1108 ColumnArray & col_res = assert_cast<ColumnArray &>(dest);
1109 Iterator array_it = it;
1110 size_t size = 0;
1111 if (JSONParser::firstArrayElement(array_it))
1112 {
1113 do
1114 {
1115 JSONExtractRawImpl<JSONParser>::addValueToColumn(col_res.getData(), array_it);
1116 ++size;
1117 } while (JSONParser::nextArrayElement(array_it));
1118 }
1119
1120 col_res.getOffsets().push_back(col_res.getOffsets().back() + size);
1121 return true;
1122 }
1123
1124 static constexpr size_t num_extra_arguments = 0;
1125 static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {}
1126};
1127}
1128