JSONEachRowRowInputFormat.h source code [ClickHouse/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h]

1	#pragma once
2
3	#include <Core/Block.h>
4	#include <Processors/Formats/IRowInputFormat.h>
5	#include <Formats/FormatSettings.h>
6	#include <Common/HashTable/HashMap.h>
7
8
9	namespace DB
10	{
11
12	class ReadBuffer;
13
14
15	/* A stream for reading data in JSON format, where each row is represented by a separate JSON object.*
16	* Objects can be separated by line feed, other whitespace characters in any number and possibly a comma.
17	* Fields can be listed in any order (including, in different lines there may be different order),
18	* and some fields may be missing.
19	*/
20	class JSONEachRowRowInputFormat : public IRowInputFormat
21	{
22	public:
23	JSONEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_);
24
25	String getName() const override { return "JSONEachRowRowInputFormat"; }
26
27	bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
28	bool allowSyncAfterError() const override { return true; }
29	void syncAfterError() override;
30	void resetParser() override;
31
32	private:
33	const String & columnName(size_t i) const;
34	size_t columnIndex(const StringRef & name, size_t key_index);
35	bool advanceToNextKey(size_t key_index);
36	void skipUnknownField(const StringRef & name_ref);
37	StringRef readColumnName(ReadBuffer & buf);
38	void readField(size_t index, MutableColumns & columns);
39	void readJSONObject(MutableColumns & columns);
40	void readNestedData(const String & name, MutableColumns & columns);
41
42	private:
43
44	const FormatSettings format_settings;
45
46	/// Buffer for the read from the stream field name. Used when you have to copy it.
47	/// Also, if processing of Nested data is in progress, it holds the common prefix
48	/// of the nested column names (so that appending the field name to it produces
49	/// the full column name)
50	String current_column_name;
51
52	/// If processing Nested data, holds the length of the common prefix
53	/// of the names of related nested columns. For example, for a table
54	/// created as follows
55	/// CREATE TABLE t (n Nested (i Int32, s String))
56	/// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.'
57	size_t nested_prefix_length = `0`;
58
59	/// Set of columns for which the values were read. The rest will be filled with default values.
60	std::vector<UInt8> read_columns;
61	/// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
62	std::vector<UInt8> seen_columns;
63	/// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true
64	/// for row like {..., "non-nullable column name" : null, ...}
65
66	/// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map.
67	using NameMap = HashMap<StringRef, size_t, StringRefHash>;
68	NameMap name_map;
69
70	/// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
71	std::vector<NameMap::LookupResult> prev_positions;
72	};
73
74	}
75

Browse the source code of ClickHouse/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h