| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Block.h> |
| 4 | #include <Processors/Formats/IRowInputFormat.h> |
| 5 | #include <Formats/FormatSettings.h> |
| 6 | #include <Common/HashTable/HashMap.h> |
| 7 | |
| 8 | |
| 9 | namespace DB |
| 10 | { |
| 11 | |
| 12 | class ReadBuffer; |
| 13 | |
| 14 | |
| 15 | /** Stream for reading data in TSKV format. |
| 16 | * TSKV is a very inefficient data format. |
| 17 | * Similar to TSV, but each field is written as key=value. |
| 18 | * Fields can be listed in any order (including, in different lines there may be different order), |
| 19 | * and some fields may be missing. |
| 20 | * An equal sign can be escaped in the field name. |
| 21 | * Also, as an additional element there may be a useless tskv fragment - it needs to be ignored. |
| 22 | */ |
| 23 | class TSKVRowInputFormat : public IRowInputFormat |
| 24 | { |
| 25 | public: |
| 26 | TSKVRowInputFormat(ReadBuffer & in_, Block , Params params_, const FormatSettings & format_settings_); |
| 27 | |
| 28 | String getName() const override { return "TSKVRowInputFormat" ; } |
| 29 | |
| 30 | bool readRow(MutableColumns & columns, RowReadExtension &) override; |
| 31 | bool allowSyncAfterError() const override { return true; } |
| 32 | void syncAfterError() override; |
| 33 | void resetParser() override; |
| 34 | |
| 35 | |
| 36 | private: |
| 37 | const FormatSettings format_settings; |
| 38 | |
| 39 | /// Buffer for the read from the stream the field name. Used when you have to copy it. |
| 40 | String name_buf; |
| 41 | |
| 42 | /// Hash table matching `field name -> position in the block`. NOTE You can use perfect hash map. |
| 43 | using NameMap = HashMap<StringRef, size_t, StringRefHash>; |
| 44 | NameMap name_map; |
| 45 | |
| 46 | /// Set of columns for which the values were read. The rest will be filled with default values. |
| 47 | std::vector<UInt8> read_columns; |
| 48 | /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. |
| 49 | std::vector<UInt8> seen_columns; |
| 50 | /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true |
| 51 | /// for row like ..., non-nullable column name=\N, ... |
| 52 | }; |
| 53 | |
| 54 | } |
| 55 | |