1 | #pragma once |
2 | |
3 | #include <Core/Block.h> |
4 | #include <Formats/FormatSettings.h> |
5 | #include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> |
6 | |
7 | |
8 | namespace DB |
9 | { |
10 | |
11 | /** A stream to input data in tsv format. |
12 | */ |
13 | class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo |
14 | { |
15 | public: |
16 | /** with_names - the first line is the header with the names of the columns |
17 | * with_types - on the next line header with type names |
18 | */ |
19 | TabSeparatedRowInputFormat(const Block & , ReadBuffer & in_, const Params & params_, |
20 | bool with_names_, bool with_types_, const FormatSettings & format_settings_); |
21 | |
22 | String getName() const override { return "TabSeparatedRowInputFormat" ; } |
23 | |
24 | bool readRow(MutableColumns & columns, RowReadExtension &) override; |
25 | void readPrefix() override; |
26 | bool allowSyncAfterError() const override { return true; } |
27 | void syncAfterError() override; |
28 | |
29 | void resetParser() override; |
30 | |
31 | private: |
32 | bool with_names; |
33 | bool with_types; |
34 | const FormatSettings format_settings; |
35 | DataTypes data_types; |
36 | |
37 | using IndexesMap = std::unordered_map<String, size_t>; |
38 | IndexesMap column_indexes_by_names; |
39 | |
40 | using OptionalIndexes = std::vector<std::optional<size_t>>; |
41 | OptionalIndexes column_indexes_for_input_fields; |
42 | |
43 | std::vector<UInt8> read_columns; |
44 | std::vector<size_t> columns_to_fill_with_default_values; |
45 | |
46 | bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column); |
47 | |
48 | void addInputColumn(const String & column_name); |
49 | void setupAllColumnsByTableSchema(); |
50 | void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext); |
51 | |
52 | bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; |
53 | void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column, |
54 | ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override; |
55 | bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } |
56 | }; |
57 | |
58 | } |
59 | |