| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Block.h> |
| 4 | #include <Formats/FormatSettings.h> |
| 5 | #include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h> |
| 6 | |
| 7 | |
| 8 | namespace DB |
| 9 | { |
| 10 | |
| 11 | /** A stream to input data in tsv format. |
| 12 | */ |
| 13 | class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo |
| 14 | { |
| 15 | public: |
| 16 | /** with_names - the first line is the header with the names of the columns |
| 17 | * with_types - on the next line header with type names |
| 18 | */ |
| 19 | TabSeparatedRowInputFormat(const Block & , ReadBuffer & in_, const Params & params_, |
| 20 | bool with_names_, bool with_types_, const FormatSettings & format_settings_); |
| 21 | |
| 22 | String getName() const override { return "TabSeparatedRowInputFormat" ; } |
| 23 | |
| 24 | bool readRow(MutableColumns & columns, RowReadExtension &) override; |
| 25 | void readPrefix() override; |
| 26 | bool allowSyncAfterError() const override { return true; } |
| 27 | void syncAfterError() override; |
| 28 | |
| 29 | void resetParser() override; |
| 30 | |
| 31 | private: |
| 32 | bool with_names; |
| 33 | bool with_types; |
| 34 | const FormatSettings format_settings; |
| 35 | DataTypes data_types; |
| 36 | |
| 37 | using IndexesMap = std::unordered_map<String, size_t>; |
| 38 | IndexesMap column_indexes_by_names; |
| 39 | |
| 40 | using OptionalIndexes = std::vector<std::optional<size_t>>; |
| 41 | OptionalIndexes column_indexes_for_input_fields; |
| 42 | |
| 43 | std::vector<UInt8> read_columns; |
| 44 | std::vector<size_t> columns_to_fill_with_default_values; |
| 45 | |
| 46 | bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column); |
| 47 | |
| 48 | void addInputColumn(const String & column_name); |
| 49 | void setupAllColumnsByTableSchema(); |
| 50 | void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext); |
| 51 | |
| 52 | bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; |
| 53 | void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column, |
| 54 | ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override; |
| 55 | bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } |
| 56 | }; |
| 57 | |
| 58 | } |
| 59 | |