1#pragma once
2
3#include <optional>
4#include <unordered_map>
5
6#include <Core/Block.h>
7#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
8#include <Formats/FormatSettings.h>
9
10
11namespace DB
12{
13
14/** A stream for inputting data in csv format.
15 * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
16 */
17class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo
18{
19public:
20 /** with_names - in the first line the header with column names
21 */
22 CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
23 bool with_names_, const FormatSettings & format_settings_);
24
25 String getName() const override { return "CSVRowInputFormat"; }
26
27 bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
28 void readPrefix() override;
29 bool allowSyncAfterError() const override { return true; }
30 void syncAfterError() override;
31 void resetParser() override;
32
33private:
34 bool with_names;
35 const FormatSettings format_settings;
36 DataTypes data_types;
37
38 using IndexesMap = std::unordered_map<String, size_t>;
39 IndexesMap column_indexes_by_names;
40
41 /// Maps indexes of columns in the input file to indexes of table columns
42 using OptionalIndexes = std::vector<std::optional<size_t>>;
43 OptionalIndexes column_indexes_for_input_fields;
44
45 /// Tracks which columns we have read in a single read() call.
46 /// For columns that are never read, it is initialized to false when we
47 /// read the file header, and never changed afterwards.
48 /// For other columns, it is updated on each read() call.
49 std::vector<UInt8> read_columns;
50
51 /// Whether we have any columns that are not read from file at all,
52 /// and must be always initialized with defaults.
53 bool have_always_default_columns = false;
54
55 void addInputColumn(const String & column_name);
56
57 bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
58 void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column,
59 ReadBuffer::Position & prev_pos, ReadBuffer::Position & curr_pos) override;
60 bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override
61 {
62 return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter;
63 }
64
65 bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
66};
67
68}
69