read_csv.hpp source code [Velox/build/_deps/duckdb-src/src/include/duckdb/function/table/read_csv.hpp]

1	//===----------------------------------------------------------------------===//
2	// DuckDB
3	//
4	// duckdb/function/table/read_csv.hpp
5	//
6	//
7	//===----------------------------------------------------------------------===//
8
9	#pragma once
10
11	#include "duckdb/function/table_function.hpp"
12	#include "duckdb/function/scalar/strftime_format.hpp"
13	#include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
14	#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
15	#include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
16	#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"
17	#include "duckdb/execution/operator/persistent/csv_buffer.hpp"
18	#include "duckdb/function/built_in_functions.hpp"
19
20	namespace duckdb {
21
22	class ReadCSV {
23	public:
24	static unique_ptr<CSVFileHandle> OpenCSV(const string &file_path, FileCompressionType compression,
25	ClientContext &context);
26	};
27
28	struct BaseCSVData : public TableFunctionData {
29	virtual ~BaseCSVData() {
30	}
31	//! The file path of the CSV file to read or write
32	vector<string> files;
33	//! The CSV reader options
34	BufferedCSVReaderOptions options;
35	//! Offsets for generated columns
36	idx_t filename_col_idx;
37	idx_t hive_partition_col_idx;
38
39	void Finalize();
40	};
41
42	struct WriteCSVData : public BaseCSVData {
43	WriteCSVData(string file_path, vector<LogicalType> sql_types, vector<string> names)
44	: sql_types (std::move(sql_types)) {
45	files.push_back(x: std::move(file_path));
46	options.name_list = std::move(names);
47	}
48
49	//! The SQL types to write
50	vector<LogicalType> sql_types;
51	//! The newline string to write
52	string newline = "\n";
53	//! Whether or not we are writing a simple CSV (delimiter, quote and escape are all 1 byte in length)
54	bool is_simple;
55	//! The size of the CSV file (in bytes) that we buffer before we flush it to disk
56	idx_t flush_size = `4096` * `8`;
57	//! For each byte whether or not the CSV file requires quotes when containing the byte
58	unsafe_unique_array<bool> requires_quotes;
59	};
60
61	struct ColumnInfo {
62	ColumnInfo() {
63	}
64	ColumnInfo(vector<std::string> names_p, vector<LogicalType> types_p) {
65	names = std::move(names_p);
66	types = std::move(types_p);
67	}
68	void Serialize(FieldWriter &writer) const {
69	writer.WriteList<string>(elements: names);
70	writer.WriteRegularSerializableList<LogicalType>(elements: types);
71	}
72
73	static ColumnInfo Deserialize(FieldReader &reader) {
74	ColumnInfo info;
75	info.names = reader.ReadRequiredList<string>();
76	info.types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
77	return info;
78	}
79	vector<std::string> names;
80	vector<LogicalType> types;
81	};
82
83	struct ReadCSVData : public BaseCSVData {
84	//! The expected SQL types to read from the file
85	vector<LogicalType> csv_types;
86	//! The expected SQL names to be read from the file
87	vector<string> csv_names;
88	//! The expected SQL types to be returned from the read - including added constants (e.g. filename, hive partitions)
89	vector<LogicalType> return_types;
90	//! The expected SQL names to be returned from the read - including added constants (e.g. filename, hive partitions)
91	vector<string> return_names;
92	//! The initial reader (if any): this is used when automatic detection is used during binding.
93	//! In this case, the CSV reader is already created and might as well be re-used.
94	unique_ptr<BufferedCSVReader> initial_reader;
95	//! The union readers are created (when csv union_by_name option is on) during binding
96	//! Those readers can be re-used during ReadCSVFunction
97	vector<unique_ptr<BufferedCSVReader>> union_readers;
98	//! Whether or not the single-threaded reader should be used
99	bool single_threaded = false;
100	//! Reader bind data
101	MultiFileReaderBindData reader_bind;
102	vector<ColumnInfo> column_info;
103
104	void Initialize(unique_ptr<BufferedCSVReader> &reader) {
105	this->initial_reader = std::move(reader);
106	}
107	void FinalizeRead(ClientContext &context);
108	};
109
110	struct CSVCopyFunction {
111	static void RegisterFunction(BuiltinFunctions &set);
112	};
113
114	struct ReadCSVTableFunction {
115	static TableFunction GetFunction();
116	static TableFunction GetAutoFunction();
117	static void RegisterFunction(BuiltinFunctions &set);
118	};
119
120	} // namespace duckdb
121

Browse the source code of Velox/build/_deps/duckdb-src/src/include/duckdb/function/table/read_csv.hpp