1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/function/table/read_csv.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/function/table_function.hpp"
12#include "duckdb/function/scalar/strftime_format.hpp"
13#include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
14#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
15#include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
16#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"
17#include "duckdb/execution/operator/persistent/csv_buffer.hpp"
18#include "duckdb/function/built_in_functions.hpp"
19
20namespace duckdb {
21
//! Static helper for obtaining a CSV file handle (declaration only; the definition is not in this header).
class ReadCSV {
public:
 //! Returns an owning CSVFileHandle for the file identified by file_path.
 //! NOTE(review): presumably the file is opened through the context's file system and
 //! `compression` selects the decompression to apply - confirm against the definition.
 static unique_ptr<CSVFileHandle> OpenCSV(const string &file_path, FileCompressionType compression,
 ClientContext &context);
};
27
28struct BaseCSVData : public TableFunctionData {
29 virtual ~BaseCSVData() {
30 }
31 //! The file path of the CSV file to read or write
32 vector<string> files;
33 //! The CSV reader options
34 BufferedCSVReaderOptions options;
35 //! Offsets for generated columns
36 idx_t filename_col_idx;
37 idx_t hive_partition_col_idx;
38
39 void Finalize();
40};
41
42struct WriteCSVData : public BaseCSVData {
43 WriteCSVData(string file_path, vector<LogicalType> sql_types, vector<string> names)
44 : sql_types(std::move(sql_types)) {
45 files.push_back(x: std::move(file_path));
46 options.name_list = std::move(names);
47 }
48
49 //! The SQL types to write
50 vector<LogicalType> sql_types;
51 //! The newline string to write
52 string newline = "\n";
53 //! Whether or not we are writing a simple CSV (delimiter, quote and escape are all 1 byte in length)
54 bool is_simple;
55 //! The size of the CSV file (in bytes) that we buffer before we flush it to disk
56 idx_t flush_size = 4096 * 8;
57 //! For each byte whether or not the CSV file requires quotes when containing the byte
58 unsafe_unique_array<bool> requires_quotes;
59};
60
61struct ColumnInfo {
62 ColumnInfo() {
63 }
64 ColumnInfo(vector<std::string> names_p, vector<LogicalType> types_p) {
65 names = std::move(names_p);
66 types = std::move(types_p);
67 }
68 void Serialize(FieldWriter &writer) const {
69 writer.WriteList<string>(elements: names);
70 writer.WriteRegularSerializableList<LogicalType>(elements: types);
71 }
72
73 static ColumnInfo Deserialize(FieldReader &reader) {
74 ColumnInfo info;
75 info.names = reader.ReadRequiredList<string>();
76 info.types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
77 return info;
78 }
79 vector<std::string> names;
80 vector<LogicalType> types;
81};
82
83struct ReadCSVData : public BaseCSVData {
84 //! The expected SQL types to read from the file
85 vector<LogicalType> csv_types;
86 //! The expected SQL names to be read from the file
87 vector<string> csv_names;
88 //! The expected SQL types to be returned from the read - including added constants (e.g. filename, hive partitions)
89 vector<LogicalType> return_types;
90 //! The expected SQL names to be returned from the read - including added constants (e.g. filename, hive partitions)
91 vector<string> return_names;
92 //! The initial reader (if any): this is used when automatic detection is used during binding.
93 //! In this case, the CSV reader is already created and might as well be re-used.
94 unique_ptr<BufferedCSVReader> initial_reader;
95 //! The union readers are created (when csv union_by_name option is on) during binding
96 //! Those readers can be re-used during ReadCSVFunction
97 vector<unique_ptr<BufferedCSVReader>> union_readers;
98 //! Whether or not the single-threaded reader should be used
99 bool single_threaded = false;
100 //! Reader bind data
101 MultiFileReaderBindData reader_bind;
102 vector<ColumnInfo> column_info;
103
104 void Initialize(unique_ptr<BufferedCSVReader> &reader) {
105 this->initial_reader = std::move(reader);
106 }
107 void FinalizeRead(ClientContext &context);
108};
109
//! Holder for the CSV copy function's registration hook (declaration only).
struct CSVCopyFunction {
 //! Defined out of line.
 //! NOTE(review): presumably adds the CSV copy function to `set` - confirm against the definition.
 static void RegisterFunction(BuiltinFunctions &set);
};
113
//! Factory and registration hooks for the CSV-reading table functions (declarations only).
struct ReadCSVTableFunction {
 //! Returns a TableFunction by value.
 //! NOTE(review): presumably the explicit-options CSV reader - confirm against the definition.
 static TableFunction GetFunction();
 //! Returns a TableFunction by value.
 //! NOTE(review): presumably the auto-detecting variant of the CSV reader - confirm against the definition.
 static TableFunction GetAutoFunction();
 //! Defined out of line.
 //! NOTE(review): presumably adds the table functions above to `set` - confirm against the definition.
 static void RegisterFunction(BuiltinFunctions &set);
};
119
120} // namespace duckdb
121