1 | #include "duckdb/parser/expression/columnref_expression.hpp" |
2 | #include "duckdb/parser/expression/star_expression.hpp" |
3 | #include "duckdb/parser/statement/copy_statement.hpp" |
4 | #include "duckdb/parser/statement/select_statement.hpp" |
5 | #include "duckdb/parser/tableref/basetableref.hpp" |
6 | #include "duckdb/parser/transformer.hpp" |
7 | #include "duckdb/common/string_util.hpp" |
8 | #include "duckdb/common/types/value.hpp" |
9 | |
10 | #include <cstring> |
11 | |
12 | using namespace duckdb; |
13 | using namespace std; |
14 | |
15 | static ExternalFileFormat StringToExternalFileFormat(const string &str) { |
16 | auto upper = StringUtil::Upper(str); |
17 | return ExternalFileFormat::CSV; |
18 | } |
19 | |
20 | void SetControlString(PGDefElem *def_elem, string option, string option_example, string &info_str) { |
21 | auto *val = (PGValue *)(def_elem->arg); |
22 | if (!val || val->type != T_PGString) { |
23 | throw ParserException("Unsupported parameter type for " + option + ": expected e.g. " + option_example); |
24 | } |
25 | info_str = val->val.str; |
26 | } |
27 | |
28 | void SubstringDetection(string &str_1, string &str_2, string name_str_1, string name_str_2) { |
29 | if (str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) { |
30 | throw Exception("COPY " + name_str_1 + " must not appear in the " + name_str_2 + |
31 | " specification and vice versa" ); |
32 | } |
33 | } |
34 | |
35 | void HandleOptions(PGCopyStmt *stmt, CopyInfo &info) { |
36 | // option names |
37 | const string kDelimiterTok = "delimiter" ; |
38 | const string kFormatTok = "format" ; |
39 | const string kQuoteTok = "quote" ; |
40 | const string kEscapeTok = "escape" ; |
41 | const string = "header" ; |
42 | const string kNullTok = "null" ; |
43 | const string kForceQuoteTok = "force_quote" ; |
44 | const string kForceNotNullTok = "force_not_null" ; |
45 | const string kEncodingTok = "encoding" ; |
46 | |
47 | PGListCell *cell = nullptr; |
48 | |
49 | // iterate over each option |
50 | for_each_cell(cell, stmt->options->head) { |
51 | auto *def_elem = reinterpret_cast<PGDefElem *>(cell->data.ptr_value); |
52 | |
53 | if (StringUtil::StartsWith(def_elem->defname, "delim" ) || StringUtil::StartsWith(def_elem->defname, "sep" )) { |
54 | // delimiter |
55 | SetControlString(def_elem, "DELIMITER" , "DELIMITER ','" , info.delimiter); |
56 | |
57 | } else if (def_elem->defname == kFormatTok) { |
58 | // format |
59 | auto *format_val = (PGValue *)(def_elem->arg); |
60 | if (!format_val || format_val->type != T_PGString) { |
61 | throw ParserException("Unsupported parameter type for FORMAT: expected e.g. FORMAT 'csv', 'csv_auto'" ); |
62 | } |
63 | |
64 | if (StringUtil::Upper(format_val->val.str) != "CSV" && |
65 | StringUtil::Upper(format_val->val.str) != "CSV_AUTO" ) { |
66 | throw Exception("Copy is only supported for .CSV-files, FORMAT 'csv'" ); |
67 | } |
68 | |
69 | info.format = StringToExternalFileFormat("CSV" ); |
70 | |
71 | if (StringUtil::Upper(format_val->val.str) == "CSV_AUTO" ) { |
72 | info.auto_detect = true; |
73 | } |
74 | |
75 | } else if (def_elem->defname == kQuoteTok) { |
76 | // quote |
77 | SetControlString(def_elem, "QUOTE" , "QUOTE '\"'" , info.quote); |
78 | if (info.quote.length() == 0) { |
79 | throw Exception("QUOTE must not be empty" ); |
80 | } |
81 | |
82 | } else if (def_elem->defname == kEscapeTok) { |
83 | // escape |
84 | SetControlString(def_elem, "ESCAPE" , "ESCAPE '\"'" , info.escape); |
85 | if (info.escape.length() == 0) { |
86 | throw Exception("ESCAPE must not be empty" ); |
87 | } |
88 | |
89 | } else if (def_elem->defname == kHeaderTok) { |
90 | // header |
91 | auto * = (PGValue *)(def_elem->arg); |
92 | if (!header_val) { |
93 | info.header = true; |
94 | continue; |
95 | } |
96 | switch (header_val->type) { |
97 | case T_PGInteger: |
98 | info.header = header_val->val.ival == 1 ? true : false; |
99 | break; |
100 | case T_PGString: { |
101 | auto val = duckdb::Value(string(header_val->val.str)); |
102 | info.header = val.CastAs(TypeId::BOOL).value_.boolean; |
103 | break; |
104 | } |
105 | default: |
106 | throw ParserException("Unsupported parameter type for HEADER: expected e.g. HEADER 1" ); |
107 | } |
108 | |
109 | } else if (def_elem->defname == kNullTok) { |
110 | // null |
111 | SetControlString(def_elem, "NULL" , "NULL 'null'" , info.null_str); |
112 | |
113 | } else if (def_elem->defname == kForceQuoteTok) { |
114 | // force quote |
115 | // only for COPY ... TO ... |
116 | if (info.is_from) { |
117 | throw Exception("The FORCE_QUOTE option is only for COPY ... TO ..." ); |
118 | } |
119 | |
120 | auto *force_quote_val = def_elem->arg; |
121 | if (!force_quote_val || (force_quote_val->type != T_PGAStar && force_quote_val->type != T_PGList)) { |
122 | throw ParserException("Unsupported parameter type for FORCE_QUOTE: expected e.g. FORCE_QUOTE *" ); |
123 | } |
124 | |
125 | // * option (all columns) |
126 | if (force_quote_val->type == T_PGAStar) { |
127 | info.quote_all = true; |
128 | } |
129 | |
130 | // list of columns |
131 | if (force_quote_val->type == T_PGList) { |
132 | auto column_list = (PGList *)(force_quote_val); |
133 | for (auto c = column_list->head; c != NULL; c = lnext(c)) { |
134 | auto target = (PGResTarget *)(c->data.ptr_value); |
135 | info.force_quote_list.push_back(string(target->name)); |
136 | } |
137 | } |
138 | |
139 | } else if (def_elem->defname == kForceNotNullTok) { |
140 | // force not null |
141 | // only for COPY ... FROM ... |
142 | if (!info.is_from) { |
143 | throw Exception("The FORCE_NOT_NULL option is only for COPY ... FROM ..." ); |
144 | } |
145 | |
146 | auto *force_not_null_val = def_elem->arg; |
147 | if (!force_not_null_val || force_not_null_val->type != T_PGList) { |
148 | throw ParserException("Unsupported parameter type for FORCE_NOT_NULL: expected e.g. FORCE_NOT_NULL *" ); |
149 | } |
150 | |
151 | auto column_list = (PGList *)(force_not_null_val); |
152 | for (auto c = column_list->head; c != NULL; c = lnext(c)) { |
153 | auto target = (PGResTarget *)(c->data.ptr_value); |
154 | info.force_not_null_list.push_back(string(target->name)); |
155 | } |
156 | |
157 | } else if (def_elem->defname == kEncodingTok) { |
158 | // encoding |
159 | auto *encoding_val = (PGValue *)(def_elem->arg); |
160 | if (!encoding_val || encoding_val->type != T_PGString) { |
161 | throw ParserException("Unsupported parameter type for ENCODING: expected e.g. ENCODING 'UTF-8'" ); |
162 | } |
163 | if (StringUtil::Upper(encoding_val->val.str) != "UTF8" && |
164 | StringUtil::Upper(encoding_val->val.str) != "UTF-8" ) { |
165 | throw Exception("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'" ); |
166 | } |
167 | |
168 | } else { |
169 | throw ParserException("Unsupported COPY option: %s" , def_elem->defname); |
170 | } |
171 | } |
172 | } |
173 | |
174 | unique_ptr<CopyStatement> Transformer::TransformCopy(PGNode *node) { |
175 | auto stmt = reinterpret_cast<PGCopyStmt *>(node); |
176 | assert(stmt); |
177 | auto result = make_unique<CopyStatement>(); |
178 | auto &info = *result->info; |
179 | |
180 | // get file_path and is_from |
181 | info.file_path = stmt->filename; |
182 | info.is_from = stmt->is_from; |
183 | |
184 | // get select_list |
185 | if (stmt->attlist) { |
186 | for (auto n = stmt->attlist->head; n != nullptr; n = n->next) { |
187 | auto target = reinterpret_cast<PGResTarget *>(n->data.ptr_value); |
188 | if (target->name) { |
189 | info.select_list.push_back(string(target->name)); |
190 | } |
191 | } |
192 | } |
193 | |
194 | if (stmt->relation) { |
195 | auto ref = TransformRangeVar(stmt->relation); |
196 | if (info.is_from) { |
197 | // copy file into table |
198 | auto &table = *reinterpret_cast<BaseTableRef *>(ref.get()); |
199 | info.table = table.table_name; |
200 | info.schema = table.schema_name; |
201 | } else { |
202 | // copy table into file, generate SELECT * FROM table; |
203 | auto statement = make_unique<SelectNode>(); |
204 | statement->from_table = move(ref); |
205 | if (stmt->attlist) { |
206 | for (idx_t i = 0; i < info.select_list.size(); i++) |
207 | statement->select_list.push_back(make_unique<ColumnRefExpression>(info.select_list[i])); |
208 | } else { |
209 | statement->select_list.push_back(make_unique<StarExpression>()); |
210 | } |
211 | result->select_statement = move(statement); |
212 | } |
213 | } else { |
214 | result->select_statement = TransformSelectNode((PGSelectStmt *)stmt->query); |
215 | } |
216 | |
217 | // handle options, when no option were given, try auto detect |
218 | if (stmt->options) { |
219 | HandleOptions(stmt, info); |
220 | } |
221 | |
222 | // the default character of the ESCAPE option is the same as the QUOTE character |
223 | if (info.escape == "" ) { |
224 | info.escape = info.quote; |
225 | } |
226 | // escape and delimiter must not be substrings of each other |
227 | SubstringDetection(info.delimiter, info.escape, "DELIMITER" , "ESCAPE" ); |
228 | // delimiter and quote must not be substrings of each other |
229 | SubstringDetection(info.quote, info.delimiter, "DELIMITER" , "QUOTE" ); |
230 | // escape and quote must not be substrings of each other (but can be the same) |
231 | if (info.quote != info.escape) { |
232 | SubstringDetection(info.quote, info.escape, "QUOTE" , "ESCAPE" ); |
233 | } |
234 | // null string and delimiter must not be substrings of each other |
235 | if (info.null_str != "" ) { |
236 | SubstringDetection(info.delimiter, info.null_str, "DELIMITER" , "NULL" ); |
237 | } |
238 | |
239 | return result; |
240 | } |
241 | |