1#include "duckdb/parser/expression/columnref_expression.hpp"
2#include "duckdb/parser/expression/star_expression.hpp"
3#include "duckdb/parser/statement/copy_statement.hpp"
4#include "duckdb/parser/statement/select_statement.hpp"
5#include "duckdb/parser/tableref/basetableref.hpp"
6#include "duckdb/parser/transformer.hpp"
7#include "duckdb/common/string_util.hpp"
8#include "duckdb/common/types/value.hpp"
9
10#include <cstring>
11
12using namespace duckdb;
13using namespace std;
14
15static ExternalFileFormat StringToExternalFileFormat(const string &str) {
16 auto upper = StringUtil::Upper(str);
17 return ExternalFileFormat::CSV;
18}
19
20void SetControlString(PGDefElem *def_elem, string option, string option_example, string &info_str) {
21 auto *val = (PGValue *)(def_elem->arg);
22 if (!val || val->type != T_PGString) {
23 throw ParserException("Unsupported parameter type for " + option + ": expected e.g. " + option_example);
24 }
25 info_str = val->val.str;
26}
27
28void SubstringDetection(string &str_1, string &str_2, string name_str_1, string name_str_2) {
29 if (str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) {
30 throw Exception("COPY " + name_str_1 + " must not appear in the " + name_str_2 +
31 " specification and vice versa");
32 }
33}
34
35void HandleOptions(PGCopyStmt *stmt, CopyInfo &info) {
36 // option names
37 const string kDelimiterTok = "delimiter";
38 const string kFormatTok = "format";
39 const string kQuoteTok = "quote";
40 const string kEscapeTok = "escape";
41 const string kHeaderTok = "header";
42 const string kNullTok = "null";
43 const string kForceQuoteTok = "force_quote";
44 const string kForceNotNullTok = "force_not_null";
45 const string kEncodingTok = "encoding";
46
47 PGListCell *cell = nullptr;
48
49 // iterate over each option
50 for_each_cell(cell, stmt->options->head) {
51 auto *def_elem = reinterpret_cast<PGDefElem *>(cell->data.ptr_value);
52
53 if (StringUtil::StartsWith(def_elem->defname, "delim") || StringUtil::StartsWith(def_elem->defname, "sep")) {
54 // delimiter
55 SetControlString(def_elem, "DELIMITER", "DELIMITER ','", info.delimiter);
56
57 } else if (def_elem->defname == kFormatTok) {
58 // format
59 auto *format_val = (PGValue *)(def_elem->arg);
60 if (!format_val || format_val->type != T_PGString) {
61 throw ParserException("Unsupported parameter type for FORMAT: expected e.g. FORMAT 'csv', 'csv_auto'");
62 }
63
64 if (StringUtil::Upper(format_val->val.str) != "CSV" &&
65 StringUtil::Upper(format_val->val.str) != "CSV_AUTO") {
66 throw Exception("Copy is only supported for .CSV-files, FORMAT 'csv'");
67 }
68
69 info.format = StringToExternalFileFormat("CSV");
70
71 if (StringUtil::Upper(format_val->val.str) == "CSV_AUTO") {
72 info.auto_detect = true;
73 }
74
75 } else if (def_elem->defname == kQuoteTok) {
76 // quote
77 SetControlString(def_elem, "QUOTE", "QUOTE '\"'", info.quote);
78 if (info.quote.length() == 0) {
79 throw Exception("QUOTE must not be empty");
80 }
81
82 } else if (def_elem->defname == kEscapeTok) {
83 // escape
84 SetControlString(def_elem, "ESCAPE", "ESCAPE '\"'", info.escape);
85 if (info.escape.length() == 0) {
86 throw Exception("ESCAPE must not be empty");
87 }
88
89 } else if (def_elem->defname == kHeaderTok) {
90 // header
91 auto *header_val = (PGValue *)(def_elem->arg);
92 if (!header_val) {
93 info.header = true;
94 continue;
95 }
96 switch (header_val->type) {
97 case T_PGInteger:
98 info.header = header_val->val.ival == 1 ? true : false;
99 break;
100 case T_PGString: {
101 auto val = duckdb::Value(string(header_val->val.str));
102 info.header = val.CastAs(TypeId::BOOL).value_.boolean;
103 break;
104 }
105 default:
106 throw ParserException("Unsupported parameter type for HEADER: expected e.g. HEADER 1");
107 }
108
109 } else if (def_elem->defname == kNullTok) {
110 // null
111 SetControlString(def_elem, "NULL", "NULL 'null'", info.null_str);
112
113 } else if (def_elem->defname == kForceQuoteTok) {
114 // force quote
115 // only for COPY ... TO ...
116 if (info.is_from) {
117 throw Exception("The FORCE_QUOTE option is only for COPY ... TO ...");
118 }
119
120 auto *force_quote_val = def_elem->arg;
121 if (!force_quote_val || (force_quote_val->type != T_PGAStar && force_quote_val->type != T_PGList)) {
122 throw ParserException("Unsupported parameter type for FORCE_QUOTE: expected e.g. FORCE_QUOTE *");
123 }
124
125 // * option (all columns)
126 if (force_quote_val->type == T_PGAStar) {
127 info.quote_all = true;
128 }
129
130 // list of columns
131 if (force_quote_val->type == T_PGList) {
132 auto column_list = (PGList *)(force_quote_val);
133 for (auto c = column_list->head; c != NULL; c = lnext(c)) {
134 auto target = (PGResTarget *)(c->data.ptr_value);
135 info.force_quote_list.push_back(string(target->name));
136 }
137 }
138
139 } else if (def_elem->defname == kForceNotNullTok) {
140 // force not null
141 // only for COPY ... FROM ...
142 if (!info.is_from) {
143 throw Exception("The FORCE_NOT_NULL option is only for COPY ... FROM ...");
144 }
145
146 auto *force_not_null_val = def_elem->arg;
147 if (!force_not_null_val || force_not_null_val->type != T_PGList) {
148 throw ParserException("Unsupported parameter type for FORCE_NOT_NULL: expected e.g. FORCE_NOT_NULL *");
149 }
150
151 auto column_list = (PGList *)(force_not_null_val);
152 for (auto c = column_list->head; c != NULL; c = lnext(c)) {
153 auto target = (PGResTarget *)(c->data.ptr_value);
154 info.force_not_null_list.push_back(string(target->name));
155 }
156
157 } else if (def_elem->defname == kEncodingTok) {
158 // encoding
159 auto *encoding_val = (PGValue *)(def_elem->arg);
160 if (!encoding_val || encoding_val->type != T_PGString) {
161 throw ParserException("Unsupported parameter type for ENCODING: expected e.g. ENCODING 'UTF-8'");
162 }
163 if (StringUtil::Upper(encoding_val->val.str) != "UTF8" &&
164 StringUtil::Upper(encoding_val->val.str) != "UTF-8") {
165 throw Exception("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
166 }
167
168 } else {
169 throw ParserException("Unsupported COPY option: %s", def_elem->defname);
170 }
171 }
172}
173
174unique_ptr<CopyStatement> Transformer::TransformCopy(PGNode *node) {
175 auto stmt = reinterpret_cast<PGCopyStmt *>(node);
176 assert(stmt);
177 auto result = make_unique<CopyStatement>();
178 auto &info = *result->info;
179
180 // get file_path and is_from
181 info.file_path = stmt->filename;
182 info.is_from = stmt->is_from;
183
184 // get select_list
185 if (stmt->attlist) {
186 for (auto n = stmt->attlist->head; n != nullptr; n = n->next) {
187 auto target = reinterpret_cast<PGResTarget *>(n->data.ptr_value);
188 if (target->name) {
189 info.select_list.push_back(string(target->name));
190 }
191 }
192 }
193
194 if (stmt->relation) {
195 auto ref = TransformRangeVar(stmt->relation);
196 if (info.is_from) {
197 // copy file into table
198 auto &table = *reinterpret_cast<BaseTableRef *>(ref.get());
199 info.table = table.table_name;
200 info.schema = table.schema_name;
201 } else {
202 // copy table into file, generate SELECT * FROM table;
203 auto statement = make_unique<SelectNode>();
204 statement->from_table = move(ref);
205 if (stmt->attlist) {
206 for (idx_t i = 0; i < info.select_list.size(); i++)
207 statement->select_list.push_back(make_unique<ColumnRefExpression>(info.select_list[i]));
208 } else {
209 statement->select_list.push_back(make_unique<StarExpression>());
210 }
211 result->select_statement = move(statement);
212 }
213 } else {
214 result->select_statement = TransformSelectNode((PGSelectStmt *)stmt->query);
215 }
216
217 // handle options, when no option were given, try auto detect
218 if (stmt->options) {
219 HandleOptions(stmt, info);
220 }
221
222 // the default character of the ESCAPE option is the same as the QUOTE character
223 if (info.escape == "") {
224 info.escape = info.quote;
225 }
226 // escape and delimiter must not be substrings of each other
227 SubstringDetection(info.delimiter, info.escape, "DELIMITER", "ESCAPE");
228 // delimiter and quote must not be substrings of each other
229 SubstringDetection(info.quote, info.delimiter, "DELIMITER", "QUOTE");
230 // escape and quote must not be substrings of each other (but can be the same)
231 if (info.quote != info.escape) {
232 SubstringDetection(info.quote, info.escape, "QUOTE", "ESCAPE");
233 }
234 // null string and delimiter must not be substrings of each other
235 if (info.null_str != "") {
236 SubstringDetection(info.delimiter, info.null_str, "DELIMITER", "NULL");
237 }
238
239 return result;
240}
241