| 1 | #include "duckdb/parser/expression/columnref_expression.hpp" |
| 2 | #include "duckdb/parser/expression/star_expression.hpp" |
| 3 | #include "duckdb/parser/statement/copy_statement.hpp" |
| 4 | #include "duckdb/parser/statement/select_statement.hpp" |
| 5 | #include "duckdb/parser/tableref/basetableref.hpp" |
| 6 | #include "duckdb/parser/transformer.hpp" |
| 7 | #include "duckdb/common/string_util.hpp" |
| 8 | #include "duckdb/common/types/value.hpp" |
| 9 | |
| 10 | #include <cstring> |
| 11 | |
| 12 | using namespace duckdb; |
| 13 | using namespace std; |
| 14 | |
| 15 | static ExternalFileFormat StringToExternalFileFormat(const string &str) { |
| 16 | auto upper = StringUtil::Upper(str); |
| 17 | return ExternalFileFormat::CSV; |
| 18 | } |
| 19 | |
| 20 | void SetControlString(PGDefElem *def_elem, string option, string option_example, string &info_str) { |
| 21 | auto *val = (PGValue *)(def_elem->arg); |
| 22 | if (!val || val->type != T_PGString) { |
| 23 | throw ParserException("Unsupported parameter type for " + option + ": expected e.g. " + option_example); |
| 24 | } |
| 25 | info_str = val->val.str; |
| 26 | } |
| 27 | |
| 28 | void SubstringDetection(string &str_1, string &str_2, string name_str_1, string name_str_2) { |
| 29 | if (str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) { |
| 30 | throw Exception("COPY " + name_str_1 + " must not appear in the " + name_str_2 + |
| 31 | " specification and vice versa" ); |
| 32 | } |
| 33 | } |
| 34 | |
| 35 | void HandleOptions(PGCopyStmt *stmt, CopyInfo &info) { |
| 36 | // option names |
| 37 | const string kDelimiterTok = "delimiter" ; |
| 38 | const string kFormatTok = "format" ; |
| 39 | const string kQuoteTok = "quote" ; |
| 40 | const string kEscapeTok = "escape" ; |
| 41 | const string = "header" ; |
| 42 | const string kNullTok = "null" ; |
| 43 | const string kForceQuoteTok = "force_quote" ; |
| 44 | const string kForceNotNullTok = "force_not_null" ; |
| 45 | const string kEncodingTok = "encoding" ; |
| 46 | |
| 47 | PGListCell *cell = nullptr; |
| 48 | |
| 49 | // iterate over each option |
| 50 | for_each_cell(cell, stmt->options->head) { |
| 51 | auto *def_elem = reinterpret_cast<PGDefElem *>(cell->data.ptr_value); |
| 52 | |
| 53 | if (StringUtil::StartsWith(def_elem->defname, "delim" ) || StringUtil::StartsWith(def_elem->defname, "sep" )) { |
| 54 | // delimiter |
| 55 | SetControlString(def_elem, "DELIMITER" , "DELIMITER ','" , info.delimiter); |
| 56 | |
| 57 | } else if (def_elem->defname == kFormatTok) { |
| 58 | // format |
| 59 | auto *format_val = (PGValue *)(def_elem->arg); |
| 60 | if (!format_val || format_val->type != T_PGString) { |
| 61 | throw ParserException("Unsupported parameter type for FORMAT: expected e.g. FORMAT 'csv', 'csv_auto'" ); |
| 62 | } |
| 63 | |
| 64 | if (StringUtil::Upper(format_val->val.str) != "CSV" && |
| 65 | StringUtil::Upper(format_val->val.str) != "CSV_AUTO" ) { |
| 66 | throw Exception("Copy is only supported for .CSV-files, FORMAT 'csv'" ); |
| 67 | } |
| 68 | |
| 69 | info.format = StringToExternalFileFormat("CSV" ); |
| 70 | |
| 71 | if (StringUtil::Upper(format_val->val.str) == "CSV_AUTO" ) { |
| 72 | info.auto_detect = true; |
| 73 | } |
| 74 | |
| 75 | } else if (def_elem->defname == kQuoteTok) { |
| 76 | // quote |
| 77 | SetControlString(def_elem, "QUOTE" , "QUOTE '\"'" , info.quote); |
| 78 | if (info.quote.length() == 0) { |
| 79 | throw Exception("QUOTE must not be empty" ); |
| 80 | } |
| 81 | |
| 82 | } else if (def_elem->defname == kEscapeTok) { |
| 83 | // escape |
| 84 | SetControlString(def_elem, "ESCAPE" , "ESCAPE '\"'" , info.escape); |
| 85 | if (info.escape.length() == 0) { |
| 86 | throw Exception("ESCAPE must not be empty" ); |
| 87 | } |
| 88 | |
| 89 | } else if (def_elem->defname == kHeaderTok) { |
| 90 | // header |
| 91 | auto * = (PGValue *)(def_elem->arg); |
| 92 | if (!header_val) { |
| 93 | info.header = true; |
| 94 | continue; |
| 95 | } |
| 96 | switch (header_val->type) { |
| 97 | case T_PGInteger: |
| 98 | info.header = header_val->val.ival == 1 ? true : false; |
| 99 | break; |
| 100 | case T_PGString: { |
| 101 | auto val = duckdb::Value(string(header_val->val.str)); |
| 102 | info.header = val.CastAs(TypeId::BOOL).value_.boolean; |
| 103 | break; |
| 104 | } |
| 105 | default: |
| 106 | throw ParserException("Unsupported parameter type for HEADER: expected e.g. HEADER 1" ); |
| 107 | } |
| 108 | |
| 109 | } else if (def_elem->defname == kNullTok) { |
| 110 | // null |
| 111 | SetControlString(def_elem, "NULL" , "NULL 'null'" , info.null_str); |
| 112 | |
| 113 | } else if (def_elem->defname == kForceQuoteTok) { |
| 114 | // force quote |
| 115 | // only for COPY ... TO ... |
| 116 | if (info.is_from) { |
| 117 | throw Exception("The FORCE_QUOTE option is only for COPY ... TO ..." ); |
| 118 | } |
| 119 | |
| 120 | auto *force_quote_val = def_elem->arg; |
| 121 | if (!force_quote_val || (force_quote_val->type != T_PGAStar && force_quote_val->type != T_PGList)) { |
| 122 | throw ParserException("Unsupported parameter type for FORCE_QUOTE: expected e.g. FORCE_QUOTE *" ); |
| 123 | } |
| 124 | |
| 125 | // * option (all columns) |
| 126 | if (force_quote_val->type == T_PGAStar) { |
| 127 | info.quote_all = true; |
| 128 | } |
| 129 | |
| 130 | // list of columns |
| 131 | if (force_quote_val->type == T_PGList) { |
| 132 | auto column_list = (PGList *)(force_quote_val); |
| 133 | for (auto c = column_list->head; c != NULL; c = lnext(c)) { |
| 134 | auto target = (PGResTarget *)(c->data.ptr_value); |
| 135 | info.force_quote_list.push_back(string(target->name)); |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | } else if (def_elem->defname == kForceNotNullTok) { |
| 140 | // force not null |
| 141 | // only for COPY ... FROM ... |
| 142 | if (!info.is_from) { |
| 143 | throw Exception("The FORCE_NOT_NULL option is only for COPY ... FROM ..." ); |
| 144 | } |
| 145 | |
| 146 | auto *force_not_null_val = def_elem->arg; |
| 147 | if (!force_not_null_val || force_not_null_val->type != T_PGList) { |
| 148 | throw ParserException("Unsupported parameter type for FORCE_NOT_NULL: expected e.g. FORCE_NOT_NULL *" ); |
| 149 | } |
| 150 | |
| 151 | auto column_list = (PGList *)(force_not_null_val); |
| 152 | for (auto c = column_list->head; c != NULL; c = lnext(c)) { |
| 153 | auto target = (PGResTarget *)(c->data.ptr_value); |
| 154 | info.force_not_null_list.push_back(string(target->name)); |
| 155 | } |
| 156 | |
| 157 | } else if (def_elem->defname == kEncodingTok) { |
| 158 | // encoding |
| 159 | auto *encoding_val = (PGValue *)(def_elem->arg); |
| 160 | if (!encoding_val || encoding_val->type != T_PGString) { |
| 161 | throw ParserException("Unsupported parameter type for ENCODING: expected e.g. ENCODING 'UTF-8'" ); |
| 162 | } |
| 163 | if (StringUtil::Upper(encoding_val->val.str) != "UTF8" && |
| 164 | StringUtil::Upper(encoding_val->val.str) != "UTF-8" ) { |
| 165 | throw Exception("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'" ); |
| 166 | } |
| 167 | |
| 168 | } else { |
| 169 | throw ParserException("Unsupported COPY option: %s" , def_elem->defname); |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | unique_ptr<CopyStatement> Transformer::TransformCopy(PGNode *node) { |
| 175 | auto stmt = reinterpret_cast<PGCopyStmt *>(node); |
| 176 | assert(stmt); |
| 177 | auto result = make_unique<CopyStatement>(); |
| 178 | auto &info = *result->info; |
| 179 | |
| 180 | // get file_path and is_from |
| 181 | info.file_path = stmt->filename; |
| 182 | info.is_from = stmt->is_from; |
| 183 | |
| 184 | // get select_list |
| 185 | if (stmt->attlist) { |
| 186 | for (auto n = stmt->attlist->head; n != nullptr; n = n->next) { |
| 187 | auto target = reinterpret_cast<PGResTarget *>(n->data.ptr_value); |
| 188 | if (target->name) { |
| 189 | info.select_list.push_back(string(target->name)); |
| 190 | } |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | if (stmt->relation) { |
| 195 | auto ref = TransformRangeVar(stmt->relation); |
| 196 | if (info.is_from) { |
| 197 | // copy file into table |
| 198 | auto &table = *reinterpret_cast<BaseTableRef *>(ref.get()); |
| 199 | info.table = table.table_name; |
| 200 | info.schema = table.schema_name; |
| 201 | } else { |
| 202 | // copy table into file, generate SELECT * FROM table; |
| 203 | auto statement = make_unique<SelectNode>(); |
| 204 | statement->from_table = move(ref); |
| 205 | if (stmt->attlist) { |
| 206 | for (idx_t i = 0; i < info.select_list.size(); i++) |
| 207 | statement->select_list.push_back(make_unique<ColumnRefExpression>(info.select_list[i])); |
| 208 | } else { |
| 209 | statement->select_list.push_back(make_unique<StarExpression>()); |
| 210 | } |
| 211 | result->select_statement = move(statement); |
| 212 | } |
| 213 | } else { |
| 214 | result->select_statement = TransformSelectNode((PGSelectStmt *)stmt->query); |
| 215 | } |
| 216 | |
| 217 | // handle options, when no option were given, try auto detect |
| 218 | if (stmt->options) { |
| 219 | HandleOptions(stmt, info); |
| 220 | } |
| 221 | |
| 222 | // the default character of the ESCAPE option is the same as the QUOTE character |
| 223 | if (info.escape == "" ) { |
| 224 | info.escape = info.quote; |
| 225 | } |
| 226 | // escape and delimiter must not be substrings of each other |
| 227 | SubstringDetection(info.delimiter, info.escape, "DELIMITER" , "ESCAPE" ); |
| 228 | // delimiter and quote must not be substrings of each other |
| 229 | SubstringDetection(info.quote, info.delimiter, "DELIMITER" , "QUOTE" ); |
| 230 | // escape and quote must not be substrings of each other (but can be the same) |
| 231 | if (info.quote != info.escape) { |
| 232 | SubstringDetection(info.quote, info.escape, "QUOTE" , "ESCAPE" ); |
| 233 | } |
| 234 | // null string and delimiter must not be substrings of each other |
| 235 | if (info.null_str != "" ) { |
| 236 | SubstringDetection(info.delimiter, info.null_str, "DELIMITER" , "NULL" ); |
| 237 | } |
| 238 | |
| 239 | return result; |
| 240 | } |
| 241 | |