1 | /* |
2 | Copyright (c) 2000, 2016, Oracle and/or its affiliates. |
3 | Copyright (c) 2010, 2018, MariaDB Corporation. |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; version 2 of the License. |
8 | |
9 | This program is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License |
15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
17 | |
18 | |
19 | /* Copy data from a textfile to table */ |
20 | /* 2006-12 Erik Wetterberg : LOAD XML added */ |
21 | |
22 | #include "mariadb.h" |
23 | #include "sql_priv.h" |
24 | #include "unireg.h" |
25 | #include "sql_load.h" |
26 | #include "sql_load.h" |
27 | #include "sql_cache.h" // query_cache_* |
28 | #include "sql_base.h" // fill_record_n_invoke_before_triggers |
29 | #include <my_dir.h> |
30 | #include "sql_view.h" // check_key_in_view |
31 | #include "sql_insert.h" // check_that_all_fields_are_given_values, |
32 | // write_record |
33 | #include "sql_acl.h" // INSERT_ACL, UPDATE_ACL |
34 | #include "log_event.h" // Delete_file_log_event, |
35 | // Execute_load_query_log_event, |
36 | // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F |
37 | #include <m_ctype.h> |
38 | #include "rpl_mi.h" |
39 | #include "sql_repl.h" |
40 | #include "sp_head.h" |
41 | #include "sql_trigger.h" |
42 | #include "sql_derived.h" |
43 | #include "sql_show.h" |
44 | |
45 | extern "C" int _my_b_net_read(IO_CACHE *info, uchar *Buffer, size_t Count); |
46 | |
47 | class XML_TAG { |
48 | public: |
49 | int level; |
50 | String field; |
51 | String value; |
52 | XML_TAG(int l, String f, String v); |
53 | }; |
54 | |
55 | |
56 | XML_TAG::XML_TAG(int l, String f, String v) |
57 | { |
58 | level= l; |
59 | field.append(f); |
60 | value.append(v); |
61 | } |
62 | |
63 | |
64 | /* |
65 | Field and line terminators must be interpreted as sequence of unsigned char. |
66 | Otherwise, non-ascii terminators will be negative on some platforms, |
67 | and positive on others (depending on the implementation of char). |
68 | */ |
69 | class Term_string |
70 | { |
71 | const uchar *m_ptr; |
72 | uint m_length; |
73 | int m_initial_byte; |
74 | public: |
75 | Term_string(const String &str) : |
76 | m_ptr(static_cast<const uchar*>(static_cast<const void*>(str.ptr()))), |
77 | m_length(str.length()), |
78 | m_initial_byte((uchar) (str.length() ? str.ptr()[0] : INT_MAX)) |
79 | { } |
80 | void set(const uchar *str, uint length, int initial_byte) |
81 | { |
82 | m_ptr= str; |
83 | m_length= length; |
84 | m_initial_byte= initial_byte; |
85 | } |
86 | void reset() { set(NULL, 0, INT_MAX); } |
87 | const uchar *ptr() const { return m_ptr; } |
88 | uint length() const { return m_length; } |
89 | int initial_byte() const { return m_initial_byte; } |
90 | bool eq(const Term_string &other) const |
91 | { |
92 | return length() == other.length() && !memcmp(ptr(), other.ptr(), length()); |
93 | } |
94 | }; |
95 | |
96 | |
/*
  Byte-level input helpers. They expand to expressions that use the
  "stack", "stack_pos" and "cache" members of READ_INFO, so they can only
  be used inside READ_INFO member functions.

  GET      - yields the next input byte: the most recently pushed-back
             value if the push-back stack is not empty (stack_pos != stack),
             otherwise the result of my_b_get(&cache).
  PUSH(A)  - stores A on the push-back stack so that a following GET
             returns it. The macro itself performs no bounds checking.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
#define PUSH(A) *(stack_pos++)=(A)
99 | |
100 | #ifdef WITH_WSREP |
101 | /** If requested by wsrep_load_data_splitting, commit and restart |
102 | the transaction after every 10,000 inserted rows. */ |
103 | |
104 | static bool wsrep_load_data_split(THD *thd, const TABLE *table, |
105 | const COPY_INFO &info) |
106 | { |
107 | DBUG_ENTER("wsrep_load_data_split" ); |
108 | |
109 | if (!wsrep_load_data_splitting || !wsrep_on(thd) |
110 | || !info.records || (info.records % 10000) |
111 | || !thd->transaction.stmt.ha_list |
112 | || thd->transaction.stmt.ha_list->ht() != binlog_hton |
113 | || !thd->transaction.stmt.ha_list->next() |
114 | || thd->transaction.stmt.ha_list->next()->next()) |
115 | DBUG_RETURN(false); |
116 | |
117 | if (handlerton* hton= thd->transaction.stmt.ha_list->next()->ht()) |
118 | { |
119 | if (hton->db_type != DB_TYPE_INNODB) |
120 | DBUG_RETURN(false); |
121 | WSREP_DEBUG("intermediate transaction commit in LOAD DATA" ); |
122 | if (wsrep_run_wsrep_commit(thd, true) != WSREP_TRX_OK) DBUG_RETURN(true); |
123 | if (binlog_hton->commit(binlog_hton, thd, true)) DBUG_RETURN(true); |
124 | wsrep_post_commit(thd, true); |
125 | hton->commit(hton, thd, true); |
126 | table->file->extra(HA_EXTRA_FAKE_START_STMT); |
127 | } |
128 | |
129 | DBUG_RETURN(false); |
130 | } |
/*
  WSREP_LOAD_DATA_SPLIT(thd,table,info)
  Calls wsrep_load_data_split(). If it returns true, the macro clears
  table->auto_increment_field_not_null and leaves the *calling* function
  with DBUG_RETURN(1), so it may only be used inside a function that has
  done DBUG_ENTER and returns an integer-compatible type.
*/
# define WSREP_LOAD_DATA_SPLIT(thd,table,info) \
  if (wsrep_load_data_split(thd,table,info)) \
  { \
    table->auto_increment_field_not_null= FALSE; \
    DBUG_RETURN(1); \
  }
#else /* WITH_WSREP */
/* Built without WITH_WSREP: the macro expands to nothing */
#define WSREP_LOAD_DATA_SPLIT(thd,table,info) /* empty */
#endif /* WITH_WSREP */
140 | |
141 | class READ_INFO: public Load_data_param |
142 | { |
143 | File file; |
144 | String data; /* Read buffer */ |
145 | Term_string m_field_term; /* FIELDS TERMINATED BY 'string' */ |
146 | Term_string m_line_term; /* LINES TERMINATED BY 'string' */ |
147 | Term_string m_line_start; /* LINES STARTING BY 'string' */ |
148 | int enclosed_char,escape_char; |
149 | int *stack,*stack_pos; |
150 | bool found_end_of_line,start_of_line,eof; |
151 | int level; /* for load xml */ |
152 | |
153 | bool getbyte(char *to) |
154 | { |
155 | int chr= GET; |
156 | if (chr == my_b_EOF) |
157 | return (eof= true); |
158 | *to= chr; |
159 | return false; |
160 | } |
161 | |
162 | /** |
163 | Read a tail of a multi-byte character. |
164 | The first byte of the character is assumed to be already |
165 | read from the file and appended to "str". |
166 | |
167 | @returns true - if EOF happened unexpectedly |
168 | @returns false - no EOF happened: found a good multi-byte character, |
169 | or a bad byte sequence |
170 | |
171 | Note: |
172 | The return value depends only on EOF: |
173 | - read_mbtail() returns "false" is a good character was read, but also |
174 | - read_mbtail() returns "false" if an incomplete byte sequence was found |
175 | and no EOF happened. |
176 | |
177 | For example, suppose we have an ujis file with bytes 0x8FA10A, where: |
178 | - 0x8FA1 is an incomplete prefix of a 3-byte character |
179 | (it should be [8F][A1-FE][A1-FE] to make a full 3-byte character) |
180 | - 0x0A is a line demiliter |
181 | This file has some broken data, the trailing [A1-FE] is missing. |
182 | |
183 | In this example it works as follows: |
184 | - 0x8F is read from the file and put into "data" before the call |
185 | for read_mbtail() |
186 | - 0xA1 is read from the file and put into "data" by read_mbtail() |
187 | - 0x0A is kept in the read queue, so the next read iteration after |
188 | the current read_mbtail() call will normally find it and recognize as |
189 | a line delimiter |
190 | - the current call for read_mbtail() returns "false", |
191 | because no EOF happened |
192 | */ |
193 | bool read_mbtail(String *str) |
194 | { |
195 | int chlen; |
196 | if ((chlen= my_charlen(charset(), str->end() - 1, str->end())) == 1) |
197 | return false; // Single byte character found |
198 | for (uint32 length0= str->length() - 1 ; MY_CS_IS_TOOSMALL(chlen); ) |
199 | { |
200 | int chr= GET; |
201 | if (chr == my_b_EOF) |
202 | { |
203 | DBUG_PRINT("info" , ("read_mbtail: chlen=%d; unexpected EOF" , chlen)); |
204 | return true; // EOF |
205 | } |
206 | str->append(chr); |
207 | chlen= my_charlen(charset(), str->ptr() + length0, str->end()); |
208 | if (chlen == MY_CS_ILSEQ) |
209 | { |
210 | /** |
211 | It has been an incomplete (but a valid) sequence so far, |
212 | but the last byte turned it into a bad byte sequence. |
213 | Unget the very last byte. |
214 | */ |
215 | str->length(str->length() - 1); |
216 | PUSH(chr); |
217 | DBUG_PRINT("info" , ("read_mbtail: ILSEQ" )); |
218 | return false; // Bad byte sequence |
219 | } |
220 | } |
221 | DBUG_PRINT("info" , ("read_mbtail: chlen=%d" , chlen)); |
222 | return false; // Good multi-byte character |
223 | } |
224 | |
225 | public: |
226 | bool error,line_cuted,found_null,enclosed; |
227 | uchar *row_start, /* Found row starts here */ |
228 | *row_end; /* Found row ends here */ |
229 | LOAD_FILE_IO_CACHE cache; |
230 | |
231 | READ_INFO(THD *thd, File file, const Load_data_param ¶m, |
232 | String &field_term,String &line_start,String &line_term, |
233 | String &enclosed,int escape,bool get_it_from_net, bool is_fifo); |
234 | ~READ_INFO(); |
235 | int read_field(); |
236 | int read_fixed_length(void); |
237 | int next_line(void); |
238 | char unescape(char chr); |
239 | bool terminator(const uchar *ptr, uint length); |
240 | bool terminator(const Term_string &str) |
241 | { return terminator(str.ptr(), str.length()); } |
242 | bool terminator(int chr, const Term_string &str) |
243 | { return str.initial_byte() == chr && terminator(str); } |
244 | bool find_start_of_fields(); |
245 | /* load xml */ |
246 | List<XML_TAG> taglist; |
247 | int read_value(int delim, String *val); |
248 | int read_xml(THD *thd); |
249 | int clear_level(int level); |
250 | |
251 | my_off_t file_length() { return cache.end_of_file; } |
252 | my_off_t position() { return my_b_tell(&cache); } |
253 | |
254 | /** |
255 | skip all data till the eof. |
256 | */ |
257 | void skip_data_till_eof() |
258 | { |
259 | while (GET != my_b_EOF) |
260 | ; |
261 | } |
262 | }; |
263 | |
/*
  Forward declarations of the row readers. mysql_load() calls exactly one
  of them: read_xml_field() for FILETYPE_XML, read_fixed_length() when
  read_info.is_fixed_length() is true, and read_sep_field() otherwise.
  A non-zero result is treated by mysql_load() as an error.
*/
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
                             List<Item> &fields_vars, List<Item> &set_fields,
                             List<Item> &set_values, READ_INFO &read_info,
                             ulong skip_lines,
                             bool ignore_check_option_errors);
static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
                          List<Item> &fields_vars, List<Item> &set_fields,
                          List<Item> &set_values, READ_INFO &read_info,
                          String &enclosed, ulong skip_lines,
                          bool ignore_check_option_errors);

static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
                          List<Item> &fields_vars, List<Item> &set_fields,
                          List<Item> &set_values, READ_INFO &read_info,
                          String &enclosed, ulong skip_lines,
                          bool ignore_check_option_errors);

#ifndef EMBEDDED_LIBRARY
/*
  Binlog helper used by mysql_load(); not available in the embedded library.
*/
static bool write_execute_load_query_log_event(THD *, const sql_exchange*, const
                                               char*, const char*, bool, enum enum_duplicates, bool, bool, int);
#endif /* EMBEDDED_LIBRARY */
285 | |
286 | |
287 | bool Load_data_param::add_outvar_field(THD *thd, const Field *field) |
288 | { |
289 | if (field->flags & BLOB_FLAG) |
290 | { |
291 | m_use_blobs= true; |
292 | m_fixed_length+= 256; // Will be extended if needed |
293 | } |
294 | else |
295 | m_fixed_length+= field->field_length; |
296 | return false; |
297 | } |
298 | |
299 | |
300 | bool Load_data_param::add_outvar_user_var(THD *thd) |
301 | { |
302 | if (m_is_fixed_length) |
303 | { |
304 | my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0)); |
305 | return true; |
306 | } |
307 | return false; |
308 | } |
309 | |
310 | |
311 | /* |
312 | Execute LOAD DATA query |
313 | |
314 | SYNOPSYS |
315 | mysql_load() |
316 | thd - current thread |
317 | ex - sql_exchange object representing source file and its parsing rules |
318 | table_list - list of tables to which we are loading data |
319 | fields_vars - list of fields and variables to which we read |
320 | data from file |
321 | set_fields - list of fields mentioned in set clause |
322 | set_values - expressions to assign to fields in previous list |
323 | handle_duplicates - indicates whenever we should emit error or |
324 | replace row if we will meet duplicates. |
325 | ignore - - indicates whenever we should ignore duplicates |
326 | read_file_from_client - is this LOAD DATA LOCAL ? |
327 | |
328 | RETURN VALUES |
329 | TRUE - error / FALSE - success |
330 | */ |
331 | |
332 | int mysql_load(THD *thd, const sql_exchange *ex, TABLE_LIST *table_list, |
333 | List<Item> &fields_vars, List<Item> &set_fields, |
334 | List<Item> &set_values, |
335 | enum enum_duplicates handle_duplicates, bool ignore, |
336 | bool read_file_from_client) |
337 | { |
338 | char name[FN_REFLEN]; |
339 | File file; |
340 | TABLE *table= NULL; |
341 | int error= 0; |
342 | bool is_fifo=0; |
343 | #ifndef EMBEDDED_LIBRARY |
344 | killed_state killed_status; |
345 | bool is_concurrent; |
346 | #endif |
347 | const char *db= table_list->db.str; // This is never null |
348 | /* |
349 | If path for file is not defined, we will use the current database. |
350 | If this is not set, we will use the directory where the table to be |
351 | loaded is located |
352 | */ |
353 | const char *tdb= thd->db.str ? thd->db.str : db; // Result is never null |
354 | ulong skip_lines= ex->skip_lines; |
355 | bool transactional_table __attribute__((unused)); |
356 | DBUG_ENTER("mysql_load" ); |
357 | |
358 | /* |
359 | Bug #34283 |
360 | mysqlbinlog leaves tmpfile after termination if binlog contains |
361 | load data infile, so in mixed mode we go to row-based for |
362 | avoiding the problem. |
363 | */ |
364 | thd->set_current_stmt_binlog_format_row_if_mixed(); |
365 | |
366 | #ifdef EMBEDDED_LIBRARY |
367 | read_file_from_client = 0; //server is always in the same process |
368 | #endif |
369 | |
370 | if (ex->escaped->length() > 1 || ex->enclosed->length() > 1) |
371 | { |
372 | my_message(ER_WRONG_FIELD_TERMINATORS, |
373 | ER_THD(thd, ER_WRONG_FIELD_TERMINATORS), |
374 | MYF(0)); |
375 | DBUG_RETURN(TRUE); |
376 | } |
377 | |
378 | /* Report problems with non-ascii separators */ |
379 | if (!ex->escaped->is_ascii() || !ex->enclosed->is_ascii() || |
380 | !ex->field_term->is_ascii() || |
381 | !ex->line_term->is_ascii() || !ex->line_start->is_ascii()) |
382 | { |
383 | push_warning(thd, Sql_condition::WARN_LEVEL_WARN, |
384 | WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED, |
385 | ER_THD(thd, WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED)); |
386 | } |
387 | |
388 | if (open_and_lock_tables(thd, table_list, TRUE, 0)) |
389 | DBUG_RETURN(TRUE); |
390 | if (mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) || |
391 | mysql_handle_single_derived(thd->lex, table_list, DT_PREPARE)) |
392 | DBUG_RETURN(TRUE); |
393 | if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context, |
394 | &thd->lex->select_lex.top_join_list, |
395 | table_list, |
396 | thd->lex->select_lex.leaf_tables, FALSE, |
397 | INSERT_ACL | UPDATE_ACL, |
398 | INSERT_ACL | UPDATE_ACL, FALSE)) |
399 | DBUG_RETURN(-1); |
400 | if (!table_list->table || // do not suport join view |
401 | !table_list->single_table_updatable() || // and derived tables |
402 | check_key_in_view(thd, table_list)) |
403 | { |
404 | my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "LOAD" ); |
405 | DBUG_RETURN(TRUE); |
406 | } |
407 | if (table_list->prepare_where(thd, 0, TRUE) || |
408 | table_list->prepare_check_option(thd)) |
409 | { |
410 | DBUG_RETURN(TRUE); |
411 | } |
412 | thd_proc_info(thd, "Executing" ); |
413 | /* |
414 | Let us emit an error if we are loading data to table which is used |
415 | in subselect in SET clause like we do it for INSERT. |
416 | |
417 | The main thing to fix to remove this restriction is to ensure that the |
418 | table is marked to be 'used for insert' in which case we should never |
419 | mark this table as 'const table' (ie, one that has only one row). |
420 | */ |
421 | if (unique_table(thd, table_list, table_list->next_global, 0)) |
422 | { |
423 | my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name.str, |
424 | "LOAD DATA" ); |
425 | DBUG_RETURN(TRUE); |
426 | } |
427 | |
428 | table= table_list->table; |
429 | transactional_table= table->file->has_transactions(); |
430 | #ifndef EMBEDDED_LIBRARY |
431 | is_concurrent= (table_list->lock_type == TL_WRITE_CONCURRENT_INSERT); |
432 | #endif |
433 | |
434 | if (table->versioned(VERS_TIMESTAMP) && handle_duplicates == DUP_REPLACE) |
435 | { |
436 | // Additional memory may be required to create historical items. |
437 | if (table_list->set_insert_values(thd->mem_root)) |
438 | DBUG_RETURN(TRUE); |
439 | } |
440 | |
441 | if (!fields_vars.elements) |
442 | { |
443 | Field_iterator_table_ref field_iterator; |
444 | field_iterator.set(table_list); |
445 | for (; !field_iterator.end_of_fields(); field_iterator.next()) |
446 | { |
447 | if (field_iterator.field() && |
448 | field_iterator.field()->invisible > VISIBLE) |
449 | continue; |
450 | Item *item; |
451 | if (!(item= field_iterator.create_item(thd))) |
452 | DBUG_RETURN(TRUE); |
453 | fields_vars.push_back(item->real_item(), thd->mem_root); |
454 | } |
455 | bitmap_set_all(table->write_set); |
456 | /* |
457 | Let us also prepare SET clause, altough it is probably empty |
458 | in this case. |
459 | */ |
460 | if (setup_fields(thd, Ref_ptr_array(), |
461 | set_fields, MARK_COLUMNS_WRITE, 0, NULL, 0) || |
462 | setup_fields(thd, Ref_ptr_array(), |
463 | set_values, MARK_COLUMNS_READ, 0, NULL, 0)) |
464 | DBUG_RETURN(TRUE); |
465 | } |
466 | else |
467 | { // Part field list |
468 | /* TODO: use this conds for 'WITH CHECK OPTIONS' */ |
469 | if (setup_fields(thd, Ref_ptr_array(), |
470 | fields_vars, MARK_COLUMNS_WRITE, 0, NULL, 0) || |
471 | setup_fields(thd, Ref_ptr_array(), |
472 | set_fields, MARK_COLUMNS_WRITE, 0, NULL, 0) || |
473 | check_that_all_fields_are_given_values(thd, table, table_list)) |
474 | DBUG_RETURN(TRUE); |
475 | /* Fix the expressions in SET clause */ |
476 | if (setup_fields(thd, Ref_ptr_array(), |
477 | set_values, MARK_COLUMNS_READ, 0, NULL, 0)) |
478 | DBUG_RETURN(TRUE); |
479 | } |
480 | switch_to_nullable_trigger_fields(fields_vars, table); |
481 | switch_to_nullable_trigger_fields(set_fields, table); |
482 | switch_to_nullable_trigger_fields(set_values, table); |
483 | |
484 | table->prepare_triggers_for_insert_stmt_or_event(); |
485 | table->mark_columns_needed_for_insert(); |
486 | |
487 | Load_data_param param(ex->cs ? ex->cs : thd->variables.collation_database, |
488 | !ex->field_term->length() && !ex->enclosed->length()); |
489 | List_iterator_fast<Item> it(fields_vars); |
490 | Item *item; |
491 | |
492 | while ((item= it++)) |
493 | { |
494 | const Load_data_outvar *var= item->get_load_data_outvar_or_error(); |
495 | if (!var || var->load_data_add_outvar(thd, ¶m)) |
496 | DBUG_RETURN(true); |
497 | } |
498 | if (param.use_blobs() && !ex->line_term->length() && !ex->field_term->length()) |
499 | { |
500 | my_message(ER_BLOBS_AND_NO_TERMINATED, |
501 | ER_THD(thd, ER_BLOBS_AND_NO_TERMINATED), MYF(0)); |
502 | DBUG_RETURN(TRUE); |
503 | } |
504 | |
505 | /* We can't give an error in the middle when using LOCAL files */ |
506 | if (read_file_from_client && handle_duplicates == DUP_ERROR) |
507 | ignore= 1; |
508 | |
509 | #ifndef EMBEDDED_LIBRARY |
510 | if (read_file_from_client) |
511 | { |
512 | (void)net_request_file(&thd->net,ex->file_name); |
513 | file = -1; |
514 | } |
515 | else |
516 | #endif |
517 | { |
518 | #ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS |
519 | ex->file_name+=dirname_length(ex->file_name); |
520 | #endif |
521 | if (!dirname_length(ex->file_name)) |
522 | { |
523 | strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS); |
524 | (void) fn_format(name, ex->file_name, name, "" , |
525 | MY_RELATIVE_PATH | MY_UNPACK_FILENAME); |
526 | } |
527 | else |
528 | { |
529 | (void) fn_format(name, ex->file_name, mysql_real_data_home, "" , |
530 | MY_RELATIVE_PATH | MY_UNPACK_FILENAME | |
531 | MY_RETURN_REAL_PATH); |
532 | } |
533 | |
534 | if (thd->rgi_slave) |
535 | { |
536 | #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) |
537 | if (strncmp(thd->rgi_slave->rli->slave_patternload_file, name, |
538 | thd->rgi_slave->rli->slave_patternload_file_size)) |
539 | { |
540 | /* |
541 | LOAD DATA INFILE in the slave SQL Thread can only read from |
542 | --slave-load-tmpdir". This should never happen. Please, report a bug. |
543 | */ |
544 | |
545 | sql_print_error("LOAD DATA INFILE in the slave SQL Thread can only read from --slave-load-tmpdir. " \ |
546 | "Please, report a bug." ); |
547 | my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--slave-load-tmpdir" ); |
548 | DBUG_RETURN(TRUE); |
549 | } |
550 | #else |
551 | /* |
552 | This is impossible and should never happen. |
553 | */ |
554 | DBUG_ASSERT(FALSE); |
555 | #endif |
556 | } |
557 | else if (!is_secure_file_path(name)) |
558 | { |
559 | /* Read only allowed from within dir specified by secure_file_priv */ |
560 | my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv" ); |
561 | DBUG_RETURN(TRUE); |
562 | } |
563 | |
564 | #if !defined(__WIN__) && ! defined(__NETWARE__) |
565 | MY_STAT stat_info; |
566 | if (!my_stat(name, &stat_info, MYF(MY_WME))) |
567 | DBUG_RETURN(TRUE); |
568 | |
569 | // if we are not in slave thread, the file must be: |
570 | if (!thd->slave_thread && |
571 | !((stat_info.st_mode & S_IFLNK) != S_IFLNK && // symlink |
572 | ((stat_info.st_mode & S_IFREG) == S_IFREG || // regular file |
573 | (stat_info.st_mode & S_IFIFO) == S_IFIFO))) // named pipe |
574 | { |
575 | my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name); |
576 | DBUG_RETURN(TRUE); |
577 | } |
578 | if ((stat_info.st_mode & S_IFIFO) == S_IFIFO) |
579 | is_fifo= 1; |
580 | #endif |
581 | if ((file= mysql_file_open(key_file_load, |
582 | name, O_RDONLY, MYF(MY_WME))) < 0) |
583 | |
584 | DBUG_RETURN(TRUE); |
585 | } |
586 | |
587 | COPY_INFO info; |
588 | bzero((char*) &info,sizeof(info)); |
589 | info.ignore= ignore; |
590 | info.handle_duplicates=handle_duplicates; |
591 | info.escape_char= (ex->escaped->length() && (ex->escaped_given() || |
592 | !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))) |
593 | ? (*ex->escaped)[0] : INT_MAX; |
594 | |
595 | READ_INFO read_info(thd, file, param, |
596 | *ex->field_term, *ex->line_start, |
597 | *ex->line_term, *ex->enclosed, |
598 | info.escape_char, read_file_from_client, is_fifo); |
599 | if (unlikely(read_info.error)) |
600 | { |
601 | if (file >= 0) |
602 | mysql_file_close(file, MYF(0)); // no files in net reading |
603 | DBUG_RETURN(TRUE); // Can't allocate buffers |
604 | } |
605 | |
606 | #ifndef EMBEDDED_LIBRARY |
607 | if (mysql_bin_log.is_open()) |
608 | { |
609 | read_info.cache.thd = thd; |
610 | read_info.cache.wrote_create_file = 0; |
611 | read_info.cache.last_pos_in_file = HA_POS_ERROR; |
612 | read_info.cache.log_delayed= transactional_table; |
613 | } |
614 | #endif /*!EMBEDDED_LIBRARY*/ |
615 | |
616 | thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */ |
617 | thd->cuted_fields=0L; |
618 | /* Skip lines if there is a line terminator */ |
619 | if (ex->line_term->length() && ex->filetype != FILETYPE_XML) |
620 | { |
621 | /* ex->skip_lines needs to be preserved for logging */ |
622 | while (skip_lines > 0) |
623 | { |
624 | skip_lines--; |
625 | if (read_info.next_line()) |
626 | break; |
627 | } |
628 | } |
629 | |
630 | thd_proc_info(thd, "Reading file" ); |
631 | if (likely(!(error= MY_TEST(read_info.error)))) |
632 | { |
633 | table->reset_default_fields(); |
634 | table->next_number_field=table->found_next_number_field; |
635 | if (ignore || |
636 | handle_duplicates == DUP_REPLACE) |
637 | table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); |
638 | if (handle_duplicates == DUP_REPLACE && |
639 | (!table->triggers || |
640 | !table->triggers->has_delete_triggers())) |
641 | table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); |
642 | if (thd->locked_tables_mode <= LTM_LOCK_TABLES) |
643 | table->file->ha_start_bulk_insert((ha_rows) 0); |
644 | table->copy_blobs=1; |
645 | |
646 | thd->abort_on_warning= !ignore && thd->is_strict_mode(); |
647 | |
648 | thd_progress_init(thd, 2); |
649 | if (table_list->table->validate_default_values_of_unset_fields(thd)) |
650 | { |
651 | read_info.error= true; |
652 | error= 1; |
653 | } |
654 | else if (ex->filetype == FILETYPE_XML) /* load xml */ |
655 | error= read_xml_field(thd, info, table_list, fields_vars, |
656 | set_fields, set_values, read_info, |
657 | *(ex->line_term), skip_lines, ignore); |
658 | else if (read_info.is_fixed_length()) |
659 | error= read_fixed_length(thd, info, table_list, fields_vars, |
660 | set_fields, set_values, read_info, |
661 | skip_lines, ignore); |
662 | else |
663 | error= read_sep_field(thd, info, table_list, fields_vars, |
664 | set_fields, set_values, read_info, |
665 | *ex->enclosed, skip_lines, ignore); |
666 | |
667 | thd_proc_info(thd, "End bulk insert" ); |
668 | if (likely(!error)) |
669 | thd_progress_next_stage(thd); |
670 | if (thd->locked_tables_mode <= LTM_LOCK_TABLES && |
671 | table->file->ha_end_bulk_insert() && !error) |
672 | { |
673 | table->file->print_error(my_errno, MYF(0)); |
674 | error= 1; |
675 | } |
676 | table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); |
677 | table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); |
678 | table->next_number_field=0; |
679 | } |
680 | if (file >= 0) |
681 | mysql_file_close(file, MYF(0)); |
682 | free_blobs(table); /* if pack_blob was used */ |
683 | table->copy_blobs=0; |
684 | thd->count_cuted_fields= CHECK_FIELD_IGNORE; |
685 | /* |
686 | simulated killing in the middle of per-row loop |
687 | must be effective for binlogging |
688 | */ |
689 | DBUG_EXECUTE_IF("simulate_kill_bug27571" , |
690 | { |
691 | error=1; |
692 | thd->set_killed(KILL_QUERY); |
693 | };); |
694 | |
695 | #ifndef EMBEDDED_LIBRARY |
696 | killed_status= (error == 0) ? NOT_KILLED : thd->killed; |
697 | #endif |
698 | |
699 | /* |
700 | We must invalidate the table in query cache before binlog writing and |
701 | ha_autocommit_... |
702 | */ |
703 | query_cache_invalidate3(thd, table_list, 0); |
704 | if (error) |
705 | { |
706 | if (read_file_from_client) |
707 | read_info.skip_data_till_eof(); |
708 | |
709 | #ifndef EMBEDDED_LIBRARY |
710 | if (mysql_bin_log.is_open()) |
711 | { |
712 | { |
713 | /* |
714 | Make sure last block (the one which caused the error) gets |
715 | logged. |
716 | */ |
717 | log_loaded_block(&read_info.cache, 0, 0); |
718 | /* If the file was not empty, wrote_create_file is true */ |
719 | if (read_info.cache.wrote_create_file) |
720 | { |
721 | int errcode= query_error_code(thd, killed_status == NOT_KILLED); |
722 | |
723 | /* since there is already an error, the possible error of |
724 | writing binary log will be ignored */ |
725 | if (thd->transaction.stmt.modified_non_trans_table) |
726 | (void) write_execute_load_query_log_event(thd, ex, |
727 | table_list->db.str, |
728 | table_list->table_name.str, |
729 | is_concurrent, |
730 | handle_duplicates, ignore, |
731 | transactional_table, |
732 | errcode); |
733 | else |
734 | { |
735 | Delete_file_log_event d(thd, db, transactional_table); |
736 | (void) mysql_bin_log.write(&d); |
737 | } |
738 | } |
739 | } |
740 | } |
741 | #endif /*!EMBEDDED_LIBRARY*/ |
742 | error= -1; // Error on read |
743 | goto err; |
744 | } |
745 | sprintf(name, ER_THD(thd, ER_LOAD_INFO), |
746 | (ulong) info.records, (ulong) info.deleted, |
747 | (ulong) (info.records - info.copied), |
748 | (long) thd->get_stmt_da()->current_statement_warn_count()); |
749 | |
750 | if (thd->transaction.stmt.modified_non_trans_table) |
751 | thd->transaction.all.modified_non_trans_table= TRUE; |
752 | thd->transaction.all.m_unsafe_rollback_flags|= |
753 | (thd->transaction.stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT); |
754 | #ifndef EMBEDDED_LIBRARY |
755 | if (mysql_bin_log.is_open()) |
756 | { |
757 | /* |
758 | We need to do the job that is normally done inside |
759 | binlog_query() here, which is to ensure that the pending event |
760 | is written before tables are unlocked and before any other |
761 | events are written. We also need to update the table map |
762 | version for the binary log to mark that table maps are invalid |
763 | after this point. |
764 | */ |
765 | if (thd->is_current_stmt_binlog_format_row()) |
766 | error= thd->binlog_flush_pending_rows_event(TRUE, transactional_table); |
767 | else |
768 | { |
769 | /* |
770 | As already explained above, we need to call log_loaded_block() to have |
771 | the last block logged |
772 | */ |
773 | log_loaded_block(&read_info.cache, 0, 0); |
774 | if (read_info.cache.wrote_create_file) |
775 | { |
776 | int errcode= query_error_code(thd, killed_status == NOT_KILLED); |
777 | error= write_execute_load_query_log_event(thd, ex, |
778 | table_list->db.str, |
779 | table_list->table_name.str, |
780 | is_concurrent, |
781 | handle_duplicates, ignore, |
782 | transactional_table, |
783 | errcode); |
784 | } |
785 | |
786 | /* |
787 | Flushing the IO CACHE while writing the execute load query log event |
788 | may result in error (for instance, because the max_binlog_size has been |
789 | reached, and rotation of the binary log failed). |
790 | */ |
791 | error= error || mysql_bin_log.get_log_file()->error; |
792 | } |
793 | if (unlikely(error)) |
794 | goto err; |
795 | } |
796 | #endif /*!EMBEDDED_LIBRARY*/ |
797 | |
798 | /* ok to client sent only after binlog write and engine commit */ |
799 | my_ok(thd, info.copied + info.deleted, 0L, name); |
800 | err: |
801 | DBUG_ASSERT(transactional_table || !(info.copied || info.deleted) || |
802 | thd->transaction.stmt.modified_non_trans_table); |
803 | table->file->ha_release_auto_increment(); |
804 | table->auto_increment_field_not_null= FALSE; |
805 | thd->abort_on_warning= 0; |
806 | DBUG_RETURN(error); |
807 | } |
808 | |
809 | |
810 | #ifndef EMBEDDED_LIBRARY |
811 | |
812 | /* Not a very useful function; just to avoid duplication of code */ |
813 | static bool write_execute_load_query_log_event(THD *thd, const sql_exchange* ex, |
814 | const char* db_arg, /* table's database */ |
815 | const char* table_name_arg, |
816 | bool is_concurrent, |
817 | enum enum_duplicates duplicates, |
818 | bool ignore, |
819 | bool transactional_table, |
820 | int errcode) |
821 | { |
822 | char *load_data_query; |
823 | my_off_t fname_start, |
824 | fname_end; |
825 | List<Item> fv; |
826 | Item *item, *val; |
827 | int n; |
828 | const char *tdb= (thd->db.str != NULL ? thd->db.str : db_arg); |
829 | const char *qualify_db= NULL; |
830 | char command_buffer[1024]; |
831 | String query_str(command_buffer, sizeof(command_buffer), |
832 | system_charset_info); |
833 | |
834 | Load_log_event lle(thd, ex, tdb, table_name_arg, fv, is_concurrent, |
835 | duplicates, ignore, transactional_table); |
836 | |
837 | /* |
838 | force in a LOCAL if there was one in the original. |
839 | */ |
840 | if (thd->lex->local_file) |
841 | lle.set_fname_outside_temp_buf(ex->file_name, strlen(ex->file_name)); |
842 | |
843 | query_str.length(0); |
844 | if (!thd->db.str || strcmp(db_arg, thd->db.str)) |
845 | { |
846 | /* |
847 | If used database differs from table's database, |
848 | prefix table name with database name so that it |
849 | becomes a FQ name. |
850 | */ |
851 | qualify_db= db_arg; |
852 | } |
853 | lle.print_query(thd, FALSE, (const char *) ex->cs?ex->cs->csname:NULL, |
854 | &query_str, &fname_start, &fname_end, qualify_db); |
855 | |
856 | /* |
857 | prepare fields-list and SET if needed; print_query won't do that for us. |
858 | */ |
859 | if (!thd->lex->field_list.is_empty()) |
860 | { |
861 | List_iterator<Item> li(thd->lex->field_list); |
862 | |
863 | query_str.append(" (" ); |
864 | n= 0; |
865 | |
866 | while ((item= li++)) |
867 | { |
868 | if (n++) |
869 | query_str.append(", " ); |
870 | const Load_data_outvar *var= item->get_load_data_outvar(); |
871 | DBUG_ASSERT(var); |
872 | var->load_data_print_for_log_event(thd, &query_str); |
873 | } |
874 | query_str.append(")" ); |
875 | } |
876 | |
877 | if (!thd->lex->update_list.is_empty()) |
878 | { |
879 | List_iterator<Item> lu(thd->lex->update_list); |
880 | List_iterator<Item> lv(thd->lex->value_list); |
881 | |
882 | query_str.append(STRING_WITH_LEN(" SET " )); |
883 | n= 0; |
884 | |
885 | while ((item= lu++)) |
886 | { |
887 | val= lv++; |
888 | if (n++) |
889 | query_str.append(STRING_WITH_LEN(", " )); |
890 | append_identifier(thd, &query_str, &item->name); |
891 | query_str.append(&val->name); |
892 | } |
893 | } |
894 | |
895 | if (!(load_data_query= (char *)thd->strmake(query_str.ptr(), query_str.length()))) |
896 | return TRUE; |
897 | |
898 | Execute_load_query_log_event |
899 | e(thd, load_data_query, query_str.length(), |
900 | (uint) (fname_start - 1), (uint) fname_end, |
901 | (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : |
902 | (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR), |
903 | transactional_table, FALSE, FALSE, errcode); |
904 | return mysql_bin_log.write(&e); |
905 | } |
906 | |
907 | #endif |
908 | |
/****************************************************************************
** Read of rows of fixed size + optional garbage + optional newline
****************************************************************************/
912 | |
913 | static int |
914 | read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, |
915 | List<Item> &fields_vars, List<Item> &set_fields, |
916 | List<Item> &set_values, READ_INFO &read_info, |
917 | ulong skip_lines, bool ignore_check_option_errors) |
918 | { |
919 | List_iterator_fast<Item> it(fields_vars); |
920 | Item *item; |
921 | TABLE *table= table_list->table; |
922 | bool err, progress_reports; |
923 | ulonglong counter, time_to_report_progress; |
924 | DBUG_ENTER("read_fixed_length" ); |
925 | |
926 | counter= 0; |
927 | time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; |
928 | progress_reports= 1; |
929 | if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0) |
930 | progress_reports= 0; |
931 | |
932 | while (!read_info.read_fixed_length()) |
933 | { |
934 | if (thd->killed) |
935 | { |
936 | thd->send_kill_message(); |
937 | DBUG_RETURN(1); |
938 | } |
939 | if (progress_reports) |
940 | { |
941 | thd->progress.counter= read_info.position(); |
942 | if (++counter >= time_to_report_progress) |
943 | { |
944 | time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10; |
945 | thd_progress_report(thd, thd->progress.counter, |
946 | thd->progress.max_counter); |
947 | } |
948 | } |
949 | if (skip_lines) |
950 | { |
951 | /* |
952 | We could implement this with a simple seek if: |
953 | - We are not using DATA INFILE LOCAL |
954 | - escape character is "" |
955 | - line starting prefix is "" |
956 | */ |
957 | skip_lines--; |
958 | continue; |
959 | } |
960 | it.rewind(); |
961 | uchar *pos=read_info.row_start; |
962 | #ifdef HAVE_valgrind |
963 | read_info.row_end[0]=0; |
964 | #endif |
965 | |
966 | restore_record(table, s->default_values); |
967 | |
968 | while ((item= it++)) |
969 | { |
970 | Load_data_outvar *dst= item->get_load_data_outvar(); |
971 | DBUG_ASSERT(dst); |
972 | if (pos == read_info.row_end) |
973 | { |
974 | if (dst->load_data_set_no_data(thd, &read_info)) |
975 | DBUG_RETURN(1); |
976 | } |
977 | else |
978 | { |
979 | uint length, fixed_length= dst->load_data_fixed_length(); |
980 | uchar save_chr; |
981 | if ((length=(uint) (read_info.row_end - pos)) > fixed_length) |
982 | length= fixed_length; |
983 | save_chr= pos[length]; pos[length]= '\0'; // Safeguard aganst malloc |
984 | dst->load_data_set_value(thd, (const char *) pos, length, &read_info); |
985 | pos[length]= save_chr; |
986 | if ((pos+= length) > read_info.row_end) |
987 | pos= read_info.row_end; // Fills rest with space |
988 | } |
989 | } |
990 | if (pos != read_info.row_end) |
991 | { |
992 | thd->cuted_fields++; /* To long row */ |
993 | push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, |
994 | ER_WARN_TOO_MANY_RECORDS, |
995 | ER_THD(thd, ER_WARN_TOO_MANY_RECORDS), |
996 | thd->get_stmt_da()->current_row_for_warning()); |
997 | } |
998 | |
999 | if (thd->killed || |
1000 | fill_record_n_invoke_before_triggers(thd, table, set_fields, set_values, |
1001 | ignore_check_option_errors, |
1002 | TRG_EVENT_INSERT)) |
1003 | DBUG_RETURN(1); |
1004 | |
1005 | switch (table_list->view_check_option(thd, ignore_check_option_errors)) { |
1006 | case VIEW_CHECK_SKIP: |
1007 | read_info.next_line(); |
1008 | goto continue_loop; |
1009 | case VIEW_CHECK_ERROR: |
1010 | DBUG_RETURN(-1); |
1011 | } |
1012 | |
1013 | WSREP_LOAD_DATA_SPLIT(thd, table, info); |
1014 | err= write_record(thd, table, &info); |
1015 | table->auto_increment_field_not_null= FALSE; |
1016 | if (err) |
1017 | DBUG_RETURN(1); |
1018 | |
1019 | /* |
1020 | We don't need to reset auto-increment field since we are restoring |
1021 | its default value at the beginning of each loop iteration. |
1022 | */ |
1023 | if (read_info.next_line()) // Skip to next line |
1024 | break; |
1025 | if (read_info.line_cuted) |
1026 | { |
1027 | thd->cuted_fields++; /* To long row */ |
1028 | push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, |
1029 | ER_WARN_TOO_MANY_RECORDS, |
1030 | ER_THD(thd, ER_WARN_TOO_MANY_RECORDS), |
1031 | thd->get_stmt_da()->current_row_for_warning()); |
1032 | } |
1033 | thd->get_stmt_da()->inc_current_row_for_warning(); |
1034 | continue_loop:; |
1035 | } |
1036 | DBUG_RETURN(MY_TEST(read_info.error)); |
1037 | } |
1038 | |
1039 | |
1040 | static int |
1041 | read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, |
1042 | List<Item> &fields_vars, List<Item> &set_fields, |
1043 | List<Item> &set_values, READ_INFO &read_info, |
1044 | String &enclosed, ulong skip_lines, |
1045 | bool ignore_check_option_errors) |
1046 | { |
1047 | List_iterator_fast<Item> it(fields_vars); |
1048 | Item *item; |
1049 | TABLE *table= table_list->table; |
1050 | uint enclosed_length; |
1051 | bool err, progress_reports; |
1052 | ulonglong counter, time_to_report_progress; |
1053 | DBUG_ENTER("read_sep_field" ); |
1054 | |
1055 | enclosed_length=enclosed.length(); |
1056 | |
1057 | counter= 0; |
1058 | time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; |
1059 | progress_reports= 1; |
1060 | if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0) |
1061 | progress_reports= 0; |
1062 | |
1063 | for (;;it.rewind()) |
1064 | { |
1065 | if (thd->killed) |
1066 | { |
1067 | thd->send_kill_message(); |
1068 | DBUG_RETURN(1); |
1069 | } |
1070 | |
1071 | if (progress_reports) |
1072 | { |
1073 | thd->progress.counter= read_info.position(); |
1074 | if (++counter >= time_to_report_progress) |
1075 | { |
1076 | time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10; |
1077 | thd_progress_report(thd, thd->progress.counter, |
1078 | thd->progress.max_counter); |
1079 | } |
1080 | } |
1081 | restore_record(table, s->default_values); |
1082 | |
1083 | while ((item= it++)) |
1084 | { |
1085 | uint length; |
1086 | uchar *pos; |
1087 | if (read_info.read_field()) |
1088 | break; |
1089 | |
1090 | /* If this line is to be skipped we don't want to fill field or var */ |
1091 | if (skip_lines) |
1092 | continue; |
1093 | |
1094 | pos=read_info.row_start; |
1095 | length=(uint) (read_info.row_end-pos); |
1096 | |
1097 | Load_data_outvar *dst= item->get_load_data_outvar_or_error(); |
1098 | DBUG_ASSERT(dst); |
1099 | |
1100 | if ((!read_info.enclosed && |
1101 | (enclosed_length && length == 4 && |
1102 | !memcmp(pos, STRING_WITH_LEN("NULL" )))) || |
1103 | (length == 1 && read_info.found_null)) |
1104 | { |
1105 | if (dst->load_data_set_null(thd, &read_info)) |
1106 | DBUG_RETURN(1); |
1107 | } |
1108 | else |
1109 | { |
1110 | read_info.row_end[0]= 0; // Safe to change end marker |
1111 | if (dst->load_data_set_value(thd, (const char *) pos, length, &read_info)) |
1112 | DBUG_RETURN(1); |
1113 | } |
1114 | } |
1115 | |
1116 | if (unlikely(thd->is_error())) |
1117 | read_info.error= 1; |
1118 | if (unlikely(read_info.error)) |
1119 | break; |
1120 | |
1121 | if (skip_lines) |
1122 | { |
1123 | skip_lines--; |
1124 | continue; |
1125 | } |
1126 | if (item) |
1127 | { |
1128 | /* Have not read any field, thus input file is simply ended */ |
1129 | if (item == fields_vars.head()) |
1130 | break; |
1131 | for (; item ; item= it++) |
1132 | { |
1133 | Load_data_outvar *dst= item->get_load_data_outvar_or_error(); |
1134 | DBUG_ASSERT(dst); |
1135 | if (unlikely(dst->load_data_set_no_data(thd, &read_info))) |
1136 | DBUG_RETURN(1); |
1137 | } |
1138 | } |
1139 | |
1140 | if (unlikely(thd->killed) || |
1141 | unlikely(fill_record_n_invoke_before_triggers(thd, table, set_fields, |
1142 | set_values, |
1143 | ignore_check_option_errors, |
1144 | TRG_EVENT_INSERT))) |
1145 | DBUG_RETURN(1); |
1146 | |
1147 | switch (table_list->view_check_option(thd, |
1148 | ignore_check_option_errors)) { |
1149 | case VIEW_CHECK_SKIP: |
1150 | read_info.next_line(); |
1151 | goto continue_loop; |
1152 | case VIEW_CHECK_ERROR: |
1153 | DBUG_RETURN(-1); |
1154 | } |
1155 | |
1156 | WSREP_LOAD_DATA_SPLIT(thd, table, info); |
1157 | err= write_record(thd, table, &info); |
1158 | table->auto_increment_field_not_null= FALSE; |
1159 | if (err) |
1160 | DBUG_RETURN(1); |
1161 | /* |
1162 | We don't need to reset auto-increment field since we are restoring |
1163 | its default value at the beginning of each loop iteration. |
1164 | */ |
1165 | if (read_info.next_line()) // Skip to next line |
1166 | break; |
1167 | if (read_info.line_cuted) |
1168 | { |
1169 | thd->cuted_fields++; /* To long row */ |
1170 | push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, |
1171 | ER_WARN_TOO_MANY_RECORDS, |
1172 | ER_THD(thd, ER_WARN_TOO_MANY_RECORDS), |
1173 | thd->get_stmt_da()->current_row_for_warning()); |
1174 | if (thd->killed) |
1175 | DBUG_RETURN(1); |
1176 | } |
1177 | thd->get_stmt_da()->inc_current_row_for_warning(); |
1178 | continue_loop:; |
1179 | } |
1180 | DBUG_RETURN(MY_TEST(read_info.error)); |
1181 | } |
1182 | |
1183 | |
1184 | /**************************************************************************** |
1185 | ** Read rows in xml format |
1186 | ****************************************************************************/ |
1187 | static int |
1188 | read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, |
1189 | List<Item> &fields_vars, List<Item> &set_fields, |
1190 | List<Item> &set_values, READ_INFO &read_info, |
1191 | String &row_tag, ulong skip_lines, |
1192 | bool ignore_check_option_errors) |
1193 | { |
1194 | List_iterator_fast<Item> it(fields_vars); |
1195 | Item *item; |
1196 | TABLE *table= table_list->table; |
1197 | bool no_trans_update_stmt; |
1198 | DBUG_ENTER("read_xml_field" ); |
1199 | |
1200 | no_trans_update_stmt= !table->file->has_transactions(); |
1201 | |
1202 | for ( ; ; it.rewind()) |
1203 | { |
1204 | bool err; |
1205 | if (thd->killed) |
1206 | { |
1207 | thd->send_kill_message(); |
1208 | DBUG_RETURN(1); |
1209 | } |
1210 | |
1211 | // read row tag and save values into tag list |
1212 | if (read_info.read_xml(thd)) |
1213 | break; |
1214 | |
1215 | List_iterator_fast<XML_TAG> xmlit(read_info.taglist); |
1216 | xmlit.rewind(); |
1217 | XML_TAG *tag= NULL; |
1218 | |
1219 | #ifndef DBUG_OFF |
1220 | DBUG_PRINT("read_xml_field" , ("skip_lines=%d" , (int) skip_lines)); |
1221 | while ((tag= xmlit++)) |
1222 | { |
1223 | DBUG_PRINT("read_xml_field" , ("got tag:%i '%s' '%s'" , |
1224 | tag->level, tag->field.c_ptr(), |
1225 | tag->value.c_ptr())); |
1226 | } |
1227 | #endif |
1228 | |
1229 | restore_record(table, s->default_values); |
1230 | |
1231 | while ((item= it++)) |
1232 | { |
1233 | /* If this line is to be skipped we don't want to fill field or var */ |
1234 | if (skip_lines) |
1235 | continue; |
1236 | |
1237 | /* find field in tag list */ |
1238 | xmlit.rewind(); |
1239 | tag= xmlit++; |
1240 | |
1241 | while(tag && strcmp(tag->field.c_ptr(), item->name.str) != 0) |
1242 | tag= xmlit++; |
1243 | |
1244 | Load_data_outvar *dst= item->get_load_data_outvar_or_error(); |
1245 | DBUG_ASSERT(dst); |
1246 | if (!tag ? dst->load_data_set_null(thd, &read_info) : |
1247 | dst->load_data_set_value(thd, tag->value.ptr(), |
1248 | tag->value.length(), |
1249 | &read_info)) |
1250 | DBUG_RETURN(1); |
1251 | } |
1252 | |
1253 | if (unlikely(read_info.error)) |
1254 | break; |
1255 | |
1256 | if (skip_lines) |
1257 | { |
1258 | skip_lines--; |
1259 | continue; |
1260 | } |
1261 | |
1262 | DBUG_ASSERT(!item); |
1263 | |
1264 | if (thd->killed || |
1265 | fill_record_n_invoke_before_triggers(thd, table, set_fields, set_values, |
1266 | ignore_check_option_errors, |
1267 | TRG_EVENT_INSERT)) |
1268 | DBUG_RETURN(1); |
1269 | |
1270 | switch (table_list->view_check_option(thd, |
1271 | ignore_check_option_errors)) { |
1272 | case VIEW_CHECK_SKIP: |
1273 | read_info.next_line(); |
1274 | goto continue_loop; |
1275 | case VIEW_CHECK_ERROR: |
1276 | DBUG_RETURN(-1); |
1277 | } |
1278 | |
1279 | WSREP_LOAD_DATA_SPLIT(thd, table, info); |
1280 | err= write_record(thd, table, &info); |
1281 | table->auto_increment_field_not_null= false; |
1282 | if (err) |
1283 | DBUG_RETURN(1); |
1284 | |
1285 | /* |
1286 | We don't need to reset auto-increment field since we are restoring |
1287 | its default value at the beginning of each loop iteration. |
1288 | */ |
1289 | thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt; |
1290 | thd->get_stmt_da()->inc_current_row_for_warning(); |
1291 | continue_loop:; |
1292 | } |
1293 | DBUG_RETURN(MY_TEST(read_info.error) || thd->is_error()); |
1294 | } /* load xml end */ |
1295 | |
1296 | |
1297 | /* Unescape all escape characters, mark \N as null */ |
1298 | |
1299 | char |
1300 | READ_INFO::unescape(char chr) |
1301 | { |
1302 | /* keep this switch synchornous with the ESCAPE_CHARS macro */ |
1303 | switch(chr) { |
1304 | case 'n': return '\n'; |
1305 | case 't': return '\t'; |
1306 | case 'r': return '\r'; |
1307 | case 'b': return '\b'; |
1308 | case '0': return 0; // Ascii null |
1309 | case 'Z': return '\032'; // Win32 end of file |
1310 | case 'N': found_null=1; |
1311 | |
1312 | /* fall through */ |
1313 | default: return chr; |
1314 | } |
1315 | } |
1316 | |
1317 | |
/*
  Read a line using buffering
  If the last line is empty (in line mode) then it is not output
*/
1322 | |
1323 | |
1324 | READ_INFO::READ_INFO(THD *thd, File file_par, |
1325 | const Load_data_param ¶m, |
1326 | String &field_term, String &line_start, String &line_term, |
1327 | String &enclosed_par, int escape, bool get_it_from_net, |
1328 | bool is_fifo) |
1329 | :Load_data_param(param), |
1330 | file(file_par), |
1331 | m_field_term(field_term), m_line_term(line_term), m_line_start(line_start), |
1332 | escape_char(escape), found_end_of_line(false), eof(false), |
1333 | error(false), line_cuted(false), found_null(false) |
1334 | { |
1335 | data.set_thread_specific(); |
1336 | /* |
1337 | Field and line terminators must be interpreted as sequence of unsigned char. |
1338 | Otherwise, non-ascii terminators will be negative on some platforms, |
1339 | and positive on others (depending on the implementation of char). |
1340 | */ |
1341 | |
1342 | level= 0; /* for load xml */ |
1343 | start_of_line= line_start.length() != 0; |
1344 | /* If field_terminator == line_terminator, don't use line_terminator */ |
1345 | if (m_field_term.eq(m_line_term)) |
1346 | m_line_term.reset(); |
1347 | enclosed_char= enclosed_par.length() ? (uchar) enclosed_par[0] : INT_MAX; |
1348 | |
1349 | /* Set of a stack for unget if long terminators */ |
1350 | uint length= MY_MAX(charset()->mbmaxlen, MY_MAX(m_field_term.length(), |
1351 | m_line_term.length())) + 1; |
1352 | set_if_bigger(length,line_start.length()); |
1353 | stack= stack_pos= (int*) thd->alloc(sizeof(int) * length); |
1354 | |
1355 | DBUG_ASSERT(m_fixed_length < UINT_MAX32); |
1356 | if (data.reserve((size_t) m_fixed_length)) |
1357 | error=1; /* purecov: inspected */ |
1358 | else |
1359 | { |
1360 | if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0, |
1361 | (get_it_from_net) ? READ_NET : |
1362 | (is_fifo ? READ_FIFO : READ_CACHE),0L,1, |
1363 | MYF(MY_WME | MY_THREAD_SPECIFIC))) |
1364 | { |
1365 | error=1; |
1366 | } |
1367 | else |
1368 | { |
1369 | #ifndef EMBEDDED_LIBRARY |
1370 | if (get_it_from_net) |
1371 | cache.read_function = _my_b_net_read; |
1372 | |
1373 | if (mysql_bin_log.is_open()) |
1374 | { |
1375 | cache.real_read_function= cache.read_function; |
1376 | cache.read_function= log_loaded_block; |
1377 | } |
1378 | #endif |
1379 | } |
1380 | } |
1381 | } |
1382 | |
1383 | |
1384 | READ_INFO::~READ_INFO() |
1385 | { |
1386 | ::end_io_cache(&cache); |
1387 | List_iterator<XML_TAG> xmlit(taglist); |
1388 | XML_TAG *t; |
1389 | while ((t= xmlit++)) |
1390 | delete(t); |
1391 | } |
1392 | |
1393 | |
1394 | inline bool READ_INFO::terminator(const uchar *ptr, uint length) |
1395 | { |
1396 | int chr=0; // Keep gcc happy |
1397 | uint i; |
1398 | for (i=1 ; i < length ; i++) |
1399 | { |
1400 | if ((chr=GET) != *(uchar*)++ptr) |
1401 | { |
1402 | break; |
1403 | } |
1404 | } |
1405 | if (i == length) |
1406 | return true; |
1407 | PUSH(chr); |
1408 | while (i-- > 1) |
1409 | PUSH(*--ptr); |
1410 | return false; |
1411 | } |
1412 | |
1413 | |
1414 | /** |
1415 | Read a field. |
1416 | |
1417 | The data in the loaded file was presumably escaped using |
1418 | - either select_export::send_data() OUTFILE |
1419 | - or mysql_real_escape_string() |
1420 | using the same character set with the one specified in the current |
1421 | "LOAD DATA INFILE ... CHARACTER SET ..." (or the default LOAD character set). |
1422 | |
1423 | Note, non-escaped multi-byte characters are scanned as a single entity. |
1424 | This is needed to correctly distinguish between: |
1425 | - 0x5C as an escape character versus |
1426 | - 0x5C as the second byte in a multi-byte sequence (big5, cp932, gbk, sjis) |
1427 | |
1428 | Parts of escaped multi-byte characters are scanned on different loop |
1429 | iterations. See the comment about 0x5C handling in select_export::send_data() |
1430 | in sql_class.cc. |
1431 | |
1432 | READ_INFO::read_field() does not check wellformedness. |
1433 | Raising wellformedness errors or warnings in READ_INFO::read_field() |
1434 | would be wrong, as the data after unescaping can go into a BLOB field, |
1435 | or into a TEXT/VARCHAR field of a different character set. |
1436 | The loop below only makes sure to revert escaping made by |
1437 | select_export::send_data() or mysql_real_escape_string(). |
1438 | Wellformedness is checked later, during Field::store(str,length,cs) time. |
1439 | |
1440 | Note, in some cases users can supply data which did not go through |
1441 | escaping properly. For example, utf8 "\<C3><A4>" |
1442 | (backslash followed by LATIN SMALL LETTER A WITH DIAERESIS) |
1443 | is improperly escaped data that could not be generated by |
1444 | select_export::send_data() / mysql_real_escape_string(): |
1445 | - either there should be two backslashes: "\\<C3><A4>" |
  - or there should be no backslashes at all: "<C3><A4>"
  "\<C3>" and "<A4>" are scanned on two different loop iterations and
1448 | store "<C3><A4>" into the field. |
1449 | |
1450 | Note, adding useless escapes before multi-byte characters like in the |
1451 | example above is safe in case of utf8, but is not safe in case of |
1452 | character sets that have escape_with_backslash_is_dangerous==TRUE, |
1453 | such as big5, cp932, gbk, sjis. This can lead to mis-interpretation of the |
1454 | data. Suppose we have a big5 character "<EE><5C>" followed by <30> (digit 0). |
1455 | If we add an extra escape before this sequence, then we'll get |
1456 | <5C><EE><5C><30>. The first loop iteration will turn <5C><EE> into <EE>. |
1457 | The second loop iteration will turn <5C><30> into <30>. |
1458 | So the program that generates a dump file for further use with LOAD DATA |
1459 | must make sure to use escapes properly. |
1460 | */ |
1461 | |
/*
  Read the next field of the current line into 'data'.

  Returns 0 when a field was read: row_start/row_end delimit it and
  'enclosed' tells whether it ended with a closing enclosing character.
  Returns 1 when no field is available: the line already ended, the
  line_start prefix was not found, the input is at EOF, or the buffer
  could not be enlarged (then 'error' is also set).
*/
int READ_INFO::read_field()
{
  int chr,found_enclosed_char;

  found_null=0;
  if (found_end_of_line)
    return 1;                                   // One has to call next_line

  /* Skip until we find 'line_start' */

  if (start_of_line)
  {                                             // Skip until line_start
    start_of_line=0;
    if (find_start_of_fields())
      return 1;
  }
  if ((chr=GET) == my_b_EOF)
  {
    found_end_of_line=eof=1;
    return 1;
  }
  data.length(0);
  if (chr == enclosed_char)
  {
    /*
      The opening enclosing character is kept in the buffer: a properly
      closed field skips it via row_start= data.ptr() + 1, while the
      found_eof path returns it as part of the data.
    */
    found_enclosed_char=enclosed_char;
    data.append(chr);                            // If error
  }
  else
  {
    /* INT_MAX never equals a byte read from the input: field not enclosed */
    found_enclosed_char= INT_MAX;
    PUSH(chr);
  }

  for (;;)
  {
    // Make sure we have enough space for the longest multi-byte character.
    while (data.length() + charset()->mbmaxlen <= data.alloced_length())
    {
      chr = GET;
      if (chr == my_b_EOF)
        goto found_eof;
      if (chr == escape_char)
      {
        if ((chr=GET) == my_b_EOF)
        {
          /* A trailing escape character is stored literally */
          data.append(escape_char);
          goto found_eof;
        }
        /*
          When escape_char == enclosed_char, we treat it like we do for
          handling quotes in SQL parsing -- you can double-up the
          escape_char to include it literally, but it doesn't do escapes
          like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
          with data like: "fie""ld1", "field2"
        */
        if (escape_char != enclosed_char || chr == escape_char)
        {
          data.append(unescape((char) chr));
          continue;
        }
        /*
          escape_char == enclosed_char and it was not doubled: unget the
          following byte and let the code below handle the character as an
          enclosing character.
        */
        PUSH(chr);
        chr= escape_char;
      }
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
      if (chr == m_line_term.initial_byte())
#else
      if (chr == m_line_term.initial_byte() && found_enclosed_char == INT_MAX)
#endif
      {
        if (terminator(m_line_term))
        {                                       // Maybe unexpected linefeed
          enclosed=0;
          found_end_of_line=1;
          row_start= (uchar *) data.ptr();
          row_end= (uchar *) data.end();
          return 0;
        }
      }
      if (chr == found_enclosed_char)
      {
        if ((chr=GET) == found_enclosed_char)
        {                                       // Remove duplicated
          data.append(chr);
          continue;
        }
        // End of enclosed field if followed by field_term or line_term
        if (chr == my_b_EOF || terminator(chr, m_line_term))
        {
          /* Maybe unexpected linefeed */
          enclosed=1;
          found_end_of_line=1;
          row_start= (uchar *) data.ptr() + 1;  // Skip opening enclosure
          row_end= (uchar *) data.end();
          return 0;
        }
        if (terminator(chr, m_field_term))
        {
          enclosed=1;
          row_start= (uchar *) data.ptr() + 1;  // Skip opening enclosure
          row_end= (uchar *) data.end();
          return 0;
        }
        /*
          The string didn't terminate yet.
          Store back next character for the loop
        */
        PUSH(chr);
        /* The enclosing character itself is appended to the data below */
        chr= found_enclosed_char;
      }
      else if (chr == m_field_term.initial_byte() &&
               found_enclosed_char == INT_MAX)
      {
        if (terminator(m_field_term))
        {
          enclosed=0;
          row_start= (uchar *) data.ptr();
          row_end= (uchar *) data.end();
          return 0;
        }
      }
      data.append(chr);
      if (use_mb(charset()) && read_mbtail(&data))
        goto found_eof;
    }
    /*
    ** We come here if buffer is too small. Enlarge it and continue
    */
    if (data.reserve(IO_SIZE))
      return (error= 1);
  }

found_eof:
  /* EOF inside a field: return what was collected as an unenclosed field */
  enclosed=0;
  found_end_of_line=eof=1;
  row_start= (uchar *) data.ptr();
  row_end= (uchar *) data.end();
  return 0;
}
1601 | |
1602 | /* |
1603 | Read a row with fixed length. |
1604 | |
1605 | NOTES |
1606 | The row may not be fixed size on disk if there are escape |
1607 | characters in the file. |
1608 | |
1609 | IMPLEMENTATION NOTE |
1610 | One can't use fixed length with multi-byte charset ** |
1611 | |
1612 | RETURN |
1613 | 0 ok |
1614 | 1 error |
1615 | */ |
1616 | |
1617 | int READ_INFO::read_fixed_length() |
1618 | { |
1619 | int chr; |
1620 | if (found_end_of_line) |
1621 | return 1; // One have to call next_line |
1622 | |
1623 | if (start_of_line) |
1624 | { // Skip until line_start |
1625 | start_of_line=0; |
1626 | if (find_start_of_fields()) |
1627 | return 1; |
1628 | } |
1629 | |
1630 | for (data.length(0); data.length() < m_fixed_length ; ) |
1631 | { |
1632 | if ((chr=GET) == my_b_EOF) |
1633 | goto found_eof; |
1634 | if (chr == escape_char) |
1635 | { |
1636 | if ((chr=GET) == my_b_EOF) |
1637 | { |
1638 | data.append(escape_char); |
1639 | goto found_eof; |
1640 | } |
1641 | data.append((uchar) unescape((char) chr)); |
1642 | continue; |
1643 | } |
1644 | if (terminator(chr, m_line_term)) |
1645 | { // Maybe unexpected linefeed |
1646 | found_end_of_line= true; |
1647 | break; |
1648 | } |
1649 | data.append(chr); |
1650 | } |
1651 | row_start= (uchar *) data.ptr(); |
1652 | row_end= (uchar *) data.end(); // Found full line |
1653 | return 0; |
1654 | |
1655 | found_eof: |
1656 | found_end_of_line=eof=1; |
1657 | row_start= (uchar *) data.ptr(); |
1658 | row_end= (uchar *) data.end(); |
1659 | return data.length() == 0 ? 1 : 0; |
1660 | } |
1661 | |
1662 | |
/*
  Prepare for reading the next line, skipping unread data if needed.

  If the end of the current line was already seen (or the input is at EOF),
  only the state flags are reset and 'eof' is returned. Otherwise input is
  consumed up to and including the next line terminator, and line_cuted is
  set when anything had to be skipped.

  Returns 0 when positioned at a new line (or when no line terminator is
  defined), non-zero at end of input.
*/
int READ_INFO::next_line()
{
  line_cuted=0;
  start_of_line= m_line_start.length() != 0;
  if (found_end_of_line || eof)
  {
    found_end_of_line=0;
    return eof;
  }
  found_end_of_line=0;
  if (!m_line_term.length())
    return 0;                                   // No lines
  for (;;)
  {
    int chlen;
    char buf[MY_CS_MBMAXLEN];

    if (getbyte(&buf[0]))
      return 1; // EOF

    /* The first byte alone is not a complete single-byte character */
    if (use_mb(charset()) &&
        (chlen= my_charlen(charset(), buf, buf + 1)) != 1)
    {
      uint i;
      /* Fetch more bytes while my_charlen() reports the sequence too short */
      for (i= 1; MY_CS_IS_TOOSMALL(chlen); )
      {
        DBUG_ASSERT(i < sizeof(buf));
        DBUG_ASSERT(chlen != 1);
        if (getbyte(&buf[i++]))
          return 1; // EOF
        chlen= my_charlen(charset(), buf, buf + i);
      }

      /*
        Either a complete multi-byte sequence,
        or a broken byte sequence was found.
        Check if the sequence is a prefix of the "LINES TERMINATED BY" string.
      */
      if ((uchar) buf[0] == m_line_term.initial_byte() &&
          i <= m_line_term.length() &&
          !memcmp(buf, m_line_term.ptr(), i))
      {
        if (m_line_term.length() == i)
        {
          /*
            We found a "LINES TERMINATED BY" string that consists
            of a single multi-byte character.
          */
          return 0;
        }
        /*
          buf[] is a prefix of "LINES TERMINATED BY".
          Now check the suffix. Length of the suffix of line_term_ptr
          that still needs to be checked is (line_term_length - i).
          Note, READ_INFO::terminator() assumes that the leftmost byte of the
          argument is already scanned from the file and is checked to
          be a known prefix (e.g. against line_term.initial_char()).
          So we need to pass one extra byte.
        */
        if (terminator(m_line_term.ptr() + i - 1,
                       m_line_term.length() - i + 1))
          return 0;
      }
      /*
        Here we have a good multi-byte sequence or a broken byte sequence,
        and the sequence is not equal to "LINES TERMINATED BY".
        No need to check for escape_char, because:
        - multi-byte escape characters in "FIELDS ESCAPED BY" are not
          supported and are rejected at parse time.
        - broken single-byte sequences are not recognized as escapes,
          they are considered to be a part of the data and are converted to
          question marks.
      */
      line_cuted= true;
      continue;
    }
    if (buf[0] == escape_char)
    {
      /* Skip the escaped byte too, so it is not tested as a terminator */
      line_cuted= true;
      if (GET == my_b_EOF)
        return 1;
      continue;
    }
    if (terminator(buf[0], m_line_term))
      return 0;
    /* An ordinary byte was skipped: the line was longer than expected */
    line_cuted= true;
  }
}
1751 | |
1752 | |
1753 | bool READ_INFO::find_start_of_fields() |
1754 | { |
1755 | for (int chr= GET ; chr != my_b_EOF ; chr= GET) |
1756 | { |
1757 | if (terminator(chr, m_line_start)) |
1758 | return false; |
1759 | } |
1760 | return (found_end_of_line= eof= true); |
1761 | } |
1762 | |
1763 | |
1764 | /* |
1765 | Clear taglist from tags with a specified level |
1766 | */ |
1767 | int READ_INFO::clear_level(int level_arg) |
1768 | { |
1769 | DBUG_ENTER("READ_INFO::read_xml clear_level" ); |
1770 | List_iterator<XML_TAG> xmlit(taglist); |
1771 | xmlit.rewind(); |
1772 | XML_TAG *tag; |
1773 | |
1774 | while ((tag= xmlit++)) |
1775 | { |
1776 | if(tag->level >= level_arg) |
1777 | { |
1778 | xmlit.remove(); |
1779 | delete tag; |
1780 | } |
1781 | } |
1782 | DBUG_RETURN(0); |
1783 | } |
1784 | |
1785 | |
1786 | /* |
1787 | Convert an XML entity to Unicode value. |
1788 | Return -1 on error; |
1789 | */ |
static int
my_xml_entity_to_char(const char *name, uint length)
{
  /* Only the five predefined entities are known; dispatch on name length */
  switch (length) {
  case 2:
    if (memcmp(name, "gt", 2) == 0)
      return '>';
    if (memcmp(name, "lt", 2) == 0)
      return '<';
    break;
  case 3:
    if (memcmp(name, "amp", 3) == 0)
      return '&';
    break;
  case 4:
    if (memcmp(name, "quot", 4) == 0)
      return '"';
    if (memcmp(name, "apos", 4) == 0)
      return '\'';
    break;
  default:
    break;
  }
  return -1;                                    /* Unknown entity */
}
1814 | |
1815 | |
/**
  @brief Convert carriage return, line feed, tab to space

  @param chr   character

  @details According to the "XML 1.0" standard,
           only space (#x20) characters, carriage returns,
           line feeds or tabs are considered as spaces.
           Convert all of them to space (#x20) for parsing simplicity.
*/
static int
my_tospace(int chr)
{
  /* Tab, carriage return and line feed all become a plain space */
  switch (chr) {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
1831 | |
1832 | |
1833 | /* |
1834 | Read an xml value: handle multibyte and xml escape |
1835 | */ |
1836 | int READ_INFO::read_value(int delim, String *val) |
1837 | { |
1838 | int chr; |
1839 | String tmp; |
1840 | |
1841 | for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF; chr= GET) |
1842 | { |
1843 | if(chr == '&') |
1844 | { |
1845 | tmp.length(0); |
1846 | for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET)) |
1847 | { |
1848 | if (chr == my_b_EOF) |
1849 | return chr; |
1850 | tmp.append(chr); |
1851 | } |
1852 | if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0) |
1853 | val->append(chr); |
1854 | else |
1855 | { |
1856 | val->append('&'); |
1857 | val->append(tmp); |
1858 | val->append(';'); |
1859 | } |
1860 | } |
1861 | else |
1862 | { |
1863 | val->append(chr); |
1864 | if (use_mb(charset()) && read_mbtail(val)) |
1865 | return my_b_EOF; |
1866 | } |
1867 | } |
1868 | return my_tospace(chr); |
1869 | } |
1870 | |
1871 | |
1872 | /* |
1873 | Read a record in xml format |
1874 | tags and attributes are stored in taglist |
1875 | when tag set in ROWS IDENTIFIED BY is closed, we are ready and return |
1876 | */ |
1877 | int READ_INFO::read_xml(THD *thd) |
1878 | { |
1879 | DBUG_ENTER("READ_INFO::read_xml" ); |
1880 | int chr, chr2, chr3; |
1881 | int delim= 0; |
1882 | String tag, attribute, value; |
1883 | bool in_tag= false; |
1884 | |
1885 | tag.length(0); |
1886 | attribute.length(0); |
1887 | value.length(0); |
1888 | |
1889 | for (chr= my_tospace(GET); chr != my_b_EOF ; ) |
1890 | { |
1891 | switch(chr){ |
1892 | case '<': /* read tag */ |
1893 | /* TODO: check if this is a comment <!-- comment --> */ |
1894 | chr= my_tospace(GET); |
1895 | if(chr == '!') |
1896 | { |
1897 | chr2= GET; |
1898 | chr3= GET; |
1899 | |
1900 | if(chr2 == '-' && chr3 == '-') |
1901 | { |
1902 | chr2= 0; |
1903 | chr3= 0; |
1904 | chr= my_tospace(GET); |
1905 | |
1906 | while(chr != '>' || chr2 != '-' || chr3 != '-') |
1907 | { |
1908 | if(chr == '-') |
1909 | { |
1910 | chr3= chr2; |
1911 | chr2= chr; |
1912 | } |
1913 | else if (chr2 == '-') |
1914 | { |
1915 | chr2= 0; |
1916 | chr3= 0; |
1917 | } |
1918 | chr= my_tospace(GET); |
1919 | if (chr == my_b_EOF) |
1920 | goto found_eof; |
1921 | } |
1922 | break; |
1923 | } |
1924 | } |
1925 | |
1926 | tag.length(0); |
1927 | while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF) |
1928 | { |
1929 | if(chr != delim) /* fix for the '<field name =' format */ |
1930 | tag.append(chr); |
1931 | chr= my_tospace(GET); |
1932 | } |
1933 | |
1934 | // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term |
1935 | if((tag.length() == m_line_term.length() - 2) && |
1936 | (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0)) |
1937 | { |
1938 | DBUG_PRINT("read_xml" , ("start-of-row: %i %s %s" , |
1939 | level,tag.c_ptr_safe(), m_line_term.ptr())); |
1940 | } |
1941 | |
1942 | if(chr == ' ' || chr == '>') |
1943 | { |
1944 | level++; |
1945 | clear_level(level + 1); |
1946 | } |
1947 | |
1948 | if (chr == ' ') |
1949 | in_tag= true; |
1950 | else |
1951 | in_tag= false; |
1952 | break; |
1953 | |
1954 | case ' ': /* read attribute */ |
1955 | while(chr == ' ') /* skip blanks */ |
1956 | chr= my_tospace(GET); |
1957 | |
1958 | if(!in_tag) |
1959 | break; |
1960 | |
1961 | while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF) |
1962 | { |
1963 | attribute.append(chr); |
1964 | chr= my_tospace(GET); |
1965 | } |
1966 | break; |
1967 | |
1968 | case '>': /* end tag - read tag value */ |
1969 | in_tag= false; |
1970 | chr= read_value('<', &value); |
1971 | if(chr == my_b_EOF) |
1972 | goto found_eof; |
1973 | |
1974 | /* save value to list */ |
1975 | if (tag.length() > 0 && value.length() > 0) |
1976 | { |
1977 | DBUG_PRINT("read_xml" , ("lev:%i tag:%s val:%s" , |
1978 | level,tag.c_ptr_safe(), value.c_ptr_safe())); |
1979 | XML_TAG *tmp= new XML_TAG(level, tag, value); |
1980 | if (!tmp || taglist.push_front(tmp, thd->mem_root)) |
1981 | DBUG_RETURN(1); // End of memory |
1982 | } |
1983 | tag.length(0); |
1984 | value.length(0); |
1985 | attribute.length(0); |
1986 | break; |
1987 | |
1988 | case '/': /* close tag */ |
1989 | chr= my_tospace(GET); |
1990 | /* Decrease the 'level' only when (i) It's not an */ |
1991 | /* (without space) empty tag i.e. <tag/> or, (ii) */ |
1992 | /* It is of format <row col="val" .../> */ |
1993 | if(chr != '>' || in_tag) |
1994 | { |
1995 | level--; |
1996 | in_tag= false; |
1997 | } |
1998 | if(chr != '>') /* if this is an empty tag <tag /> */ |
1999 | tag.length(0); /* we should keep tag value */ |
2000 | while(chr != '>' && chr != my_b_EOF) |
2001 | { |
2002 | tag.append(chr); |
2003 | chr= my_tospace(GET); |
2004 | } |
2005 | |
2006 | if((tag.length() == m_line_term.length() - 2) && |
2007 | (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0)) |
2008 | { |
2009 | DBUG_PRINT("read_xml" , ("found end-of-row %i %s" , |
2010 | level, tag.c_ptr_safe())); |
2011 | DBUG_RETURN(0); //normal return |
2012 | } |
2013 | chr= my_tospace(GET); |
2014 | break; |
2015 | |
2016 | case '=': /* attribute name end - read the value */ |
2017 | //check for tag field and attribute name |
2018 | if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field" )) && |
2019 | !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name" ))) |
2020 | { |
2021 | /* |
2022 | this is format <field name="xx">xx</field> |
2023 | where actual fieldname is in attribute |
2024 | */ |
2025 | delim= my_tospace(GET); |
2026 | tag.length(0); |
2027 | attribute.length(0); |
2028 | chr= '<'; /* we pretend that it is a tag */ |
2029 | level--; |
2030 | break; |
2031 | } |
2032 | |
2033 | //check for " or ' |
2034 | chr= GET; |
2035 | if (chr == my_b_EOF) |
2036 | goto found_eof; |
2037 | if(chr == '"' || chr == '\'') |
2038 | { |
2039 | delim= chr; |
2040 | } |
2041 | else |
2042 | { |
2043 | delim= ' '; /* no delimiter, use space */ |
2044 | PUSH(chr); |
2045 | } |
2046 | |
2047 | chr= read_value(delim, &value); |
2048 | if (attribute.length() > 0 && value.length() > 0) |
2049 | { |
2050 | DBUG_PRINT("read_xml" , ("lev:%i att:%s val:%s\n" , |
2051 | level + 1, |
2052 | attribute.c_ptr_safe(), |
2053 | value.c_ptr_safe())); |
2054 | XML_TAG *tmp= new XML_TAG(level + 1, attribute, value); |
2055 | if (!tmp || taglist.push_front(tmp, thd->mem_root)) |
2056 | DBUG_RETURN(1); // End of memory |
2057 | } |
2058 | attribute.length(0); |
2059 | value.length(0); |
2060 | if (chr != ' ') |
2061 | chr= my_tospace(GET); |
2062 | break; |
2063 | |
2064 | default: |
2065 | chr= my_tospace(GET); |
2066 | } /* end switch */ |
2067 | } /* end while */ |
2068 | |
2069 | found_eof: |
2070 | DBUG_PRINT("read_xml" ,("Found eof" )); |
2071 | eof= 1; |
2072 | DBUG_RETURN(1); |
2073 | } |
2074 | |