1/*
2 Copyright (c) 2000, 2016, Oracle and/or its affiliates.
3 Copyright (c) 2010, 2018, MariaDB Corporation.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
17
18
19/* Copy data from a textfile to table */
20/* 2006-12 Erik Wetterberg : LOAD XML added */
21
22#include "mariadb.h"
23#include "sql_priv.h"
24#include "unireg.h"
25#include "sql_load.h"
26#include "sql_load.h"
27#include "sql_cache.h" // query_cache_*
28#include "sql_base.h" // fill_record_n_invoke_before_triggers
29#include <my_dir.h>
30#include "sql_view.h" // check_key_in_view
31#include "sql_insert.h" // check_that_all_fields_are_given_values,
32 // write_record
33#include "sql_acl.h" // INSERT_ACL, UPDATE_ACL
34#include "log_event.h" // Delete_file_log_event,
35 // Execute_load_query_log_event,
36 // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F
37#include <m_ctype.h>
38#include "rpl_mi.h"
39#include "sql_repl.h"
40#include "sp_head.h"
41#include "sql_trigger.h"
42#include "sql_derived.h"
43#include "sql_show.h"
44
45extern "C" int _my_b_net_read(IO_CACHE *info, uchar *Buffer, size_t Count);
46
47class XML_TAG {
48public:
49 int level;
50 String field;
51 String value;
52 XML_TAG(int l, String f, String v);
53};
54
55
56XML_TAG::XML_TAG(int l, String f, String v)
57{
58 level= l;
59 field.append(f);
60 value.append(v);
61}
62
63
64/*
65 Field and line terminators must be interpreted as sequence of unsigned char.
66 Otherwise, non-ascii terminators will be negative on some platforms,
67 and positive on others (depending on the implementation of char).
68*/
69class Term_string
70{
71 const uchar *m_ptr;
72 uint m_length;
73 int m_initial_byte;
74public:
75 Term_string(const String &str) :
76 m_ptr(static_cast<const uchar*>(static_cast<const void*>(str.ptr()))),
77 m_length(str.length()),
78 m_initial_byte((uchar) (str.length() ? str.ptr()[0] : INT_MAX))
79 { }
80 void set(const uchar *str, uint length, int initial_byte)
81 {
82 m_ptr= str;
83 m_length= length;
84 m_initial_byte= initial_byte;
85 }
86 void reset() { set(NULL, 0, INT_MAX); }
87 const uchar *ptr() const { return m_ptr; }
88 uint length() const { return m_length; }
89 int initial_byte() const { return m_initial_byte; }
90 bool eq(const Term_string &other) const
91 {
92 return length() == other.length() && !memcmp(ptr(), other.ptr(), length());
93 }
94};
95
96
/*
  Byte-level input helpers; they expand to expressions that use the
  "stack", "stack_pos" and "cache" names of the scope they are used in.

  GET  - yields the most recently pushed-back byte if the push-back stack
         is not empty, otherwise the next byte from my_b_get(&cache).
  PUSH - puts a byte back so that the next GET returns it. No bounds check
         is performed here.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
#define PUSH(A) *(stack_pos++)=(A)
99
#ifdef WITH_WSREP
/**
  If requested by wsrep_load_data_splitting, commit and restart
  the transaction after every 10,000 inserted rows.

  @param thd    current thread
  @param table  table being loaded
  @param info   row counters of the running LOAD DATA

  @retval true   the wsrep commit or the binlog commit failed
  @retval false  nothing had to be done, or the intermediate commit was made
*/

static bool wsrep_load_data_split(THD *thd, const TABLE *table,
                                  const COPY_INFO &info)
{
  DBUG_ENTER("wsrep_load_data_split");

  /* Splitting must be switched on and wsrep active for this thread. */
  if (!wsrep_load_data_splitting || !wsrep_on(thd))
    DBUG_RETURN(false);

  /* Act only on a non-zero multiple of 10,000 records. */
  if (!info.records || (info.records % 10000))
    DBUG_RETURN(false);

  /*
    The statement transaction must list exactly two participants,
    the first of which is the binlog.
  */
  if (!thd->transaction.stmt.ha_list
      || thd->transaction.stmt.ha_list->ht() != binlog_hton
      || !thd->transaction.stmt.ha_list->next()
      || thd->transaction.stmt.ha_list->next()->next())
    DBUG_RETURN(false);

  /* The second participant has to be InnoDB. */
  handlerton *hton= thd->transaction.stmt.ha_list->next()->ht();
  if (!hton || hton->db_type != DB_TYPE_INNODB)
    DBUG_RETURN(false);

  WSREP_DEBUG("intermediate transaction commit in LOAD DATA");
  if (wsrep_run_wsrep_commit(thd, true) != WSREP_TRX_OK)
    DBUG_RETURN(true);
  if (binlog_hton->commit(binlog_hton, thd, true))
    DBUG_RETURN(true);
  wsrep_post_commit(thd, true);
  hton->commit(hton, thd, true);
  table->file->extra(HA_EXTRA_FAKE_START_STMT);

  DBUG_RETURN(false);
}
/*
  Statement form for the row loops: on failure clear the auto-increment
  flag of the table and leave the calling function with 1.
*/
# define WSREP_LOAD_DATA_SPLIT(thd,table,info)		\
  if (wsrep_load_data_split(thd,table,info))		\
  {							\
    table->auto_increment_field_not_null= FALSE;	\
    DBUG_RETURN(1);					\
  }
#else /* WITH_WSREP */
#define WSREP_LOAD_DATA_SPLIT(thd,table,info) /* empty */
#endif /* WITH_WSREP */
140
141class READ_INFO: public Load_data_param
142{
143 File file;
144 String data; /* Read buffer */
145 Term_string m_field_term; /* FIELDS TERMINATED BY 'string' */
146 Term_string m_line_term; /* LINES TERMINATED BY 'string' */
147 Term_string m_line_start; /* LINES STARTING BY 'string' */
148 int enclosed_char,escape_char;
149 int *stack,*stack_pos;
150 bool found_end_of_line,start_of_line,eof;
151 int level; /* for load xml */
152
153 bool getbyte(char *to)
154 {
155 int chr= GET;
156 if (chr == my_b_EOF)
157 return (eof= true);
158 *to= chr;
159 return false;
160 }
161
162 /**
163 Read a tail of a multi-byte character.
164 The first byte of the character is assumed to be already
165 read from the file and appended to "str".
166
167 @returns true - if EOF happened unexpectedly
168 @returns false - no EOF happened: found a good multi-byte character,
169 or a bad byte sequence
170
171 Note:
172 The return value depends only on EOF:
173 - read_mbtail() returns "false" is a good character was read, but also
174 - read_mbtail() returns "false" if an incomplete byte sequence was found
175 and no EOF happened.
176
177 For example, suppose we have an ujis file with bytes 0x8FA10A, where:
178 - 0x8FA1 is an incomplete prefix of a 3-byte character
179 (it should be [8F][A1-FE][A1-FE] to make a full 3-byte character)
180 - 0x0A is a line demiliter
181 This file has some broken data, the trailing [A1-FE] is missing.
182
183 In this example it works as follows:
184 - 0x8F is read from the file and put into "data" before the call
185 for read_mbtail()
186 - 0xA1 is read from the file and put into "data" by read_mbtail()
187 - 0x0A is kept in the read queue, so the next read iteration after
188 the current read_mbtail() call will normally find it and recognize as
189 a line delimiter
190 - the current call for read_mbtail() returns "false",
191 because no EOF happened
192 */
193 bool read_mbtail(String *str)
194 {
195 int chlen;
196 if ((chlen= my_charlen(charset(), str->end() - 1, str->end())) == 1)
197 return false; // Single byte character found
198 for (uint32 length0= str->length() - 1 ; MY_CS_IS_TOOSMALL(chlen); )
199 {
200 int chr= GET;
201 if (chr == my_b_EOF)
202 {
203 DBUG_PRINT("info", ("read_mbtail: chlen=%d; unexpected EOF", chlen));
204 return true; // EOF
205 }
206 str->append(chr);
207 chlen= my_charlen(charset(), str->ptr() + length0, str->end());
208 if (chlen == MY_CS_ILSEQ)
209 {
210 /**
211 It has been an incomplete (but a valid) sequence so far,
212 but the last byte turned it into a bad byte sequence.
213 Unget the very last byte.
214 */
215 str->length(str->length() - 1);
216 PUSH(chr);
217 DBUG_PRINT("info", ("read_mbtail: ILSEQ"));
218 return false; // Bad byte sequence
219 }
220 }
221 DBUG_PRINT("info", ("read_mbtail: chlen=%d", chlen));
222 return false; // Good multi-byte character
223 }
224
225public:
226 bool error,line_cuted,found_null,enclosed;
227 uchar *row_start, /* Found row starts here */
228 *row_end; /* Found row ends here */
229 LOAD_FILE_IO_CACHE cache;
230
231 READ_INFO(THD *thd, File file, const Load_data_param &param,
232 String &field_term,String &line_start,String &line_term,
233 String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
234 ~READ_INFO();
235 int read_field();
236 int read_fixed_length(void);
237 int next_line(void);
238 char unescape(char chr);
239 bool terminator(const uchar *ptr, uint length);
240 bool terminator(const Term_string &str)
241 { return terminator(str.ptr(), str.length()); }
242 bool terminator(int chr, const Term_string &str)
243 { return str.initial_byte() == chr && terminator(str); }
244 bool find_start_of_fields();
245 /* load xml */
246 List<XML_TAG> taglist;
247 int read_value(int delim, String *val);
248 int read_xml(THD *thd);
249 int clear_level(int level);
250
251 my_off_t file_length() { return cache.end_of_file; }
252 my_off_t position() { return my_b_tell(&cache); }
253
254 /**
255 skip all data till the eof.
256 */
257 void skip_data_till_eof()
258 {
259 while (GET != my_b_EOF)
260 ;
261 }
262};
263
264static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
265 List<Item> &fields_vars, List<Item> &set_fields,
266 List<Item> &set_values, READ_INFO &read_info,
267 ulong skip_lines,
268 bool ignore_check_option_errors);
269static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
270 List<Item> &fields_vars, List<Item> &set_fields,
271 List<Item> &set_values, READ_INFO &read_info,
272 String &enclosed, ulong skip_lines,
273 bool ignore_check_option_errors);
274
275static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
276 List<Item> &fields_vars, List<Item> &set_fields,
277 List<Item> &set_values, READ_INFO &read_info,
278 String &enclosed, ulong skip_lines,
279 bool ignore_check_option_errors);
280
281#ifndef EMBEDDED_LIBRARY
282static bool write_execute_load_query_log_event(THD *, const sql_exchange*, const
283 char*, const char*, bool, enum enum_duplicates, bool, bool, int);
284#endif /* EMBEDDED_LIBRARY */
285
286
287bool Load_data_param::add_outvar_field(THD *thd, const Field *field)
288{
289 if (field->flags & BLOB_FLAG)
290 {
291 m_use_blobs= true;
292 m_fixed_length+= 256; // Will be extended if needed
293 }
294 else
295 m_fixed_length+= field->field_length;
296 return false;
297}
298
299
300bool Load_data_param::add_outvar_user_var(THD *thd)
301{
302 if (m_is_fixed_length)
303 {
304 my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
305 return true;
306 }
307 return false;
308}
309
310
311/*
312 Execute LOAD DATA query
313
314 SYNOPSYS
315 mysql_load()
316 thd - current thread
317 ex - sql_exchange object representing source file and its parsing rules
318 table_list - list of tables to which we are loading data
319 fields_vars - list of fields and variables to which we read
320 data from file
321 set_fields - list of fields mentioned in set clause
322 set_values - expressions to assign to fields in previous list
323 handle_duplicates - indicates whenever we should emit error or
324 replace row if we will meet duplicates.
325 ignore - - indicates whenever we should ignore duplicates
326 read_file_from_client - is this LOAD DATA LOCAL ?
327
328 RETURN VALUES
329 TRUE - error / FALSE - success
330*/
331
332int mysql_load(THD *thd, const sql_exchange *ex, TABLE_LIST *table_list,
333 List<Item> &fields_vars, List<Item> &set_fields,
334 List<Item> &set_values,
335 enum enum_duplicates handle_duplicates, bool ignore,
336 bool read_file_from_client)
337{
338 char name[FN_REFLEN];
339 File file;
340 TABLE *table= NULL;
341 int error= 0;
342 bool is_fifo=0;
343#ifndef EMBEDDED_LIBRARY
344 killed_state killed_status;
345 bool is_concurrent;
346#endif
347 const char *db= table_list->db.str; // This is never null
348 /*
349 If path for file is not defined, we will use the current database.
350 If this is not set, we will use the directory where the table to be
351 loaded is located
352 */
353 const char *tdb= thd->db.str ? thd->db.str : db; // Result is never null
354 ulong skip_lines= ex->skip_lines;
355 bool transactional_table __attribute__((unused));
356 DBUG_ENTER("mysql_load");
357
358 /*
359 Bug #34283
360 mysqlbinlog leaves tmpfile after termination if binlog contains
361 load data infile, so in mixed mode we go to row-based for
362 avoiding the problem.
363 */
364 thd->set_current_stmt_binlog_format_row_if_mixed();
365
366#ifdef EMBEDDED_LIBRARY
367 read_file_from_client = 0; //server is always in the same process
368#endif
369
370 if (ex->escaped->length() > 1 || ex->enclosed->length() > 1)
371 {
372 my_message(ER_WRONG_FIELD_TERMINATORS,
373 ER_THD(thd, ER_WRONG_FIELD_TERMINATORS),
374 MYF(0));
375 DBUG_RETURN(TRUE);
376 }
377
378 /* Report problems with non-ascii separators */
379 if (!ex->escaped->is_ascii() || !ex->enclosed->is_ascii() ||
380 !ex->field_term->is_ascii() ||
381 !ex->line_term->is_ascii() || !ex->line_start->is_ascii())
382 {
383 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
384 WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED,
385 ER_THD(thd, WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED));
386 }
387
388 if (open_and_lock_tables(thd, table_list, TRUE, 0))
389 DBUG_RETURN(TRUE);
390 if (mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
391 mysql_handle_single_derived(thd->lex, table_list, DT_PREPARE))
392 DBUG_RETURN(TRUE);
393 if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
394 &thd->lex->select_lex.top_join_list,
395 table_list,
396 thd->lex->select_lex.leaf_tables, FALSE,
397 INSERT_ACL | UPDATE_ACL,
398 INSERT_ACL | UPDATE_ACL, FALSE))
399 DBUG_RETURN(-1);
400 if (!table_list->table || // do not suport join view
401 !table_list->single_table_updatable() || // and derived tables
402 check_key_in_view(thd, table_list))
403 {
404 my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias.str, "LOAD");
405 DBUG_RETURN(TRUE);
406 }
407 if (table_list->prepare_where(thd, 0, TRUE) ||
408 table_list->prepare_check_option(thd))
409 {
410 DBUG_RETURN(TRUE);
411 }
412 thd_proc_info(thd, "Executing");
413 /*
414 Let us emit an error if we are loading data to table which is used
415 in subselect in SET clause like we do it for INSERT.
416
417 The main thing to fix to remove this restriction is to ensure that the
418 table is marked to be 'used for insert' in which case we should never
419 mark this table as 'const table' (ie, one that has only one row).
420 */
421 if (unique_table(thd, table_list, table_list->next_global, 0))
422 {
423 my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name.str,
424 "LOAD DATA");
425 DBUG_RETURN(TRUE);
426 }
427
428 table= table_list->table;
429 transactional_table= table->file->has_transactions();
430#ifndef EMBEDDED_LIBRARY
431 is_concurrent= (table_list->lock_type == TL_WRITE_CONCURRENT_INSERT);
432#endif
433
434 if (table->versioned(VERS_TIMESTAMP) && handle_duplicates == DUP_REPLACE)
435 {
436 // Additional memory may be required to create historical items.
437 if (table_list->set_insert_values(thd->mem_root))
438 DBUG_RETURN(TRUE);
439 }
440
441 if (!fields_vars.elements)
442 {
443 Field_iterator_table_ref field_iterator;
444 field_iterator.set(table_list);
445 for (; !field_iterator.end_of_fields(); field_iterator.next())
446 {
447 if (field_iterator.field() &&
448 field_iterator.field()->invisible > VISIBLE)
449 continue;
450 Item *item;
451 if (!(item= field_iterator.create_item(thd)))
452 DBUG_RETURN(TRUE);
453 fields_vars.push_back(item->real_item(), thd->mem_root);
454 }
455 bitmap_set_all(table->write_set);
456 /*
457 Let us also prepare SET clause, altough it is probably empty
458 in this case.
459 */
460 if (setup_fields(thd, Ref_ptr_array(),
461 set_fields, MARK_COLUMNS_WRITE, 0, NULL, 0) ||
462 setup_fields(thd, Ref_ptr_array(),
463 set_values, MARK_COLUMNS_READ, 0, NULL, 0))
464 DBUG_RETURN(TRUE);
465 }
466 else
467 { // Part field list
468 /* TODO: use this conds for 'WITH CHECK OPTIONS' */
469 if (setup_fields(thd, Ref_ptr_array(),
470 fields_vars, MARK_COLUMNS_WRITE, 0, NULL, 0) ||
471 setup_fields(thd, Ref_ptr_array(),
472 set_fields, MARK_COLUMNS_WRITE, 0, NULL, 0) ||
473 check_that_all_fields_are_given_values(thd, table, table_list))
474 DBUG_RETURN(TRUE);
475 /* Fix the expressions in SET clause */
476 if (setup_fields(thd, Ref_ptr_array(),
477 set_values, MARK_COLUMNS_READ, 0, NULL, 0))
478 DBUG_RETURN(TRUE);
479 }
480 switch_to_nullable_trigger_fields(fields_vars, table);
481 switch_to_nullable_trigger_fields(set_fields, table);
482 switch_to_nullable_trigger_fields(set_values, table);
483
484 table->prepare_triggers_for_insert_stmt_or_event();
485 table->mark_columns_needed_for_insert();
486
487 Load_data_param param(ex->cs ? ex->cs : thd->variables.collation_database,
488 !ex->field_term->length() && !ex->enclosed->length());
489 List_iterator_fast<Item> it(fields_vars);
490 Item *item;
491
492 while ((item= it++))
493 {
494 const Load_data_outvar *var= item->get_load_data_outvar_or_error();
495 if (!var || var->load_data_add_outvar(thd, &param))
496 DBUG_RETURN(true);
497 }
498 if (param.use_blobs() && !ex->line_term->length() && !ex->field_term->length())
499 {
500 my_message(ER_BLOBS_AND_NO_TERMINATED,
501 ER_THD(thd, ER_BLOBS_AND_NO_TERMINATED), MYF(0));
502 DBUG_RETURN(TRUE);
503 }
504
505 /* We can't give an error in the middle when using LOCAL files */
506 if (read_file_from_client && handle_duplicates == DUP_ERROR)
507 ignore= 1;
508
509#ifndef EMBEDDED_LIBRARY
510 if (read_file_from_client)
511 {
512 (void)net_request_file(&thd->net,ex->file_name);
513 file = -1;
514 }
515 else
516#endif
517 {
518#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS
519 ex->file_name+=dirname_length(ex->file_name);
520#endif
521 if (!dirname_length(ex->file_name))
522 {
523 strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
524 (void) fn_format(name, ex->file_name, name, "",
525 MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
526 }
527 else
528 {
529 (void) fn_format(name, ex->file_name, mysql_real_data_home, "",
530 MY_RELATIVE_PATH | MY_UNPACK_FILENAME |
531 MY_RETURN_REAL_PATH);
532 }
533
534 if (thd->rgi_slave)
535 {
536#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
537 if (strncmp(thd->rgi_slave->rli->slave_patternload_file, name,
538 thd->rgi_slave->rli->slave_patternload_file_size))
539 {
540 /*
541 LOAD DATA INFILE in the slave SQL Thread can only read from
542 --slave-load-tmpdir". This should never happen. Please, report a bug.
543 */
544
545 sql_print_error("LOAD DATA INFILE in the slave SQL Thread can only read from --slave-load-tmpdir. " \
546 "Please, report a bug.");
547 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--slave-load-tmpdir");
548 DBUG_RETURN(TRUE);
549 }
550#else
551 /*
552 This is impossible and should never happen.
553 */
554 DBUG_ASSERT(FALSE);
555#endif
556 }
557 else if (!is_secure_file_path(name))
558 {
559 /* Read only allowed from within dir specified by secure_file_priv */
560 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
561 DBUG_RETURN(TRUE);
562 }
563
564#if !defined(__WIN__) && ! defined(__NETWARE__)
565 MY_STAT stat_info;
566 if (!my_stat(name, &stat_info, MYF(MY_WME)))
567 DBUG_RETURN(TRUE);
568
569 // if we are not in slave thread, the file must be:
570 if (!thd->slave_thread &&
571 !((stat_info.st_mode & S_IFLNK) != S_IFLNK && // symlink
572 ((stat_info.st_mode & S_IFREG) == S_IFREG || // regular file
573 (stat_info.st_mode & S_IFIFO) == S_IFIFO))) // named pipe
574 {
575 my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
576 DBUG_RETURN(TRUE);
577 }
578 if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
579 is_fifo= 1;
580#endif
581 if ((file= mysql_file_open(key_file_load,
582 name, O_RDONLY, MYF(MY_WME))) < 0)
583
584 DBUG_RETURN(TRUE);
585 }
586
587 COPY_INFO info;
588 bzero((char*) &info,sizeof(info));
589 info.ignore= ignore;
590 info.handle_duplicates=handle_duplicates;
591 info.escape_char= (ex->escaped->length() && (ex->escaped_given() ||
592 !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)))
593 ? (*ex->escaped)[0] : INT_MAX;
594
595 READ_INFO read_info(thd, file, param,
596 *ex->field_term, *ex->line_start,
597 *ex->line_term, *ex->enclosed,
598 info.escape_char, read_file_from_client, is_fifo);
599 if (unlikely(read_info.error))
600 {
601 if (file >= 0)
602 mysql_file_close(file, MYF(0)); // no files in net reading
603 DBUG_RETURN(TRUE); // Can't allocate buffers
604 }
605
606#ifndef EMBEDDED_LIBRARY
607 if (mysql_bin_log.is_open())
608 {
609 read_info.cache.thd = thd;
610 read_info.cache.wrote_create_file = 0;
611 read_info.cache.last_pos_in_file = HA_POS_ERROR;
612 read_info.cache.log_delayed= transactional_table;
613 }
614#endif /*!EMBEDDED_LIBRARY*/
615
616 thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */
617 thd->cuted_fields=0L;
618 /* Skip lines if there is a line terminator */
619 if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
620 {
621 /* ex->skip_lines needs to be preserved for logging */
622 while (skip_lines > 0)
623 {
624 skip_lines--;
625 if (read_info.next_line())
626 break;
627 }
628 }
629
630 thd_proc_info(thd, "Reading file");
631 if (likely(!(error= MY_TEST(read_info.error))))
632 {
633 table->reset_default_fields();
634 table->next_number_field=table->found_next_number_field;
635 if (ignore ||
636 handle_duplicates == DUP_REPLACE)
637 table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
638 if (handle_duplicates == DUP_REPLACE &&
639 (!table->triggers ||
640 !table->triggers->has_delete_triggers()))
641 table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
642 if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
643 table->file->ha_start_bulk_insert((ha_rows) 0);
644 table->copy_blobs=1;
645
646 thd->abort_on_warning= !ignore && thd->is_strict_mode();
647
648 thd_progress_init(thd, 2);
649 if (table_list->table->validate_default_values_of_unset_fields(thd))
650 {
651 read_info.error= true;
652 error= 1;
653 }
654 else if (ex->filetype == FILETYPE_XML) /* load xml */
655 error= read_xml_field(thd, info, table_list, fields_vars,
656 set_fields, set_values, read_info,
657 *(ex->line_term), skip_lines, ignore);
658 else if (read_info.is_fixed_length())
659 error= read_fixed_length(thd, info, table_list, fields_vars,
660 set_fields, set_values, read_info,
661 skip_lines, ignore);
662 else
663 error= read_sep_field(thd, info, table_list, fields_vars,
664 set_fields, set_values, read_info,
665 *ex->enclosed, skip_lines, ignore);
666
667 thd_proc_info(thd, "End bulk insert");
668 if (likely(!error))
669 thd_progress_next_stage(thd);
670 if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
671 table->file->ha_end_bulk_insert() && !error)
672 {
673 table->file->print_error(my_errno, MYF(0));
674 error= 1;
675 }
676 table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
677 table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
678 table->next_number_field=0;
679 }
680 if (file >= 0)
681 mysql_file_close(file, MYF(0));
682 free_blobs(table); /* if pack_blob was used */
683 table->copy_blobs=0;
684 thd->count_cuted_fields= CHECK_FIELD_IGNORE;
685 /*
686 simulated killing in the middle of per-row loop
687 must be effective for binlogging
688 */
689 DBUG_EXECUTE_IF("simulate_kill_bug27571",
690 {
691 error=1;
692 thd->set_killed(KILL_QUERY);
693 };);
694
695#ifndef EMBEDDED_LIBRARY
696 killed_status= (error == 0) ? NOT_KILLED : thd->killed;
697#endif
698
699 /*
700 We must invalidate the table in query cache before binlog writing and
701 ha_autocommit_...
702 */
703 query_cache_invalidate3(thd, table_list, 0);
704 if (error)
705 {
706 if (read_file_from_client)
707 read_info.skip_data_till_eof();
708
709#ifndef EMBEDDED_LIBRARY
710 if (mysql_bin_log.is_open())
711 {
712 {
713 /*
714 Make sure last block (the one which caused the error) gets
715 logged.
716 */
717 log_loaded_block(&read_info.cache, 0, 0);
718 /* If the file was not empty, wrote_create_file is true */
719 if (read_info.cache.wrote_create_file)
720 {
721 int errcode= query_error_code(thd, killed_status == NOT_KILLED);
722
723 /* since there is already an error, the possible error of
724 writing binary log will be ignored */
725 if (thd->transaction.stmt.modified_non_trans_table)
726 (void) write_execute_load_query_log_event(thd, ex,
727 table_list->db.str,
728 table_list->table_name.str,
729 is_concurrent,
730 handle_duplicates, ignore,
731 transactional_table,
732 errcode);
733 else
734 {
735 Delete_file_log_event d(thd, db, transactional_table);
736 (void) mysql_bin_log.write(&d);
737 }
738 }
739 }
740 }
741#endif /*!EMBEDDED_LIBRARY*/
742 error= -1; // Error on read
743 goto err;
744 }
745 sprintf(name, ER_THD(thd, ER_LOAD_INFO),
746 (ulong) info.records, (ulong) info.deleted,
747 (ulong) (info.records - info.copied),
748 (long) thd->get_stmt_da()->current_statement_warn_count());
749
750 if (thd->transaction.stmt.modified_non_trans_table)
751 thd->transaction.all.modified_non_trans_table= TRUE;
752 thd->transaction.all.m_unsafe_rollback_flags|=
753 (thd->transaction.stmt.m_unsafe_rollback_flags & THD_TRANS::DID_WAIT);
754#ifndef EMBEDDED_LIBRARY
755 if (mysql_bin_log.is_open())
756 {
757 /*
758 We need to do the job that is normally done inside
759 binlog_query() here, which is to ensure that the pending event
760 is written before tables are unlocked and before any other
761 events are written. We also need to update the table map
762 version for the binary log to mark that table maps are invalid
763 after this point.
764 */
765 if (thd->is_current_stmt_binlog_format_row())
766 error= thd->binlog_flush_pending_rows_event(TRUE, transactional_table);
767 else
768 {
769 /*
770 As already explained above, we need to call log_loaded_block() to have
771 the last block logged
772 */
773 log_loaded_block(&read_info.cache, 0, 0);
774 if (read_info.cache.wrote_create_file)
775 {
776 int errcode= query_error_code(thd, killed_status == NOT_KILLED);
777 error= write_execute_load_query_log_event(thd, ex,
778 table_list->db.str,
779 table_list->table_name.str,
780 is_concurrent,
781 handle_duplicates, ignore,
782 transactional_table,
783 errcode);
784 }
785
786 /*
787 Flushing the IO CACHE while writing the execute load query log event
788 may result in error (for instance, because the max_binlog_size has been
789 reached, and rotation of the binary log failed).
790 */
791 error= error || mysql_bin_log.get_log_file()->error;
792 }
793 if (unlikely(error))
794 goto err;
795 }
796#endif /*!EMBEDDED_LIBRARY*/
797
798 /* ok to client sent only after binlog write and engine commit */
799 my_ok(thd, info.copied + info.deleted, 0L, name);
800err:
801 DBUG_ASSERT(transactional_table || !(info.copied || info.deleted) ||
802 thd->transaction.stmt.modified_non_trans_table);
803 table->file->ha_release_auto_increment();
804 table->auto_increment_field_not_null= FALSE;
805 thd->abort_on_warning= 0;
806 DBUG_RETURN(error);
807}
808
809
810#ifndef EMBEDDED_LIBRARY
811
812/* Not a very useful function; just to avoid duplication of code */
813static bool write_execute_load_query_log_event(THD *thd, const sql_exchange* ex,
814 const char* db_arg, /* table's database */
815 const char* table_name_arg,
816 bool is_concurrent,
817 enum enum_duplicates duplicates,
818 bool ignore,
819 bool transactional_table,
820 int errcode)
821{
822 char *load_data_query;
823 my_off_t fname_start,
824 fname_end;
825 List<Item> fv;
826 Item *item, *val;
827 int n;
828 const char *tdb= (thd->db.str != NULL ? thd->db.str : db_arg);
829 const char *qualify_db= NULL;
830 char command_buffer[1024];
831 String query_str(command_buffer, sizeof(command_buffer),
832 system_charset_info);
833
834 Load_log_event lle(thd, ex, tdb, table_name_arg, fv, is_concurrent,
835 duplicates, ignore, transactional_table);
836
837 /*
838 force in a LOCAL if there was one in the original.
839 */
840 if (thd->lex->local_file)
841 lle.set_fname_outside_temp_buf(ex->file_name, strlen(ex->file_name));
842
843 query_str.length(0);
844 if (!thd->db.str || strcmp(db_arg, thd->db.str))
845 {
846 /*
847 If used database differs from table's database,
848 prefix table name with database name so that it
849 becomes a FQ name.
850 */
851 qualify_db= db_arg;
852 }
853 lle.print_query(thd, FALSE, (const char *) ex->cs?ex->cs->csname:NULL,
854 &query_str, &fname_start, &fname_end, qualify_db);
855
856 /*
857 prepare fields-list and SET if needed; print_query won't do that for us.
858 */
859 if (!thd->lex->field_list.is_empty())
860 {
861 List_iterator<Item> li(thd->lex->field_list);
862
863 query_str.append(" (");
864 n= 0;
865
866 while ((item= li++))
867 {
868 if (n++)
869 query_str.append(", ");
870 const Load_data_outvar *var= item->get_load_data_outvar();
871 DBUG_ASSERT(var);
872 var->load_data_print_for_log_event(thd, &query_str);
873 }
874 query_str.append(")");
875 }
876
877 if (!thd->lex->update_list.is_empty())
878 {
879 List_iterator<Item> lu(thd->lex->update_list);
880 List_iterator<Item> lv(thd->lex->value_list);
881
882 query_str.append(STRING_WITH_LEN(" SET "));
883 n= 0;
884
885 while ((item= lu++))
886 {
887 val= lv++;
888 if (n++)
889 query_str.append(STRING_WITH_LEN(", "));
890 append_identifier(thd, &query_str, &item->name);
891 query_str.append(&val->name);
892 }
893 }
894
895 if (!(load_data_query= (char *)thd->strmake(query_str.ptr(), query_str.length())))
896 return TRUE;
897
898 Execute_load_query_log_event
899 e(thd, load_data_query, query_str.length(),
900 (uint) (fname_start - 1), (uint) fname_end,
901 (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
902 (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
903 transactional_table, FALSE, FALSE, errcode);
904 return mysql_bin_log.write(&e);
905}
906
907#endif
908
909/****************************************************************************
910** Read of rows of fixed size + optional garage + optonal newline
911****************************************************************************/
912
913static int
914read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
915 List<Item> &fields_vars, List<Item> &set_fields,
916 List<Item> &set_values, READ_INFO &read_info,
917 ulong skip_lines, bool ignore_check_option_errors)
918{
919 List_iterator_fast<Item> it(fields_vars);
920 Item *item;
921 TABLE *table= table_list->table;
922 bool err, progress_reports;
923 ulonglong counter, time_to_report_progress;
924 DBUG_ENTER("read_fixed_length");
925
926 counter= 0;
927 time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
928 progress_reports= 1;
929 if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0)
930 progress_reports= 0;
931
932 while (!read_info.read_fixed_length())
933 {
934 if (thd->killed)
935 {
936 thd->send_kill_message();
937 DBUG_RETURN(1);
938 }
939 if (progress_reports)
940 {
941 thd->progress.counter= read_info.position();
942 if (++counter >= time_to_report_progress)
943 {
944 time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
945 thd_progress_report(thd, thd->progress.counter,
946 thd->progress.max_counter);
947 }
948 }
949 if (skip_lines)
950 {
951 /*
952 We could implement this with a simple seek if:
953 - We are not using DATA INFILE LOCAL
954 - escape character is ""
955 - line starting prefix is ""
956 */
957 skip_lines--;
958 continue;
959 }
960 it.rewind();
961 uchar *pos=read_info.row_start;
962#ifdef HAVE_valgrind
963 read_info.row_end[0]=0;
964#endif
965
966 restore_record(table, s->default_values);
967
968 while ((item= it++))
969 {
970 Load_data_outvar *dst= item->get_load_data_outvar();
971 DBUG_ASSERT(dst);
972 if (pos == read_info.row_end)
973 {
974 if (dst->load_data_set_no_data(thd, &read_info))
975 DBUG_RETURN(1);
976 }
977 else
978 {
979 uint length, fixed_length= dst->load_data_fixed_length();
980 uchar save_chr;
981 if ((length=(uint) (read_info.row_end - pos)) > fixed_length)
982 length= fixed_length;
983 save_chr= pos[length]; pos[length]= '\0'; // Safeguard aganst malloc
984 dst->load_data_set_value(thd, (const char *) pos, length, &read_info);
985 pos[length]= save_chr;
986 if ((pos+= length) > read_info.row_end)
987 pos= read_info.row_end; // Fills rest with space
988 }
989 }
990 if (pos != read_info.row_end)
991 {
992 thd->cuted_fields++; /* To long row */
993 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
994 ER_WARN_TOO_MANY_RECORDS,
995 ER_THD(thd, ER_WARN_TOO_MANY_RECORDS),
996 thd->get_stmt_da()->current_row_for_warning());
997 }
998
999 if (thd->killed ||
1000 fill_record_n_invoke_before_triggers(thd, table, set_fields, set_values,
1001 ignore_check_option_errors,
1002 TRG_EVENT_INSERT))
1003 DBUG_RETURN(1);
1004
1005 switch (table_list->view_check_option(thd, ignore_check_option_errors)) {
1006 case VIEW_CHECK_SKIP:
1007 read_info.next_line();
1008 goto continue_loop;
1009 case VIEW_CHECK_ERROR:
1010 DBUG_RETURN(-1);
1011 }
1012
1013 WSREP_LOAD_DATA_SPLIT(thd, table, info);
1014 err= write_record(thd, table, &info);
1015 table->auto_increment_field_not_null= FALSE;
1016 if (err)
1017 DBUG_RETURN(1);
1018
1019 /*
1020 We don't need to reset auto-increment field since we are restoring
1021 its default value at the beginning of each loop iteration.
1022 */
1023 if (read_info.next_line()) // Skip to next line
1024 break;
1025 if (read_info.line_cuted)
1026 {
1027 thd->cuted_fields++; /* To long row */
1028 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1029 ER_WARN_TOO_MANY_RECORDS,
1030 ER_THD(thd, ER_WARN_TOO_MANY_RECORDS),
1031 thd->get_stmt_da()->current_row_for_warning());
1032 }
1033 thd->get_stmt_da()->inc_current_row_for_warning();
1034continue_loop:;
1035 }
1036 DBUG_RETURN(MY_TEST(read_info.error));
1037}
1038
1039
1040static int
1041read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
1042 List<Item> &fields_vars, List<Item> &set_fields,
1043 List<Item> &set_values, READ_INFO &read_info,
1044 String &enclosed, ulong skip_lines,
1045 bool ignore_check_option_errors)
1046{
1047 List_iterator_fast<Item> it(fields_vars);
1048 Item *item;
1049 TABLE *table= table_list->table;
1050 uint enclosed_length;
1051 bool err, progress_reports;
1052 ulonglong counter, time_to_report_progress;
1053 DBUG_ENTER("read_sep_field");
1054
1055 enclosed_length=enclosed.length();
1056
1057 counter= 0;
1058 time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
1059 progress_reports= 1;
1060 if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0)
1061 progress_reports= 0;
1062
1063 for (;;it.rewind())
1064 {
1065 if (thd->killed)
1066 {
1067 thd->send_kill_message();
1068 DBUG_RETURN(1);
1069 }
1070
1071 if (progress_reports)
1072 {
1073 thd->progress.counter= read_info.position();
1074 if (++counter >= time_to_report_progress)
1075 {
1076 time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
1077 thd_progress_report(thd, thd->progress.counter,
1078 thd->progress.max_counter);
1079 }
1080 }
1081 restore_record(table, s->default_values);
1082
1083 while ((item= it++))
1084 {
1085 uint length;
1086 uchar *pos;
1087 if (read_info.read_field())
1088 break;
1089
1090 /* If this line is to be skipped we don't want to fill field or var */
1091 if (skip_lines)
1092 continue;
1093
1094 pos=read_info.row_start;
1095 length=(uint) (read_info.row_end-pos);
1096
1097 Load_data_outvar *dst= item->get_load_data_outvar_or_error();
1098 DBUG_ASSERT(dst);
1099
1100 if ((!read_info.enclosed &&
1101 (enclosed_length && length == 4 &&
1102 !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
1103 (length == 1 && read_info.found_null))
1104 {
1105 if (dst->load_data_set_null(thd, &read_info))
1106 DBUG_RETURN(1);
1107 }
1108 else
1109 {
1110 read_info.row_end[0]= 0; // Safe to change end marker
1111 if (dst->load_data_set_value(thd, (const char *) pos, length, &read_info))
1112 DBUG_RETURN(1);
1113 }
1114 }
1115
1116 if (unlikely(thd->is_error()))
1117 read_info.error= 1;
1118 if (unlikely(read_info.error))
1119 break;
1120
1121 if (skip_lines)
1122 {
1123 skip_lines--;
1124 continue;
1125 }
1126 if (item)
1127 {
1128 /* Have not read any field, thus input file is simply ended */
1129 if (item == fields_vars.head())
1130 break;
1131 for (; item ; item= it++)
1132 {
1133 Load_data_outvar *dst= item->get_load_data_outvar_or_error();
1134 DBUG_ASSERT(dst);
1135 if (unlikely(dst->load_data_set_no_data(thd, &read_info)))
1136 DBUG_RETURN(1);
1137 }
1138 }
1139
1140 if (unlikely(thd->killed) ||
1141 unlikely(fill_record_n_invoke_before_triggers(thd, table, set_fields,
1142 set_values,
1143 ignore_check_option_errors,
1144 TRG_EVENT_INSERT)))
1145 DBUG_RETURN(1);
1146
1147 switch (table_list->view_check_option(thd,
1148 ignore_check_option_errors)) {
1149 case VIEW_CHECK_SKIP:
1150 read_info.next_line();
1151 goto continue_loop;
1152 case VIEW_CHECK_ERROR:
1153 DBUG_RETURN(-1);
1154 }
1155
1156 WSREP_LOAD_DATA_SPLIT(thd, table, info);
1157 err= write_record(thd, table, &info);
1158 table->auto_increment_field_not_null= FALSE;
1159 if (err)
1160 DBUG_RETURN(1);
1161 /*
1162 We don't need to reset auto-increment field since we are restoring
1163 its default value at the beginning of each loop iteration.
1164 */
1165 if (read_info.next_line()) // Skip to next line
1166 break;
1167 if (read_info.line_cuted)
1168 {
1169 thd->cuted_fields++; /* To long row */
1170 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1171 ER_WARN_TOO_MANY_RECORDS,
1172 ER_THD(thd, ER_WARN_TOO_MANY_RECORDS),
1173 thd->get_stmt_da()->current_row_for_warning());
1174 if (thd->killed)
1175 DBUG_RETURN(1);
1176 }
1177 thd->get_stmt_da()->inc_current_row_for_warning();
1178continue_loop:;
1179 }
1180 DBUG_RETURN(MY_TEST(read_info.error));
1181}
1182
1183
1184/****************************************************************************
1185** Read rows in xml format
1186****************************************************************************/
1187static int
1188read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
1189 List<Item> &fields_vars, List<Item> &set_fields,
1190 List<Item> &set_values, READ_INFO &read_info,
1191 String &row_tag, ulong skip_lines,
1192 bool ignore_check_option_errors)
1193{
1194 List_iterator_fast<Item> it(fields_vars);
1195 Item *item;
1196 TABLE *table= table_list->table;
1197 bool no_trans_update_stmt;
1198 DBUG_ENTER("read_xml_field");
1199
1200 no_trans_update_stmt= !table->file->has_transactions();
1201
1202 for ( ; ; it.rewind())
1203 {
1204 bool err;
1205 if (thd->killed)
1206 {
1207 thd->send_kill_message();
1208 DBUG_RETURN(1);
1209 }
1210
1211 // read row tag and save values into tag list
1212 if (read_info.read_xml(thd))
1213 break;
1214
1215 List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
1216 xmlit.rewind();
1217 XML_TAG *tag= NULL;
1218
1219#ifndef DBUG_OFF
1220 DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
1221 while ((tag= xmlit++))
1222 {
1223 DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
1224 tag->level, tag->field.c_ptr(),
1225 tag->value.c_ptr()));
1226 }
1227#endif
1228
1229 restore_record(table, s->default_values);
1230
1231 while ((item= it++))
1232 {
1233 /* If this line is to be skipped we don't want to fill field or var */
1234 if (skip_lines)
1235 continue;
1236
1237 /* find field in tag list */
1238 xmlit.rewind();
1239 tag= xmlit++;
1240
1241 while(tag && strcmp(tag->field.c_ptr(), item->name.str) != 0)
1242 tag= xmlit++;
1243
1244 Load_data_outvar *dst= item->get_load_data_outvar_or_error();
1245 DBUG_ASSERT(dst);
1246 if (!tag ? dst->load_data_set_null(thd, &read_info) :
1247 dst->load_data_set_value(thd, tag->value.ptr(),
1248 tag->value.length(),
1249 &read_info))
1250 DBUG_RETURN(1);
1251 }
1252
1253 if (unlikely(read_info.error))
1254 break;
1255
1256 if (skip_lines)
1257 {
1258 skip_lines--;
1259 continue;
1260 }
1261
1262 DBUG_ASSERT(!item);
1263
1264 if (thd->killed ||
1265 fill_record_n_invoke_before_triggers(thd, table, set_fields, set_values,
1266 ignore_check_option_errors,
1267 TRG_EVENT_INSERT))
1268 DBUG_RETURN(1);
1269
1270 switch (table_list->view_check_option(thd,
1271 ignore_check_option_errors)) {
1272 case VIEW_CHECK_SKIP:
1273 read_info.next_line();
1274 goto continue_loop;
1275 case VIEW_CHECK_ERROR:
1276 DBUG_RETURN(-1);
1277 }
1278
1279 WSREP_LOAD_DATA_SPLIT(thd, table, info);
1280 err= write_record(thd, table, &info);
1281 table->auto_increment_field_not_null= false;
1282 if (err)
1283 DBUG_RETURN(1);
1284
1285 /*
1286 We don't need to reset auto-increment field since we are restoring
1287 its default value at the beginning of each loop iteration.
1288 */
1289 thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
1290 thd->get_stmt_da()->inc_current_row_for_warning();
1291 continue_loop:;
1292 }
1293 DBUG_RETURN(MY_TEST(read_info.error) || thd->is_error());
1294} /* load xml end */
1295
1296
1297/* Unescape all escape characters, mark \N as null */
1298
1299char
1300READ_INFO::unescape(char chr)
1301{
1302 /* keep this switch synchornous with the ESCAPE_CHARS macro */
1303 switch(chr) {
1304 case 'n': return '\n';
1305 case 't': return '\t';
1306 case 'r': return '\r';
1307 case 'b': return '\b';
1308 case '0': return 0; // Ascii null
1309 case 'Z': return '\032'; // Win32 end of file
1310 case 'N': found_null=1;
1311
1312 /* fall through */
1313 default: return chr;
1314 }
1315}
1316
1317
/*
  Read a line using buffering.
  If the last line is empty (in line mode) then it isn't output.
*/
1322
1323
1324READ_INFO::READ_INFO(THD *thd, File file_par,
1325 const Load_data_param &param,
1326 String &field_term, String &line_start, String &line_term,
1327 String &enclosed_par, int escape, bool get_it_from_net,
1328 bool is_fifo)
1329 :Load_data_param(param),
1330 file(file_par),
1331 m_field_term(field_term), m_line_term(line_term), m_line_start(line_start),
1332 escape_char(escape), found_end_of_line(false), eof(false),
1333 error(false), line_cuted(false), found_null(false)
1334{
1335 data.set_thread_specific();
1336 /*
1337 Field and line terminators must be interpreted as sequence of unsigned char.
1338 Otherwise, non-ascii terminators will be negative on some platforms,
1339 and positive on others (depending on the implementation of char).
1340 */
1341
1342 level= 0; /* for load xml */
1343 start_of_line= line_start.length() != 0;
1344 /* If field_terminator == line_terminator, don't use line_terminator */
1345 if (m_field_term.eq(m_line_term))
1346 m_line_term.reset();
1347 enclosed_char= enclosed_par.length() ? (uchar) enclosed_par[0] : INT_MAX;
1348
1349 /* Set of a stack for unget if long terminators */
1350 uint length= MY_MAX(charset()->mbmaxlen, MY_MAX(m_field_term.length(),
1351 m_line_term.length())) + 1;
1352 set_if_bigger(length,line_start.length());
1353 stack= stack_pos= (int*) thd->alloc(sizeof(int) * length);
1354
1355 DBUG_ASSERT(m_fixed_length < UINT_MAX32);
1356 if (data.reserve((size_t) m_fixed_length))
1357 error=1; /* purecov: inspected */
1358 else
1359 {
1360 if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1361 (get_it_from_net) ? READ_NET :
1362 (is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1363 MYF(MY_WME | MY_THREAD_SPECIFIC)))
1364 {
1365 error=1;
1366 }
1367 else
1368 {
1369#ifndef EMBEDDED_LIBRARY
1370 if (get_it_from_net)
1371 cache.read_function = _my_b_net_read;
1372
1373 if (mysql_bin_log.is_open())
1374 {
1375 cache.real_read_function= cache.read_function;
1376 cache.read_function= log_loaded_block;
1377 }
1378#endif
1379 }
1380 }
1381}
1382
1383
1384READ_INFO::~READ_INFO()
1385{
1386 ::end_io_cache(&cache);
1387 List_iterator<XML_TAG> xmlit(taglist);
1388 XML_TAG *t;
1389 while ((t= xmlit++))
1390 delete(t);
1391}
1392
1393
1394inline bool READ_INFO::terminator(const uchar *ptr, uint length)
1395{
1396 int chr=0; // Keep gcc happy
1397 uint i;
1398 for (i=1 ; i < length ; i++)
1399 {
1400 if ((chr=GET) != *(uchar*)++ptr)
1401 {
1402 break;
1403 }
1404 }
1405 if (i == length)
1406 return true;
1407 PUSH(chr);
1408 while (i-- > 1)
1409 PUSH(*--ptr);
1410 return false;
1411}
1412
1413
1414/**
1415 Read a field.
1416
1417 The data in the loaded file was presumably escaped using
1418 - either select_export::send_data() OUTFILE
1419 - or mysql_real_escape_string()
1420 using the same character set with the one specified in the current
1421 "LOAD DATA INFILE ... CHARACTER SET ..." (or the default LOAD character set).
1422
1423 Note, non-escaped multi-byte characters are scanned as a single entity.
1424 This is needed to correctly distinguish between:
1425 - 0x5C as an escape character versus
1426 - 0x5C as the second byte in a multi-byte sequence (big5, cp932, gbk, sjis)
1427
1428 Parts of escaped multi-byte characters are scanned on different loop
1429 iterations. See the comment about 0x5C handling in select_export::send_data()
1430 in sql_class.cc.
1431
1432 READ_INFO::read_field() does not check wellformedness.
1433 Raising wellformedness errors or warnings in READ_INFO::read_field()
1434 would be wrong, as the data after unescaping can go into a BLOB field,
1435 or into a TEXT/VARCHAR field of a different character set.
1436 The loop below only makes sure to revert escaping made by
1437 select_export::send_data() or mysql_real_escape_string().
1438 Wellformedness is checked later, during Field::store(str,length,cs) time.
1439
1440 Note, in some cases users can supply data which did not go through
1441 escaping properly. For example, utf8 "\<C3><A4>"
1442 (backslash followed by LATIN SMALL LETTER A WITH DIAERESIS)
1443 is improperly escaped data that could not be generated by
1444 select_export::send_data() / mysql_real_escape_string():
1445 - either there should be two backslashes: "\\<C3><A4>"
1446 - or there should be no backslashes at all: "<C3><A4>"
  "\<C3>" and "<A4>" are scanned on two different loop iterations and
1448 store "<C3><A4>" into the field.
1449
1450 Note, adding useless escapes before multi-byte characters like in the
1451 example above is safe in case of utf8, but is not safe in case of
1452 character sets that have escape_with_backslash_is_dangerous==TRUE,
1453 such as big5, cp932, gbk, sjis. This can lead to mis-interpretation of the
1454 data. Suppose we have a big5 character "<EE><5C>" followed by <30> (digit 0).
1455 If we add an extra escape before this sequence, then we'll get
1456 <5C><EE><5C><30>. The first loop iteration will turn <5C><EE> into <EE>.
1457 The second loop iteration will turn <5C><30> into <30>.
1458 So the program that generates a dump file for further use with LOAD DATA
1459 must make sure to use escapes properly.
1460*/
1461
1462int READ_INFO::read_field()
1463{
1464 int chr,found_enclosed_char;
1465
1466 found_null=0;
1467 if (found_end_of_line)
1468 return 1; // One have to call next_line
1469
1470 /* Skip until we find 'line_start' */
1471
1472 if (start_of_line)
1473 { // Skip until line_start
1474 start_of_line=0;
1475 if (find_start_of_fields())
1476 return 1;
1477 }
1478 if ((chr=GET) == my_b_EOF)
1479 {
1480 found_end_of_line=eof=1;
1481 return 1;
1482 }
1483 data.length(0);
1484 if (chr == enclosed_char)
1485 {
1486 found_enclosed_char=enclosed_char;
1487 data.append(chr); // If error
1488 }
1489 else
1490 {
1491 found_enclosed_char= INT_MAX;
1492 PUSH(chr);
1493 }
1494
1495 for (;;)
1496 {
1497 // Make sure we have enough space for the longest multi-byte character.
1498 while (data.length() + charset()->mbmaxlen <= data.alloced_length())
1499 {
1500 chr = GET;
1501 if (chr == my_b_EOF)
1502 goto found_eof;
1503 if (chr == escape_char)
1504 {
1505 if ((chr=GET) == my_b_EOF)
1506 {
1507 data.append(escape_char);
1508 goto found_eof;
1509 }
1510 /*
1511 When escape_char == enclosed_char, we treat it like we do for
1512 handling quotes in SQL parsing -- you can double-up the
1513 escape_char to include it literally, but it doesn't do escapes
1514 like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1515 with data like: "fie""ld1", "field2"
1516 */
1517 if (escape_char != enclosed_char || chr == escape_char)
1518 {
1519 data.append(unescape((char) chr));
1520 continue;
1521 }
1522 PUSH(chr);
1523 chr= escape_char;
1524 }
1525#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
1526 if (chr == m_line_term.initial_byte())
1527#else
1528 if (chr == m_line_term.initial_byte() && found_enclosed_char == INT_MAX)
1529#endif
1530 {
1531 if (terminator(m_line_term))
1532 { // Maybe unexpected linefeed
1533 enclosed=0;
1534 found_end_of_line=1;
1535 row_start= (uchar *) data.ptr();
1536 row_end= (uchar *) data.end();
1537 return 0;
1538 }
1539 }
1540 if (chr == found_enclosed_char)
1541 {
1542 if ((chr=GET) == found_enclosed_char)
1543 { // Remove dupplicated
1544 data.append(chr);
1545 continue;
1546 }
1547 // End of enclosed field if followed by field_term or line_term
1548 if (chr == my_b_EOF || terminator(chr, m_line_term))
1549 {
1550 /* Maybe unexpected linefeed */
1551 enclosed=1;
1552 found_end_of_line=1;
1553 row_start= (uchar *) data.ptr() + 1;
1554 row_end= (uchar *) data.end();
1555 return 0;
1556 }
1557 if (terminator(chr, m_field_term))
1558 {
1559 enclosed=1;
1560 row_start= (uchar *) data.ptr() + 1;
1561 row_end= (uchar *) data.end();
1562 return 0;
1563 }
1564 /*
1565 The string didn't terminate yet.
1566 Store back next character for the loop
1567 */
1568 PUSH(chr);
1569 /* copy the found term character to 'to' */
1570 chr= found_enclosed_char;
1571 }
1572 else if (chr == m_field_term.initial_byte() &&
1573 found_enclosed_char == INT_MAX)
1574 {
1575 if (terminator(m_field_term))
1576 {
1577 enclosed=0;
1578 row_start= (uchar *) data.ptr();
1579 row_end= (uchar *) data.end();
1580 return 0;
1581 }
1582 }
1583 data.append(chr);
1584 if (use_mb(charset()) && read_mbtail(&data))
1585 goto found_eof;
1586 }
1587 /*
1588 ** We come here if buffer is too small. Enlarge it and continue
1589 */
1590 if (data.reserve(IO_SIZE))
1591 return (error= 1);
1592 }
1593
1594found_eof:
1595 enclosed=0;
1596 found_end_of_line=eof=1;
1597 row_start= (uchar *) data.ptr();
1598 row_end= (uchar *) data.end();
1599 return 0;
1600}
1601
1602/*
1603 Read a row with fixed length.
1604
1605 NOTES
1606 The row may not be fixed size on disk if there are escape
1607 characters in the file.
1608
1609 IMPLEMENTATION NOTE
1610 One can't use fixed length with multi-byte charset **
1611
1612 RETURN
1613 0 ok
1614 1 error
1615*/
1616
1617int READ_INFO::read_fixed_length()
1618{
1619 int chr;
1620 if (found_end_of_line)
1621 return 1; // One have to call next_line
1622
1623 if (start_of_line)
1624 { // Skip until line_start
1625 start_of_line=0;
1626 if (find_start_of_fields())
1627 return 1;
1628 }
1629
1630 for (data.length(0); data.length() < m_fixed_length ; )
1631 {
1632 if ((chr=GET) == my_b_EOF)
1633 goto found_eof;
1634 if (chr == escape_char)
1635 {
1636 if ((chr=GET) == my_b_EOF)
1637 {
1638 data.append(escape_char);
1639 goto found_eof;
1640 }
1641 data.append((uchar) unescape((char) chr));
1642 continue;
1643 }
1644 if (terminator(chr, m_line_term))
1645 { // Maybe unexpected linefeed
1646 found_end_of_line= true;
1647 break;
1648 }
1649 data.append(chr);
1650 }
1651 row_start= (uchar *) data.ptr();
1652 row_end= (uchar *) data.end(); // Found full line
1653 return 0;
1654
1655found_eof:
1656 found_end_of_line=eof=1;
1657 row_start= (uchar *) data.ptr();
1658 row_end= (uchar *) data.end();
1659 return data.length() == 0 ? 1 : 0;
1660}
1661
1662
1663int READ_INFO::next_line()
1664{
1665 line_cuted=0;
1666 start_of_line= m_line_start.length() != 0;
1667 if (found_end_of_line || eof)
1668 {
1669 found_end_of_line=0;
1670 return eof;
1671 }
1672 found_end_of_line=0;
1673 if (!m_line_term.length())
1674 return 0; // No lines
1675 for (;;)
1676 {
1677 int chlen;
1678 char buf[MY_CS_MBMAXLEN];
1679
1680 if (getbyte(&buf[0]))
1681 return 1; // EOF
1682
1683 if (use_mb(charset()) &&
1684 (chlen= my_charlen(charset(), buf, buf + 1)) != 1)
1685 {
1686 uint i;
1687 for (i= 1; MY_CS_IS_TOOSMALL(chlen); )
1688 {
1689 DBUG_ASSERT(i < sizeof(buf));
1690 DBUG_ASSERT(chlen != 1);
1691 if (getbyte(&buf[i++]))
1692 return 1; // EOF
1693 chlen= my_charlen(charset(), buf, buf + i);
1694 }
1695
1696 /*
1697 Either a complete multi-byte sequence,
1698 or a broken byte sequence was found.
1699 Check if the sequence is a prefix of the "LINES TERMINATED BY" string.
1700 */
1701 if ((uchar) buf[0] == m_line_term.initial_byte() &&
1702 i <= m_line_term.length() &&
1703 !memcmp(buf, m_line_term.ptr(), i))
1704 {
1705 if (m_line_term.length() == i)
1706 {
1707 /*
1708 We found a "LINES TERMINATED BY" string that consists
1709 of a single multi-byte character.
1710 */
1711 return 0;
1712 }
1713 /*
1714 buf[] is a prefix of "LINES TERMINATED BY".
1715 Now check the suffix. Length of the suffix of line_term_ptr
1716 that still needs to be checked is (line_term_length - i).
1717 Note, READ_INFO::terminator() assumes that the leftmost byte of the
1718 argument is already scanned from the file and is checked to
1719 be a known prefix (e.g. against line_term.initial_char()).
1720 So we need to pass one extra byte.
1721 */
1722 if (terminator(m_line_term.ptr() + i - 1,
1723 m_line_term.length() - i + 1))
1724 return 0;
1725 }
1726 /*
1727 Here we have a good multi-byte sequence or a broken byte sequence,
1728 and the sequence is not equal to "LINES TERMINATED BY".
1729 No needs to check for escape_char, because:
1730 - multi-byte escape characters in "FIELDS ESCAPED BY" are not
1731 supported and are rejected at parse time.
1732 - broken single-byte sequences are not recognized as escapes,
1733 they are considered to be a part of the data and are converted to
1734 question marks.
1735 */
1736 line_cuted= true;
1737 continue;
1738 }
1739 if (buf[0] == escape_char)
1740 {
1741 line_cuted= true;
1742 if (GET == my_b_EOF)
1743 return 1;
1744 continue;
1745 }
1746 if (terminator(buf[0], m_line_term))
1747 return 0;
1748 line_cuted= true;
1749 }
1750}
1751
1752
1753bool READ_INFO::find_start_of_fields()
1754{
1755 for (int chr= GET ; chr != my_b_EOF ; chr= GET)
1756 {
1757 if (terminator(chr, m_line_start))
1758 return false;
1759 }
1760 return (found_end_of_line= eof= true);
1761}
1762
1763
1764/*
1765 Clear taglist from tags with a specified level
1766*/
1767int READ_INFO::clear_level(int level_arg)
1768{
1769 DBUG_ENTER("READ_INFO::read_xml clear_level");
1770 List_iterator<XML_TAG> xmlit(taglist);
1771 xmlit.rewind();
1772 XML_TAG *tag;
1773
1774 while ((tag= xmlit++))
1775 {
1776 if(tag->level >= level_arg)
1777 {
1778 xmlit.remove();
1779 delete tag;
1780 }
1781 }
1782 DBUG_RETURN(0);
1783}
1784
1785
/*
  Convert the name of an XML entity (the text between '&' and ';')
  to the character it stands for.

  The name does not need to be null terminated; exactly 'length' bytes
  are compared. Known names: gt, lt, amp, quot, apos.

  Return -1 if the name is not known.
*/
static int
my_xml_entity_to_char(const char *name, uint length)
{
  static const struct
  {
    const char *entity;
    uint entity_length;
    int chr;
  } known_entities[]=
  {
    { "gt",   2, '>'  },
    { "lt",   2, '<'  },
    { "amp",  3, '&'  },
    { "quot", 4, '"'  },
    { "apos", 4, '\'' }
  };
  const uint count= (uint) (sizeof(known_entities) / sizeof(known_entities[0]));

  for (uint i= 0; i < count; i++)
  {
    if (length == known_entities[i].entity_length &&
        !memcmp(name, known_entities[i].entity, length))
      return known_entities[i].chr;
  }
  return -1;
}
1814
1815
/**
  @brief Convert tab, carriage return and line feed to space

  @param  chr   character

  @return ' ' for '\t', '\r' and '\n'; any other value unchanged

  @details According to the "XML 1.0" standard,
  only space (#x20) characters, carriage returns,
  line feeds or tabs are considered as spaces.
  Convert all of them to space (#x20) for parsing simplicity.
*/
static int
my_tospace(int chr)
{
  switch (chr) {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
1831
1832
1833/*
1834 Read an xml value: handle multibyte and xml escape
1835*/
1836int READ_INFO::read_value(int delim, String *val)
1837{
1838 int chr;
1839 String tmp;
1840
1841 for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF; chr= GET)
1842 {
1843 if(chr == '&')
1844 {
1845 tmp.length(0);
1846 for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
1847 {
1848 if (chr == my_b_EOF)
1849 return chr;
1850 tmp.append(chr);
1851 }
1852 if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1853 val->append(chr);
1854 else
1855 {
1856 val->append('&');
1857 val->append(tmp);
1858 val->append(';');
1859 }
1860 }
1861 else
1862 {
1863 val->append(chr);
1864 if (use_mb(charset()) && read_mbtail(val))
1865 return my_b_EOF;
1866 }
1867 }
1868 return my_tospace(chr);
1869}
1870
1871
1872/*
1873 Read a record in xml format
1874 tags and attributes are stored in taglist
1875 when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
1876*/
1877int READ_INFO::read_xml(THD *thd)
1878{
1879 DBUG_ENTER("READ_INFO::read_xml");
1880 int chr, chr2, chr3;
1881 int delim= 0;
1882 String tag, attribute, value;
1883 bool in_tag= false;
1884
1885 tag.length(0);
1886 attribute.length(0);
1887 value.length(0);
1888
1889 for (chr= my_tospace(GET); chr != my_b_EOF ; )
1890 {
1891 switch(chr){
1892 case '<': /* read tag */
1893 /* TODO: check if this is a comment <!-- comment --> */
1894 chr= my_tospace(GET);
1895 if(chr == '!')
1896 {
1897 chr2= GET;
1898 chr3= GET;
1899
1900 if(chr2 == '-' && chr3 == '-')
1901 {
1902 chr2= 0;
1903 chr3= 0;
1904 chr= my_tospace(GET);
1905
1906 while(chr != '>' || chr2 != '-' || chr3 != '-')
1907 {
1908 if(chr == '-')
1909 {
1910 chr3= chr2;
1911 chr2= chr;
1912 }
1913 else if (chr2 == '-')
1914 {
1915 chr2= 0;
1916 chr3= 0;
1917 }
1918 chr= my_tospace(GET);
1919 if (chr == my_b_EOF)
1920 goto found_eof;
1921 }
1922 break;
1923 }
1924 }
1925
1926 tag.length(0);
1927 while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
1928 {
1929 if(chr != delim) /* fix for the '<field name =' format */
1930 tag.append(chr);
1931 chr= my_tospace(GET);
1932 }
1933
1934 // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
1935 if((tag.length() == m_line_term.length() - 2) &&
1936 (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0))
1937 {
1938 DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
1939 level,tag.c_ptr_safe(), m_line_term.ptr()));
1940 }
1941
1942 if(chr == ' ' || chr == '>')
1943 {
1944 level++;
1945 clear_level(level + 1);
1946 }
1947
1948 if (chr == ' ')
1949 in_tag= true;
1950 else
1951 in_tag= false;
1952 break;
1953
1954 case ' ': /* read attribute */
1955 while(chr == ' ') /* skip blanks */
1956 chr= my_tospace(GET);
1957
1958 if(!in_tag)
1959 break;
1960
1961 while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
1962 {
1963 attribute.append(chr);
1964 chr= my_tospace(GET);
1965 }
1966 break;
1967
1968 case '>': /* end tag - read tag value */
1969 in_tag= false;
1970 chr= read_value('<', &value);
1971 if(chr == my_b_EOF)
1972 goto found_eof;
1973
1974 /* save value to list */
1975 if (tag.length() > 0 && value.length() > 0)
1976 {
1977 DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
1978 level,tag.c_ptr_safe(), value.c_ptr_safe()));
1979 XML_TAG *tmp= new XML_TAG(level, tag, value);
1980 if (!tmp || taglist.push_front(tmp, thd->mem_root))
1981 DBUG_RETURN(1); // End of memory
1982 }
1983 tag.length(0);
1984 value.length(0);
1985 attribute.length(0);
1986 break;
1987
1988 case '/': /* close tag */
1989 chr= my_tospace(GET);
1990 /* Decrease the 'level' only when (i) It's not an */
1991 /* (without space) empty tag i.e. <tag/> or, (ii) */
1992 /* It is of format <row col="val" .../> */
1993 if(chr != '>' || in_tag)
1994 {
1995 level--;
1996 in_tag= false;
1997 }
1998 if(chr != '>') /* if this is an empty tag <tag /> */
1999 tag.length(0); /* we should keep tag value */
2000 while(chr != '>' && chr != my_b_EOF)
2001 {
2002 tag.append(chr);
2003 chr= my_tospace(GET);
2004 }
2005
2006 if((tag.length() == m_line_term.length() - 2) &&
2007 (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0))
2008 {
2009 DBUG_PRINT("read_xml", ("found end-of-row %i %s",
2010 level, tag.c_ptr_safe()));
2011 DBUG_RETURN(0); //normal return
2012 }
2013 chr= my_tospace(GET);
2014 break;
2015
2016 case '=': /* attribute name end - read the value */
2017 //check for tag field and attribute name
2018 if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
2019 !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
2020 {
2021 /*
2022 this is format <field name="xx">xx</field>
2023 where actual fieldname is in attribute
2024 */
2025 delim= my_tospace(GET);
2026 tag.length(0);
2027 attribute.length(0);
2028 chr= '<'; /* we pretend that it is a tag */
2029 level--;
2030 break;
2031 }
2032
2033 //check for " or '
2034 chr= GET;
2035 if (chr == my_b_EOF)
2036 goto found_eof;
2037 if(chr == '"' || chr == '\'')
2038 {
2039 delim= chr;
2040 }
2041 else
2042 {
2043 delim= ' '; /* no delimiter, use space */
2044 PUSH(chr);
2045 }
2046
2047 chr= read_value(delim, &value);
2048 if (attribute.length() > 0 && value.length() > 0)
2049 {
2050 DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
2051 level + 1,
2052 attribute.c_ptr_safe(),
2053 value.c_ptr_safe()));
2054 XML_TAG *tmp= new XML_TAG(level + 1, attribute, value);
2055 if (!tmp || taglist.push_front(tmp, thd->mem_root))
2056 DBUG_RETURN(1); // End of memory
2057 }
2058 attribute.length(0);
2059 value.length(0);
2060 if (chr != ' ')
2061 chr= my_tospace(GET);
2062 break;
2063
2064 default:
2065 chr= my_tospace(GET);
2066 } /* end switch */
2067 } /* end while */
2068
2069found_eof:
2070 DBUG_PRINT("read_xml",("Found eof"));
2071 eof= 1;
2072 DBUG_RETURN(1);
2073}
2074