1/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
/*
  Locking of Maria-tables.
  Must be the first request before doing any further calls to any Maria
  function.
  Is used to allow many processes to use the same non-transactional Maria
  table.
*/
21
22#include "ma_ftdefs.h"
23
24 /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
25
26int maria_lock_database(MARIA_HA *info, int lock_type)
27{
28 int error;
29 uint count;
30 MARIA_SHARE *share= info->s;
31 DBUG_ENTER("maria_lock_database");
32 DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
33 "global_changed: %d open_count: %u name: '%s'",
34 lock_type, info->lock_type, share->r_locks,
35 share->w_locks,
36 share->global_changed, share->state.open_count,
37 share->index_file_name.str));
38 if (share->options & HA_OPTION_READ_ONLY_DATA ||
39 info->lock_type == lock_type)
40 DBUG_RETURN(0);
41 if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
42 {
43 ++share->w_locks;
44 ++share->tot_locks;
45 info->lock_type= lock_type;
46 DBUG_RETURN(0);
47 }
48
49 error=0;
50 mysql_mutex_lock(&share->intern_lock);
51 if (share->kfile.file >= 0) /* May only be false on windows */
52 {
53 switch (lock_type) {
54 case F_UNLCK:
55 maria_ftparser_call_deinitializer(info);
56 if (info->lock_type == F_RDLCK)
57 {
58 count= --share->r_locks;
59 if (share->lock_restore_status)
60 (*share->lock_restore_status)(info);
61 }
62 else
63 {
64 count= --share->w_locks;
65 if (share->lock.update_status)
66 _ma_update_status_with_lock(info);
67 }
68 --share->tot_locks;
69 if (info->lock_type == F_WRLCK && !share->w_locks)
70 {
71 /* pages of transactional tables get flushed at Checkpoint */
72 if (!share->base.born_transactional && !share->temporary &&
73 _ma_flush_table_files(info,
74 share->delay_key_write ? MARIA_FLUSH_DATA :
75 MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
76 FLUSH_KEEP, FLUSH_KEEP))
77 error= my_errno;
78 }
79 if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
80 {
81 if (end_io_cache(&info->rec_cache))
82 {
83 error= my_errno;
84 _ma_set_fatal_error(share, error);
85 }
86 }
87 if (!count)
88 {
89 DBUG_PRINT("info",("changed: %u w_locks: %u",
90 (uint) share->changed, share->w_locks));
91 if (share->changed && !share->w_locks)
92 {
93#ifdef HAVE_MMAP
94 if ((share->mmaped_length !=
95 share->state.state.data_file_length) &&
96 (share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
97 {
98 if (share->lock_key_trees)
99 mysql_rwlock_wrlock(&share->mmap_lock);
100 _ma_remap_file(info, share->state.state.data_file_length);
101 share->nonmmaped_inserts= 0;
102 if (share->lock_key_trees)
103 mysql_rwlock_unlock(&share->mmap_lock);
104 }
105#endif
106#ifdef MARIA_EXTERNAL_LOCKING
107 share->state.process= share->last_process=share->this_process;
108 share->state.unique= info->last_unique= info->this_unique;
109 share->state.update_count= info->last_loop= ++info->this_loop;
110#endif
111 /* transactional tables rather flush their state at Checkpoint */
112 if (!share->base.born_transactional)
113 {
114 if (_ma_state_info_write_sub(share->kfile.file, &share->state,
115 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
116 error= my_errno;
117 else
118 {
119 /* A value of 0 means below means "state flushed" */
120 share->changed= 0;
121 }
122 }
123 if (maria_flush)
124 {
125 if (_ma_sync_table_files(info))
126 error= my_errno;
127 }
128 else
129 share->not_flushed=1;
130 if (error)
131 _ma_set_fatal_error(share, error);
132 }
133 }
134 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
135 info->lock_type= F_UNLCK;
136 break;
137 case F_RDLCK:
138 if (info->lock_type == F_WRLCK)
139 {
140 /*
141 Change RW to READONLY
142
143 mysqld does not turn write locks to read locks,
144 so we're never here in mysqld.
145 */
146 share->w_locks--;
147 share->r_locks++;
148 info->lock_type=lock_type;
149 break;
150 }
151#ifdef MARIA_EXTERNAL_LOCKING
152 if (!share->r_locks && !share->w_locks)
153 {
154 /* note that a transactional table should not do this */
155 if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
156 {
157 error=my_errno;
158 break;
159 }
160 }
161#endif
162 _ma_test_if_changed(info);
163 share->r_locks++;
164 share->tot_locks++;
165 info->lock_type=lock_type;
166 break;
167 case F_WRLCK:
168 if (info->lock_type == F_RDLCK)
169 { /* Change READONLY to RW */
170 if (share->r_locks == 1)
171 {
172 share->r_locks--;
173 share->w_locks++;
174 info->lock_type=lock_type;
175 break;
176 }
177 }
178#ifdef MARIA_EXTERNAL_LOCKING
179 if (!(share->options & HA_OPTION_READ_ONLY_DATA))
180 {
181 if (!share->w_locks)
182 {
183 if (!share->r_locks)
184 {
185 /*
186 Note that transactional tables should not do this.
187 If we enabled this code, we should make sure to skip it if
188 born_transactional is true. We should not test
189 now_transactional to decide if we can call
190 _ma_state_info_read_dsk(), because it can temporarily be 0
191 (TRUNCATE on a partitioned table) and thus it would make a state
192 modification below without mutex, confusing a concurrent
193 checkpoint running.
194 Even if this code was enabled only for non-transactional tables:
195 in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
196 state on disk read by DELETE is obsolete as it was not flushed
197 at the end of INSERT. MyISAM same. It however causes no issue as
198 maria_delete_all_rows() calls _ma_reset_status() thus is not
199 influenced by the obsolete read values.
200 */
201 if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
202 {
203 error=my_errno;
204 break;
205 }
206 }
207 }
208 }
209#endif /* defined(MARIA_EXTERNAL_LOCKING) */
210 _ma_test_if_changed(info);
211
212 info->lock_type=lock_type;
213 info->invalidator=share->invalidator;
214 share->w_locks++;
215 share->tot_locks++;
216 break;
217 default:
218 DBUG_ASSERT(0);
219 break; /* Impossible */
220 }
221 }
222#ifdef __WIN__
223 else
224 {
225 /*
226 Check for bad file descriptors if this table is part
227 of a merge union. Failing to capture this may cause
228 a crash on windows if the table is renamed and
229 later on referenced by the merge table.
230 */
231 if( info->owned_by_merge && (info->s)->kfile.file < 0 )
232 {
233 error = HA_ERR_NO_SUCH_TABLE;
234 }
235 }
236#endif
237 mysql_mutex_unlock(&share->intern_lock);
238 DBUG_RETURN(error);
239} /* maria_lock_database */
240
241
242/****************************************************************************
243 ** functions to read / write the state
244****************************************************************************/
245
246int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
247 int lock_type __attribute__ ((unused)),
248 int check_keybuffer __attribute__ ((unused)))
249{
250#ifdef MARIA_EXTERNAL_LOCKING
251 DBUG_ENTER("_ma_readinfo");
252
253 if (info->lock_type == F_UNLCK)
254 {
255 MARIA_SHARE *share= info->s;
256 if (!share->tot_locks)
257 {
258 /* should not be done for transactional tables */
259 if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
260 {
261 if (!my_errno)
262 my_errno= HA_ERR_FILE_TOO_SHORT;
263 DBUG_RETURN(1);
264 }
265 }
266 if (check_keybuffer)
267 VOID(_ma_test_if_changed(info));
268 info->invalidator=share->invalidator;
269 }
270 else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
271 {
272 my_errno=EACCES; /* Not allowed to change */
273 DBUG_RETURN(-1); /* when have read_lock() */
274 }
275 DBUG_RETURN(0);
276#else
277 return 0;
278#endif /* defined(MARIA_EXTERNAL_LOCKING) */
279} /* _ma_readinfo */
280
281
282/*
283 Every isam-function that uppdates the isam-database MUST end with this
284 request
285
286 NOTES
287 my_errno is not changed if this succeeds!
288*/
289
290int _ma_writeinfo(register MARIA_HA *info, uint operation)
291{
292 int error,olderror;
293 MARIA_SHARE *share= info->s;
294 DBUG_ENTER("_ma_writeinfo");
295 DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
296 share->tot_locks));
297
298 error=0;
299 if (share->tot_locks == 0 && !share->base.born_transactional)
300 {
301 /* transactional tables flush their state at Checkpoint */
302 if (operation)
303 { /* Two threads can't be here */
304 olderror= my_errno; /* Remember last error */
305
306#ifdef MARIA_EXTERNAL_LOCKING
307 /*
308 The following only makes sense if we want to be allow two different
309 processes access the same table at the same time
310 */
311 share->state.process= share->last_process= share->this_process;
312 share->state.unique= info->last_unique= info->this_unique;
313 share->state.update_count= info->last_loop= ++info->this_loop;
314#endif
315
316 if ((error=
317 _ma_state_info_write_sub(share->kfile.file,
318 &share->state,
319 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)))
320 olderror=my_errno;
321#ifdef __WIN__
322 if (maria_flush)
323 {
324 _commit(share->kfile.file);
325 _commit(info->dfile.file);
326 }
327#endif
328 my_errno=olderror;
329 }
330 }
331 else if (operation)
332 share->changed= 1; /* Mark keyfile changed */
333 DBUG_RETURN(error);
334} /* _ma_writeinfo */
335
336
337/*
338 Test if an external process has changed the database
339 (Should be called after readinfo)
340*/
341
342int _ma_test_if_changed(register MARIA_HA *info)
343{
344#ifdef MARIA_EXTERNAL_LOCKING
345 MARIA_SHARE *share= info->s;
346 if (share->state.process != share->last_process ||
347 share->state.unique != info->last_unique ||
348 share->state.update_count != info->last_loop)
349 { /* Keyfile has changed */
350 DBUG_PRINT("info",("index file changed"));
351 if (share->state.process != share->this_process)
352 VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
353 FLUSH_RELEASE));
354 share->last_process=share->state.process;
355 info->last_unique= share->state.unique;
356 info->last_loop= share->state.update_count;
357 info->update|= HA_STATE_WRITTEN; /* Must use file on next */
358 info->data_changed= 1; /* For maria_is_changed */
359 return 1;
360 }
361#endif
362 return (!(info->update & HA_STATE_AKTIV) ||
363 (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
364 HA_STATE_KEY_CHANGED)));
365} /* _ma_test_if_changed */
366
367
368/*
369 Put a mark in the .MAI file that someone is updating the table
370
371 DOCUMENTATION
372 state.open_count in the .MAI file is used the following way:
373 - For the first change of the .MYI file in this process open_count is
374 incremented by _ma_mark_file_changed(). (We have a write lock on the file
375 when this happens)
376 - In maria_close() it's decremented by _ma_decrement_open_count() if it
377 was incremented in the same process.
378
379 This mean that if we are the only process using the file, the open_count
380 tells us if the MARIA file wasn't properly closed. (This is true if
381 my_disable_locking is set).
382
383 open_count is not maintained on disk for temporary tables.
384*/
385
386#define _MA_ALREADY_MARKED_FILE_CHANGED \
387 ((share->state.changed & STATE_CHANGED) && share->global_changed)
388
389int _ma_mark_file_changed(register MARIA_SHARE *share)
390{
391 if (!share->base.born_transactional)
392 {
393 if (!_MA_ALREADY_MARKED_FILE_CHANGED)
394 return _ma_mark_file_changed_now(share);
395 }
396 else
397 {
398 /*
399 For transactional tables, the table is marked changed when the first page
400 is written. Here we just mark the state to be updated so that caller
401 can do 'analyze table' and find that is has changed before any pages
402 are written.
403 */
404 if (! test_all_bits(share->state.changed,
405 (STATE_CHANGED | STATE_NOT_ANALYZED |
406 STATE_NOT_OPTIMIZED_KEYS)))
407 {
408 mysql_mutex_lock(&share->intern_lock);
409 share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
410 STATE_NOT_OPTIMIZED_KEYS);
411 mysql_mutex_unlock(&share->intern_lock);
412 }
413 }
414 return 0;
415}
416
417int _ma_mark_file_changed_now(register MARIA_SHARE *share)
418{
419 uchar buff[3];
420 int error= 1;
421 DBUG_ENTER("_ma_mark_file_changed_now");
422
423 if (_MA_ALREADY_MARKED_FILE_CHANGED)
424 DBUG_RETURN(0);
425 mysql_mutex_lock(&share->intern_lock); /* recheck under mutex */
426 if (! _MA_ALREADY_MARKED_FILE_CHANGED)
427 {
428 share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
429 STATE_NOT_OPTIMIZED_KEYS);
430 if (!share->global_changed)
431 {
432 share->changed= share->global_changed= 1;
433 share->state.open_count++;
434 }
435 /*
436 Temp tables don't need an open_count as they are removed on crash.
437 In theory transactional tables are fixed by log-based recovery, so don't
438 need an open_count either, but if recovery has failed and logs have been
439 removed (by maria-force-start-after-recovery-failures), we still need to
440 detect dubious tables.
441 If we didn't maintain open_count on disk for a table, after a crash
442 we wouldn't know if it was closed at crash time (thus does not need a
443 check) or not. So we would have to check all tables: overkill.
444 */
445 if (!share->temporary)
446 {
447 mi_int2store(buff,share->state.open_count);
448 buff[2]=1; /* Mark that it's changed */
449 if (my_pwrite(share->kfile.file, buff, sizeof(buff),
450 sizeof(share->state.header) +
451 MARIA_FILE_OPEN_COUNT_OFFSET,
452 MYF(MY_NABP)))
453 goto err;
454 }
455 /* Set uuid of file if not yet set (zerofilled file) */
456 if (share->base.born_transactional &&
457 !(share->state.changed & STATE_NOT_MOVABLE))
458 {
459 /* Lock table to current installation */
460 if (_ma_set_uuid(share, 0) ||
461 (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
462 _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
463 trnman_get_min_trid(),
464 TRUE, TRUE)))
465 goto err;
466 share->state.changed|= STATE_NOT_MOVABLE;
467 }
468 }
469 error= 0;
470err:
471 mysql_mutex_unlock(&share->intern_lock);
472 DBUG_RETURN(error);
473#undef _MA_ALREADY_MARKED_FILE_CHANGED
474}
475
476/*
477 Check that a region is all zero
478
479 SYNOPSIS
480 check_if_zero()
481 pos Start of memory to check
482 length length of memory region
483
484 NOTES
485 Used mainly to detect rows with wrong extent information
486*/
487
488my_bool _ma_check_if_zero(uchar *pos, size_t length)
489{
490 uchar *end;
491 for (end= pos+ length; pos != end ; pos++)
492 if (pos[0] != 0)
493 return 1;
494 return 0;
495}
496
497/*
498 This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
499 call. In these context the following code should be safe!
500 */
501
502int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables)
503{
504 uchar buff[2];
505 register MARIA_SHARE *share= info->s;
506 int lock_error=0,write_error=0;
507 DBUG_ENTER("_ma_decrement_open_count");
508
509 if (share->global_changed)
510 {
511 uint old_lock=info->lock_type;
512 share->global_changed=0;
513 lock_error= (my_disable_locking || ! lock_tables ? 0 :
514 maria_lock_database(info, F_WRLCK));
515 /* Its not fatal even if we couldn't get the lock ! */
516 if (share->state.open_count > 0)
517 {
518 share->state.open_count--;
519 share->changed= 1; /* We have to update state */
520 /*
521 For temporary tables that will just be deleted, we don't have
522 to decrement state. For transactional tables the state will be
523 updated in maria_close().
524 */
525
526 if (!share->temporary && !share->now_transactional)
527 {
528 mi_int2store(buff,share->state.open_count);
529 write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff),
530 sizeof(share->state.header) +
531 MARIA_FILE_OPEN_COUNT_OFFSET,
532 MYF(MY_NABP));
533 }
534 }
535 if (!lock_error && !my_disable_locking && lock_tables)
536 lock_error=maria_lock_database(info,old_lock);
537 }
538 DBUG_RETURN(MY_TEST(lock_error || write_error));
539}
540
541
542/** @brief mark file as crashed */
543
544void _ma_mark_file_crashed(MARIA_SHARE *share)
545{
546 uchar buff[2];
547 DBUG_ENTER("_ma_mark_file_crashed");
548
549 share->state.changed|= STATE_CRASHED;
550 mi_int2store(buff, share->state.changed);
551 /*
552 We can ignore the errors, as if the mark failed, there isn't anything
553 else we can do; The user should already have got an error that the
554 table was crashed.
555 */
556 (void) my_pwrite(share->kfile.file, buff, sizeof(buff),
557 sizeof(share->state.header) +
558 MARIA_FILE_CHANGED_OFFSET,
559 MYF(MY_NABP));
560 DBUG_VOID_RETURN;
561}
562
563/*
564 Handle a fatal error
565
566 - Mark the table as crashed
567 - Print an error message, if we had not issued an error message before
568 that the table had been crashed.
569 - set my_errno to error
570 - If 'maria_assert_if_crashed_table is set, then assert.
571*/
572
573void _ma_set_fatal_error(MARIA_SHARE *share, int error)
574{
575 DBUG_PRINT("error", ("error: %d", error));
576 maria_mark_crashed_share(share);
577 if (!(share->state.changed & STATE_CRASHED_PRINTED))
578 {
579 share->state.changed|= STATE_CRASHED_PRINTED;
580 maria_print_error(share, error);
581 }
582 my_errno= error;
583 DBUG_ASSERT(!maria_assert_if_crashed_table);
584}
585
586
587/**
588 @brief Set uuid of for a Maria file
589
590 @fn _ma_set_uuid()
591 @param share Maria share
592 @param reset_uuid Instead of setting file to maria_uuid, set it to
593 0 to mark it as movable
594*/
595
596my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid)
597{
598 uchar buff[MY_UUID_SIZE], *uuid;
599
600 uuid= maria_uuid;
601 if (reset_uuid)
602 {
603 bzero(buff, sizeof(buff));
604 uuid= buff;
605 }
606 return (my_bool) my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE,
607 mi_uint2korr(share->state.header.base_pos),
608 MYF(MY_NABP));
609}
610