1 | /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB |
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; version 2 of the License. |
6 | |
7 | This program is distributed in the hope that it will be useful, |
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | GNU General Public License for more details. |
11 | |
12 | You should have received a copy of the GNU General Public License |
13 | along with this program; if not, write to the Free Software |
14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
15 | |
/*
  Locking of Maria tables.
  Must be the first request before doing any further calls to any Maria
  function.
  Is used to allow many processes to use the same non-transactional Maria
  table.
*/
21 | |
22 | #include "ma_ftdefs.h" |
23 | |
24 | /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */ |
25 | |
26 | int maria_lock_database(MARIA_HA *info, int lock_type) |
27 | { |
28 | int error; |
29 | uint count; |
30 | MARIA_SHARE *share= info->s; |
31 | DBUG_ENTER("maria_lock_database" ); |
32 | DBUG_PRINT("enter" ,("lock_type: %d old lock %d r_locks: %u w_locks: %u " |
33 | "global_changed: %d open_count: %u name: '%s'" , |
34 | lock_type, info->lock_type, share->r_locks, |
35 | share->w_locks, |
36 | share->global_changed, share->state.open_count, |
37 | share->index_file_name.str)); |
38 | if (share->options & HA_OPTION_READ_ONLY_DATA || |
39 | info->lock_type == lock_type) |
40 | DBUG_RETURN(0); |
41 | if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */ |
42 | { |
43 | ++share->w_locks; |
44 | ++share->tot_locks; |
45 | info->lock_type= lock_type; |
46 | DBUG_RETURN(0); |
47 | } |
48 | |
49 | error=0; |
50 | mysql_mutex_lock(&share->intern_lock); |
51 | if (share->kfile.file >= 0) /* May only be false on windows */ |
52 | { |
53 | switch (lock_type) { |
54 | case F_UNLCK: |
55 | maria_ftparser_call_deinitializer(info); |
56 | if (info->lock_type == F_RDLCK) |
57 | { |
58 | count= --share->r_locks; |
59 | if (share->lock_restore_status) |
60 | (*share->lock_restore_status)(info); |
61 | } |
62 | else |
63 | { |
64 | count= --share->w_locks; |
65 | if (share->lock.update_status) |
66 | _ma_update_status_with_lock(info); |
67 | } |
68 | --share->tot_locks; |
69 | if (info->lock_type == F_WRLCK && !share->w_locks) |
70 | { |
71 | /* pages of transactional tables get flushed at Checkpoint */ |
72 | if (!share->base.born_transactional && !share->temporary && |
73 | _ma_flush_table_files(info, |
74 | share->delay_key_write ? MARIA_FLUSH_DATA : |
75 | MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, |
76 | FLUSH_KEEP, FLUSH_KEEP)) |
77 | error= my_errno; |
78 | } |
79 | if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) |
80 | { |
81 | if (end_io_cache(&info->rec_cache)) |
82 | { |
83 | error= my_errno; |
84 | _ma_set_fatal_error(share, error); |
85 | } |
86 | } |
87 | if (!count) |
88 | { |
89 | DBUG_PRINT("info" ,("changed: %u w_locks: %u" , |
90 | (uint) share->changed, share->w_locks)); |
91 | if (share->changed && !share->w_locks) |
92 | { |
93 | #ifdef HAVE_MMAP |
94 | if ((share->mmaped_length != |
95 | share->state.state.data_file_length) && |
96 | (share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS)) |
97 | { |
98 | if (share->lock_key_trees) |
99 | mysql_rwlock_wrlock(&share->mmap_lock); |
100 | _ma_remap_file(info, share->state.state.data_file_length); |
101 | share->nonmmaped_inserts= 0; |
102 | if (share->lock_key_trees) |
103 | mysql_rwlock_unlock(&share->mmap_lock); |
104 | } |
105 | #endif |
106 | #ifdef MARIA_EXTERNAL_LOCKING |
107 | share->state.process= share->last_process=share->this_process; |
108 | share->state.unique= info->last_unique= info->this_unique; |
109 | share->state.update_count= info->last_loop= ++info->this_loop; |
110 | #endif |
111 | /* transactional tables rather flush their state at Checkpoint */ |
112 | if (!share->base.born_transactional) |
113 | { |
114 | if (_ma_state_info_write_sub(share->kfile.file, &share->state, |
115 | MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)) |
116 | error= my_errno; |
117 | else |
118 | { |
119 | /* A value of 0 means below means "state flushed" */ |
120 | share->changed= 0; |
121 | } |
122 | } |
123 | if (maria_flush) |
124 | { |
125 | if (_ma_sync_table_files(info)) |
126 | error= my_errno; |
127 | } |
128 | else |
129 | share->not_flushed=1; |
130 | if (error) |
131 | _ma_set_fatal_error(share, error); |
132 | } |
133 | } |
134 | info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); |
135 | info->lock_type= F_UNLCK; |
136 | break; |
137 | case F_RDLCK: |
138 | if (info->lock_type == F_WRLCK) |
139 | { |
140 | /* |
141 | Change RW to READONLY |
142 | |
143 | mysqld does not turn write locks to read locks, |
144 | so we're never here in mysqld. |
145 | */ |
146 | share->w_locks--; |
147 | share->r_locks++; |
148 | info->lock_type=lock_type; |
149 | break; |
150 | } |
151 | #ifdef MARIA_EXTERNAL_LOCKING |
152 | if (!share->r_locks && !share->w_locks) |
153 | { |
154 | /* note that a transactional table should not do this */ |
155 | if (_ma_state_info_read_dsk(share->kfile.file, &share->state)) |
156 | { |
157 | error=my_errno; |
158 | break; |
159 | } |
160 | } |
161 | #endif |
162 | _ma_test_if_changed(info); |
163 | share->r_locks++; |
164 | share->tot_locks++; |
165 | info->lock_type=lock_type; |
166 | break; |
167 | case F_WRLCK: |
168 | if (info->lock_type == F_RDLCK) |
169 | { /* Change READONLY to RW */ |
170 | if (share->r_locks == 1) |
171 | { |
172 | share->r_locks--; |
173 | share->w_locks++; |
174 | info->lock_type=lock_type; |
175 | break; |
176 | } |
177 | } |
178 | #ifdef MARIA_EXTERNAL_LOCKING |
179 | if (!(share->options & HA_OPTION_READ_ONLY_DATA)) |
180 | { |
181 | if (!share->w_locks) |
182 | { |
183 | if (!share->r_locks) |
184 | { |
185 | /* |
186 | Note that transactional tables should not do this. |
187 | If we enabled this code, we should make sure to skip it if |
188 | born_transactional is true. We should not test |
189 | now_transactional to decide if we can call |
190 | _ma_state_info_read_dsk(), because it can temporarily be 0 |
191 | (TRUNCATE on a partitioned table) and thus it would make a state |
192 | modification below without mutex, confusing a concurrent |
193 | checkpoint running. |
194 | Even if this code was enabled only for non-transactional tables: |
195 | in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1; |
196 | state on disk read by DELETE is obsolete as it was not flushed |
197 | at the end of INSERT. MyISAM same. It however causes no issue as |
198 | maria_delete_all_rows() calls _ma_reset_status() thus is not |
199 | influenced by the obsolete read values. |
200 | */ |
201 | if (_ma_state_info_read_dsk(share->kfile.file, &share->state)) |
202 | { |
203 | error=my_errno; |
204 | break; |
205 | } |
206 | } |
207 | } |
208 | } |
209 | #endif /* defined(MARIA_EXTERNAL_LOCKING) */ |
210 | _ma_test_if_changed(info); |
211 | |
212 | info->lock_type=lock_type; |
213 | info->invalidator=share->invalidator; |
214 | share->w_locks++; |
215 | share->tot_locks++; |
216 | break; |
217 | default: |
218 | DBUG_ASSERT(0); |
219 | break; /* Impossible */ |
220 | } |
221 | } |
222 | #ifdef __WIN__ |
223 | else |
224 | { |
225 | /* |
226 | Check for bad file descriptors if this table is part |
227 | of a merge union. Failing to capture this may cause |
228 | a crash on windows if the table is renamed and |
229 | later on referenced by the merge table. |
230 | */ |
231 | if( info->owned_by_merge && (info->s)->kfile.file < 0 ) |
232 | { |
233 | error = HA_ERR_NO_SUCH_TABLE; |
234 | } |
235 | } |
236 | #endif |
237 | mysql_mutex_unlock(&share->intern_lock); |
238 | DBUG_RETURN(error); |
239 | } /* maria_lock_database */ |
240 | |
241 | |
242 | /**************************************************************************** |
243 | ** functions to read / write the state |
244 | ****************************************************************************/ |
245 | |
246 | int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)), |
247 | int lock_type __attribute__ ((unused)), |
248 | int check_keybuffer __attribute__ ((unused))) |
249 | { |
250 | #ifdef MARIA_EXTERNAL_LOCKING |
251 | DBUG_ENTER("_ma_readinfo" ); |
252 | |
253 | if (info->lock_type == F_UNLCK) |
254 | { |
255 | MARIA_SHARE *share= info->s; |
256 | if (!share->tot_locks) |
257 | { |
258 | /* should not be done for transactional tables */ |
259 | if (_ma_state_info_read_dsk(share->kfile.file, &share->state)) |
260 | { |
261 | if (!my_errno) |
262 | my_errno= HA_ERR_FILE_TOO_SHORT; |
263 | DBUG_RETURN(1); |
264 | } |
265 | } |
266 | if (check_keybuffer) |
267 | VOID(_ma_test_if_changed(info)); |
268 | info->invalidator=share->invalidator; |
269 | } |
270 | else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK) |
271 | { |
272 | my_errno=EACCES; /* Not allowed to change */ |
273 | DBUG_RETURN(-1); /* when have read_lock() */ |
274 | } |
275 | DBUG_RETURN(0); |
276 | #else |
277 | return 0; |
278 | #endif /* defined(MARIA_EXTERNAL_LOCKING) */ |
279 | } /* _ma_readinfo */ |
280 | |
281 | |
282 | /* |
283 | Every isam-function that uppdates the isam-database MUST end with this |
284 | request |
285 | |
286 | NOTES |
287 | my_errno is not changed if this succeeds! |
288 | */ |
289 | |
290 | int _ma_writeinfo(register MARIA_HA *info, uint operation) |
291 | { |
292 | int error,olderror; |
293 | MARIA_SHARE *share= info->s; |
294 | DBUG_ENTER("_ma_writeinfo" ); |
295 | DBUG_PRINT("info" ,("operation: %u tot_locks: %u" , operation, |
296 | share->tot_locks)); |
297 | |
298 | error=0; |
299 | if (share->tot_locks == 0 && !share->base.born_transactional) |
300 | { |
301 | /* transactional tables flush their state at Checkpoint */ |
302 | if (operation) |
303 | { /* Two threads can't be here */ |
304 | olderror= my_errno; /* Remember last error */ |
305 | |
306 | #ifdef MARIA_EXTERNAL_LOCKING |
307 | /* |
308 | The following only makes sense if we want to be allow two different |
309 | processes access the same table at the same time |
310 | */ |
311 | share->state.process= share->last_process= share->this_process; |
312 | share->state.unique= info->last_unique= info->this_unique; |
313 | share->state.update_count= info->last_loop= ++info->this_loop; |
314 | #endif |
315 | |
316 | if ((error= |
317 | _ma_state_info_write_sub(share->kfile.file, |
318 | &share->state, |
319 | MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))) |
320 | olderror=my_errno; |
321 | #ifdef __WIN__ |
322 | if (maria_flush) |
323 | { |
324 | _commit(share->kfile.file); |
325 | _commit(info->dfile.file); |
326 | } |
327 | #endif |
328 | my_errno=olderror; |
329 | } |
330 | } |
331 | else if (operation) |
332 | share->changed= 1; /* Mark keyfile changed */ |
333 | DBUG_RETURN(error); |
334 | } /* _ma_writeinfo */ |
335 | |
336 | |
337 | /* |
338 | Test if an external process has changed the database |
339 | (Should be called after readinfo) |
340 | */ |
341 | |
342 | int _ma_test_if_changed(register MARIA_HA *info) |
343 | { |
344 | #ifdef MARIA_EXTERNAL_LOCKING |
345 | MARIA_SHARE *share= info->s; |
346 | if (share->state.process != share->last_process || |
347 | share->state.unique != info->last_unique || |
348 | share->state.update_count != info->last_loop) |
349 | { /* Keyfile has changed */ |
350 | DBUG_PRINT("info" ,("index file changed" )); |
351 | if (share->state.process != share->this_process) |
352 | VOID(flush_pagecache_blocks(share->pagecache, &share->kfile, |
353 | FLUSH_RELEASE)); |
354 | share->last_process=share->state.process; |
355 | info->last_unique= share->state.unique; |
356 | info->last_loop= share->state.update_count; |
357 | info->update|= HA_STATE_WRITTEN; /* Must use file on next */ |
358 | info->data_changed= 1; /* For maria_is_changed */ |
359 | return 1; |
360 | } |
361 | #endif |
362 | return (!(info->update & HA_STATE_AKTIV) || |
363 | (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED | |
364 | HA_STATE_KEY_CHANGED))); |
365 | } /* _ma_test_if_changed */ |
366 | |
367 | |
/*
  Put a mark in the .MAI file that someone is updating the table

  DOCUMENTATION
  state.open_count in the .MAI file is used the following way:
  - For the first change of the .MAI file in this process open_count is
    incremented by _ma_mark_file_changed(). (We have a write lock on the
    file when this happens)
  - In maria_close() it's decremented by _ma_decrement_open_count() if it
    was incremented in the same process.

  This means that if we are the only process using the file, the open_count
  tells us if the MARIA file wasn't properly closed. (This is true if
  my_disable_locking is set).

  open_count is not maintained on disk for temporary tables.
*/
385 | |
386 | #define _MA_ALREADY_MARKED_FILE_CHANGED \ |
387 | ((share->state.changed & STATE_CHANGED) && share->global_changed) |
388 | |
389 | int _ma_mark_file_changed(register MARIA_SHARE *share) |
390 | { |
391 | if (!share->base.born_transactional) |
392 | { |
393 | if (!_MA_ALREADY_MARKED_FILE_CHANGED) |
394 | return _ma_mark_file_changed_now(share); |
395 | } |
396 | else |
397 | { |
398 | /* |
399 | For transactional tables, the table is marked changed when the first page |
400 | is written. Here we just mark the state to be updated so that caller |
401 | can do 'analyze table' and find that is has changed before any pages |
402 | are written. |
403 | */ |
404 | if (! test_all_bits(share->state.changed, |
405 | (STATE_CHANGED | STATE_NOT_ANALYZED | |
406 | STATE_NOT_OPTIMIZED_KEYS))) |
407 | { |
408 | mysql_mutex_lock(&share->intern_lock); |
409 | share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED | |
410 | STATE_NOT_OPTIMIZED_KEYS); |
411 | mysql_mutex_unlock(&share->intern_lock); |
412 | } |
413 | } |
414 | return 0; |
415 | } |
416 | |
417 | int _ma_mark_file_changed_now(register MARIA_SHARE *share) |
418 | { |
419 | uchar buff[3]; |
420 | int error= 1; |
421 | DBUG_ENTER("_ma_mark_file_changed_now" ); |
422 | |
423 | if (_MA_ALREADY_MARKED_FILE_CHANGED) |
424 | DBUG_RETURN(0); |
425 | mysql_mutex_lock(&share->intern_lock); /* recheck under mutex */ |
426 | if (! _MA_ALREADY_MARKED_FILE_CHANGED) |
427 | { |
428 | share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED | |
429 | STATE_NOT_OPTIMIZED_KEYS); |
430 | if (!share->global_changed) |
431 | { |
432 | share->changed= share->global_changed= 1; |
433 | share->state.open_count++; |
434 | } |
435 | /* |
436 | Temp tables don't need an open_count as they are removed on crash. |
437 | In theory transactional tables are fixed by log-based recovery, so don't |
438 | need an open_count either, but if recovery has failed and logs have been |
439 | removed (by maria-force-start-after-recovery-failures), we still need to |
440 | detect dubious tables. |
441 | If we didn't maintain open_count on disk for a table, after a crash |
442 | we wouldn't know if it was closed at crash time (thus does not need a |
443 | check) or not. So we would have to check all tables: overkill. |
444 | */ |
445 | if (!share->temporary) |
446 | { |
447 | mi_int2store(buff,share->state.open_count); |
448 | buff[2]=1; /* Mark that it's changed */ |
449 | if (my_pwrite(share->kfile.file, buff, sizeof(buff), |
450 | sizeof(share->state.header) + |
451 | MARIA_FILE_OPEN_COUNT_OFFSET, |
452 | MYF(MY_NABP))) |
453 | goto err; |
454 | } |
455 | /* Set uuid of file if not yet set (zerofilled file) */ |
456 | if (share->base.born_transactional && |
457 | !(share->state.changed & STATE_NOT_MOVABLE)) |
458 | { |
459 | /* Lock table to current installation */ |
460 | if (_ma_set_uuid(share, 0) || |
461 | (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS && |
462 | _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE, |
463 | trnman_get_min_trid(), |
464 | TRUE, TRUE))) |
465 | goto err; |
466 | share->state.changed|= STATE_NOT_MOVABLE; |
467 | } |
468 | } |
469 | error= 0; |
470 | err: |
471 | mysql_mutex_unlock(&share->intern_lock); |
472 | DBUG_RETURN(error); |
473 | #undef _MA_ALREADY_MARKED_FILE_CHANGED |
474 | } |
475 | |
476 | /* |
477 | Check that a region is all zero |
478 | |
479 | SYNOPSIS |
480 | check_if_zero() |
481 | pos Start of memory to check |
482 | length length of memory region |
483 | |
484 | NOTES |
485 | Used mainly to detect rows with wrong extent information |
486 | */ |
487 | |
488 | my_bool _ma_check_if_zero(uchar *pos, size_t length) |
489 | { |
490 | uchar *end; |
491 | for (end= pos+ length; pos != end ; pos++) |
492 | if (pos[0] != 0) |
493 | return 1; |
494 | return 0; |
495 | } |
496 | |
497 | /* |
498 | This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite() |
499 | call. In these context the following code should be safe! |
500 | */ |
501 | |
502 | int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables) |
503 | { |
504 | uchar buff[2]; |
505 | register MARIA_SHARE *share= info->s; |
506 | int lock_error=0,write_error=0; |
507 | DBUG_ENTER("_ma_decrement_open_count" ); |
508 | |
509 | if (share->global_changed) |
510 | { |
511 | uint old_lock=info->lock_type; |
512 | share->global_changed=0; |
513 | lock_error= (my_disable_locking || ! lock_tables ? 0 : |
514 | maria_lock_database(info, F_WRLCK)); |
515 | /* Its not fatal even if we couldn't get the lock ! */ |
516 | if (share->state.open_count > 0) |
517 | { |
518 | share->state.open_count--; |
519 | share->changed= 1; /* We have to update state */ |
520 | /* |
521 | For temporary tables that will just be deleted, we don't have |
522 | to decrement state. For transactional tables the state will be |
523 | updated in maria_close(). |
524 | */ |
525 | |
526 | if (!share->temporary && !share->now_transactional) |
527 | { |
528 | mi_int2store(buff,share->state.open_count); |
529 | write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff), |
530 | sizeof(share->state.header) + |
531 | MARIA_FILE_OPEN_COUNT_OFFSET, |
532 | MYF(MY_NABP)); |
533 | } |
534 | } |
535 | if (!lock_error && !my_disable_locking && lock_tables) |
536 | lock_error=maria_lock_database(info,old_lock); |
537 | } |
538 | DBUG_RETURN(MY_TEST(lock_error || write_error)); |
539 | } |
540 | |
541 | |
542 | /** @brief mark file as crashed */ |
543 | |
544 | void _ma_mark_file_crashed(MARIA_SHARE *share) |
545 | { |
546 | uchar buff[2]; |
547 | DBUG_ENTER("_ma_mark_file_crashed" ); |
548 | |
549 | share->state.changed|= STATE_CRASHED; |
550 | mi_int2store(buff, share->state.changed); |
551 | /* |
552 | We can ignore the errors, as if the mark failed, there isn't anything |
553 | else we can do; The user should already have got an error that the |
554 | table was crashed. |
555 | */ |
556 | (void) my_pwrite(share->kfile.file, buff, sizeof(buff), |
557 | sizeof(share->state.header) + |
558 | MARIA_FILE_CHANGED_OFFSET, |
559 | MYF(MY_NABP)); |
560 | DBUG_VOID_RETURN; |
561 | } |
562 | |
563 | /* |
564 | Handle a fatal error |
565 | |
566 | - Mark the table as crashed |
567 | - Print an error message, if we had not issued an error message before |
568 | that the table had been crashed. |
569 | - set my_errno to error |
570 | - If 'maria_assert_if_crashed_table is set, then assert. |
571 | */ |
572 | |
573 | void _ma_set_fatal_error(MARIA_SHARE *share, int error) |
574 | { |
575 | DBUG_PRINT("error" , ("error: %d" , error)); |
576 | maria_mark_crashed_share(share); |
577 | if (!(share->state.changed & STATE_CRASHED_PRINTED)) |
578 | { |
579 | share->state.changed|= STATE_CRASHED_PRINTED; |
580 | maria_print_error(share, error); |
581 | } |
582 | my_errno= error; |
583 | DBUG_ASSERT(!maria_assert_if_crashed_table); |
584 | } |
585 | |
586 | |
587 | /** |
588 | @brief Set uuid of for a Maria file |
589 | |
590 | @fn _ma_set_uuid() |
591 | @param share Maria share |
592 | @param reset_uuid Instead of setting file to maria_uuid, set it to |
593 | 0 to mark it as movable |
594 | */ |
595 | |
596 | my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid) |
597 | { |
598 | uchar buff[MY_UUID_SIZE], *uuid; |
599 | |
600 | uuid= maria_uuid; |
601 | if (reset_uuid) |
602 | { |
603 | bzero(buff, sizeof(buff)); |
604 | uuid= buff; |
605 | } |
606 | return (my_bool) my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE, |
607 | mi_uint2korr(share->state.header.base_pos), |
608 | MYF(MY_NABP)); |
609 | } |
610 | |