1 | /* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. |
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; version 2 of the License. |
6 | |
7 | This program is distributed in the hope that it will be useful, |
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | GNU General Public License for more details. |
11 | |
12 | You should have received a copy of the GNU General Public License |
13 | along with this program; if not, write to the Free Software |
14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
15 | |
16 | |
17 | #include <my_global.h> |
18 | #include <my_sys.h> |
19 | #include <m_string.h> |
20 | #include "trnman.h" |
21 | #include "ma_checkpoint.h" |
22 | #include "ma_control_file.h" |
23 | |
24 | /* |
25 | status variables: |
26 | how many trns in the active list currently, |
27 | in the committed list currently, allocated since startup. |
28 | */ |
29 | uint trnman_active_transactions, trnman_committed_transactions, |
30 | trnman_allocated_transactions; |
31 | |
32 | #ifdef WORKAROUND_GCC_4_3_2_BUG |
33 | volatile |
34 | #endif |
35 | /* list of active transactions in the trid order */ |
36 | static TRN active_list_min, active_list_max; |
37 | /* list of committed transactions in the trid order */ |
38 | static TRN committed_list_min, committed_list_max; |
39 | |
40 | /* a counter, used to generate transaction ids */ |
41 | static TrID global_trid_generator; |
42 | |
43 | /* |
44 | The minimum existing transaction id for trnman_get_min_trid() |
45 | The default value is used when transaction manager not initialize; |
46 | Probably called from maria_chk |
47 | */ |
48 | static TrID trid_min_read_from= MAX_TRID; |
49 | |
50 | /* the mutex for everything above */ |
51 | static mysql_mutex_t LOCK_trn_list; |
52 | |
53 | /* LIFO pool of unused TRN structured for reuse */ |
54 | static TRN *pool; |
55 | |
56 | /* a hash for committed transactions that maps trid to a TRN structure */ |
57 | static LF_HASH trid_to_trn; |
58 | |
59 | /* an array that maps short_id of an active transaction to a TRN structure */ |
60 | static TRN **short_trid_to_active_trn; |
61 | |
62 | /* locks for short_trid_to_active_trn and pool */ |
63 | static my_bool default_trnman_end_trans_hook(TRN *, my_bool, my_bool); |
64 | static void trnman_free_trn(TRN *); |
65 | |
66 | my_bool (*trnman_end_trans_hook)(TRN *, my_bool, my_bool)= |
67 | default_trnman_end_trans_hook; |
68 | |
69 | /* |
70 | Simple interface functions |
71 | QQ: if they stay so simple, should we make them inline? |
72 | */ |
73 | |
74 | uint trnman_increment_locked_tables(TRN *trn) |
75 | { |
76 | return trn->locked_tables++; |
77 | } |
78 | |
79 | uint trnman_has_locked_tables(TRN *trn) |
80 | { |
81 | return trn->locked_tables; |
82 | } |
83 | |
84 | uint trnman_decrement_locked_tables(TRN *trn) |
85 | { |
86 | return --trn->locked_tables; |
87 | } |
88 | |
89 | void trnman_reset_locked_tables(TRN *trn, uint locked_tables) |
90 | { |
91 | trn->locked_tables= locked_tables; |
92 | } |
93 | |
94 | #ifdef EXTRA_DEBUG |
95 | uint16 trnman_get_flags(TRN *trn) |
96 | { |
97 | return trn->flags; |
98 | } |
99 | |
100 | void trnman_set_flags(TRN *trn, uint16 flags) |
101 | { |
102 | trn->flags= flags; |
103 | } |
104 | #endif |
105 | |
106 | /** Wake up threads waiting for this transaction */ |
107 | static void wt_thd_release_self(TRN *trn) |
108 | { |
109 | if (trn->wt) |
110 | { |
111 | WT_RESOURCE_ID rc; |
112 | rc.type= &ma_rc_dup_unique; |
113 | rc.value= (intptr)trn; |
114 | wt_thd_release(trn->wt, & rc); |
115 | trn->wt= 0; |
116 | } |
117 | } |
118 | |
119 | static my_bool |
120 | default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)), |
121 | my_bool commit __attribute__ ((unused)), |
122 | my_bool active_transactions |
123 | __attribute__ ((unused))) |
124 | { |
125 | return 0; |
126 | } |
127 | |
128 | |
129 | static uchar *trn_get_hash_key(const uchar *trn, size_t *len, |
130 | my_bool unused __attribute__ ((unused))) |
131 | { |
132 | *len= sizeof(TrID); |
133 | return (uchar *) & ((*((TRN **)trn))->trid); |
134 | } |
135 | |
136 | |
137 | /** |
138 | @brief Initializes transaction manager. |
139 | |
140 | @param initial_trid Generated TrIDs will start from initial_trid+1. |
141 | |
142 | @return Operation status |
143 | @retval 0 OK |
144 | @retval !=0 Error |
145 | */ |
146 | |
147 | int trnman_init(TrID initial_trid) |
148 | { |
149 | DBUG_ENTER("trnman_init" ); |
150 | DBUG_PRINT("enter" , ("initial_trid: %lu" , (ulong) initial_trid)); |
151 | |
152 | short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), |
153 | MYF(MY_WME|MY_ZEROFILL)); |
154 | if (unlikely(!short_trid_to_active_trn)) |
155 | DBUG_RETURN(1); |
156 | short_trid_to_active_trn--; /* min short_id is 1 */ |
157 | |
158 | /* |
159 | Initialize lists. |
160 | active_list_max.min_read_from must be larger than any trid, |
161 | so that when an active list is empty we would could free |
162 | all committed list. |
163 | And committed_list_max itself can not be freed so |
164 | committed_list_max.commit_trid must not be smaller that |
165 | active_list_max.min_read_from |
166 | */ |
167 | |
168 | active_list_max.trid= active_list_min.trid= 0; |
169 | active_list_max.min_read_from= MAX_TRID; |
170 | active_list_max.next= active_list_min.prev= 0; |
171 | active_list_max.prev= &active_list_min; |
172 | active_list_min.next= &active_list_max; |
173 | |
174 | committed_list_max.commit_trid= MAX_TRID; |
175 | committed_list_max.next= committed_list_min.prev= 0; |
176 | committed_list_max.prev= &committed_list_min; |
177 | committed_list_min.next= &committed_list_max; |
178 | |
179 | trnman_active_transactions= 0; |
180 | trnman_committed_transactions= 0; |
181 | trnman_allocated_transactions= 0; |
182 | /* This is needed for recovery and repair */ |
183 | dummy_transaction_object.min_read_from= ~(TrID) 0; |
184 | dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID; |
185 | |
186 | pool= 0; |
187 | global_trid_generator= initial_trid; |
188 | trid_min_read_from= initial_trid; |
189 | lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE, |
190 | 0, 0, trn_get_hash_key, 0); |
191 | DBUG_PRINT("info" , ("mysql_mutex_init LOCK_trn_list" )); |
192 | mysql_mutex_init(key_LOCK_trn_list, &LOCK_trn_list, MY_MUTEX_INIT_FAST); |
193 | |
194 | DBUG_RETURN(0); |
195 | } |
196 | |
197 | /* |
198 | NOTE |
199 | this could only be called in the "idle" state - no transaction can be |
200 | running. See asserts below. |
201 | */ |
202 | void trnman_destroy() |
203 | { |
204 | DBUG_ENTER("trnman_destroy" ); |
205 | |
206 | if (short_trid_to_active_trn == NULL) /* trnman already destroyed */ |
207 | DBUG_VOID_RETURN; |
208 | DBUG_ASSERT(trid_to_trn.count == 0); |
209 | DBUG_ASSERT(trnman_active_transactions == 0); |
210 | DBUG_ASSERT(trnman_committed_transactions == 0); |
211 | DBUG_ASSERT(active_list_max.prev == &active_list_min); |
212 | DBUG_ASSERT(active_list_min.next == &active_list_max); |
213 | DBUG_ASSERT(committed_list_max.prev == &committed_list_min); |
214 | DBUG_ASSERT(committed_list_min.next == &committed_list_max); |
215 | while (pool) |
216 | { |
217 | TRN *trn= pool; |
218 | pool= pool->next; |
219 | DBUG_ASSERT(trn->wt == NULL); |
220 | mysql_mutex_destroy(&trn->state_lock); |
221 | my_free(trn); |
222 | } |
223 | lf_hash_destroy(&trid_to_trn); |
224 | DBUG_PRINT("info" , ("mysql_mutex_destroy LOCK_trn_list" )); |
225 | mysql_mutex_destroy(&LOCK_trn_list); |
226 | my_free(short_trid_to_active_trn+1); |
227 | short_trid_to_active_trn= NULL; |
228 | |
229 | DBUG_VOID_RETURN; |
230 | } |
231 | |
232 | /* |
233 | NOTE |
234 | TrID is limited to 6 bytes. Initial value of the generator |
235 | is set by the recovery code - being read from the last checkpoint |
236 | (or 1 on a first run). |
237 | */ |
238 | static TrID new_trid() |
239 | { |
240 | DBUG_ENTER("new_trid" ); |
241 | DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); |
242 | DBUG_PRINT("info" , ("mysql_mutex_assert_owner LOCK_trn_list" )); |
243 | mysql_mutex_assert_owner(&LOCK_trn_list); |
244 | DBUG_RETURN(++global_trid_generator); |
245 | } |
246 | |
247 | static uint get_short_trid(TRN *trn) |
248 | { |
249 | int i= (int) ((global_trid_generator + (intptr)trn) * 312089 % |
250 | SHORT_TRID_MAX) + 1; |
251 | uint res=0; |
252 | |
253 | for ( ; !res ; i= 1) |
254 | { |
255 | for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */ |
256 | { |
257 | void *tmp= NULL; |
258 | if (short_trid_to_active_trn[i] == NULL && |
259 | my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn)) |
260 | { |
261 | res= i; |
262 | break; |
263 | } |
264 | } |
265 | } |
266 | return res; |
267 | } |
268 | |
269 | /** |
270 | Allocates and initialzies a new TRN object |
271 | |
272 | @note the 'wt' parameter can only be 0 in a single-threaded code (or, |
273 | generally, where threads cannot block each other), otherwise the |
274 | first call to the deadlock detector will sigsegv. |
275 | */ |
276 | |
277 | TRN *trnman_new_trn(WT_THD *wt) |
278 | { |
279 | int res; |
280 | TRN *trn; |
281 | union { TRN *trn; void *v; } tmp; |
282 | DBUG_ENTER("trnman_new_trn" ); |
283 | |
284 | /* |
285 | we have a mutex, to do simple things under it - allocate a TRN, |
286 | increment trnman_active_transactions, set trn->min_read_from. |
287 | |
288 | Note that all the above is fast. generating short_id may be slow, |
289 | as it involves scanning a large array - so it's done outside of the |
290 | mutex. |
291 | */ |
292 | |
293 | DBUG_PRINT("info" , ("mysql_mutex_lock LOCK_trn_list" )); |
294 | mysql_mutex_lock(&LOCK_trn_list); |
295 | |
296 | /* Allocating a new TRN structure */ |
297 | tmp.trn= pool; |
298 | /* |
299 | Popping an unused TRN from the pool |
300 | (ABA isn't possible, we're behind a mutex |
301 | */ |
302 | while (tmp.trn && !my_atomic_casptr((void **)(char*) &pool, &tmp.v, |
303 | (void *)tmp.trn->next)) |
304 | /* no-op */; |
305 | |
306 | /* Nothing in the pool ? Allocate a new one */ |
307 | if (!(trn= tmp.trn)) |
308 | { |
309 | /* |
310 | trn should be completely initalized at create time to allow |
311 | one to keep a known state on it. |
312 | (Like redo_lns, which is assumed to be 0 at start of row handling |
313 | and reset to zero before end of row handling) |
314 | */ |
315 | trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME | MY_ZEROFILL)); |
316 | if (unlikely(!trn)) |
317 | { |
318 | DBUG_PRINT("info" , ("mysql_mutex_unlock LOCK_trn_list" )); |
319 | mysql_mutex_unlock(&LOCK_trn_list); |
320 | return 0; |
321 | } |
322 | trnman_allocated_transactions++; |
323 | mysql_mutex_init(key_TRN_state_lock, &trn->state_lock, MY_MUTEX_INIT_FAST); |
324 | } |
325 | trn->wt= wt; |
326 | trn->pins= lf_hash_get_pins(&trid_to_trn); |
327 | if (!trn->pins) |
328 | { |
329 | trnman_free_trn(trn); |
330 | mysql_mutex_unlock(&LOCK_trn_list); |
331 | return 0; |
332 | } |
333 | |
334 | trnman_active_transactions++; |
335 | |
336 | trn->min_read_from= active_list_min.next->trid; |
337 | |
338 | trn->trid= new_trid(); |
339 | |
340 | trn->next= &active_list_max; |
341 | trn->prev= active_list_max.prev; |
342 | active_list_max.prev= trn->prev->next= trn; |
343 | trid_min_read_from= active_list_min.next->min_read_from; |
344 | DBUG_PRINT("info" , ("mysql_mutex_unlock LOCK_trn_list" )); |
345 | mysql_mutex_unlock(&LOCK_trn_list); |
346 | |
347 | if (unlikely(!trn->min_read_from)) |
348 | { |
349 | /* |
350 | We are the only transaction. Set min_read_from so that we can read |
351 | our own rows |
352 | */ |
353 | trn->min_read_from= trn->trid + 1; |
354 | } |
355 | |
356 | /* no other transaction can read changes done by this one */ |
357 | trn->commit_trid= MAX_TRID; |
358 | trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0; |
359 | trn->used_tables= 0; |
360 | |
361 | trn->locked_tables= 0; |
362 | trn->flags= 0; |
363 | |
364 | /* |
365 | only after the following function TRN is considered initialized, |
366 | so it must be done the last |
367 | */ |
368 | mysql_mutex_lock(&trn->state_lock); |
369 | trn->short_id= get_short_trid(trn); |
370 | mysql_mutex_unlock(&trn->state_lock); |
371 | |
372 | res= lf_hash_insert(&trid_to_trn, trn->pins, &trn); |
373 | DBUG_ASSERT(res <= 0); |
374 | if (res) |
375 | { |
376 | trnman_end_trn(trn, 0); |
377 | return 0; |
378 | } |
379 | |
380 | DBUG_PRINT("exit" , ("trn: %p trid: 0x%lu min_read_from: 0x%lu" , |
381 | trn, (ulong) trn->trid, (ulong) trn->min_read_from)); |
382 | |
383 | DBUG_RETURN(trn); |
384 | } |
385 | |
386 | /* |
387 | remove a trn from the active list. |
388 | if necessary - move to committed list and set commit_trid |
389 | |
390 | NOTE |
391 | Locks are released at the end. In particular, after placing the |
392 | transaction in commit list, and after setting commit_trid. It's |
393 | important, as commit_trid affects visibility. Locks don't affect |
394 | anything they simply delay execution of other threads - they could be |
395 | released arbitrarily late. In other words, when locks are released it |
396 | serves as a start banner for other threads, they start to run. So |
397 | everything they may need must be ready at that point. |
398 | |
399 | RETURN |
400 | 0 ok |
401 | 1 error |
402 | */ |
403 | my_bool trnman_end_trn(TRN *trn, my_bool commit) |
404 | { |
405 | int res= 1; |
406 | uint16 cached_short_id= trn->short_id; /* we have to cache it, see below */ |
407 | TRN *free_me= 0; |
408 | LF_PINS *pins= trn->pins; |
409 | DBUG_ENTER("trnman_end_trn" ); |
410 | DBUG_PRINT("enter" , ("trn: %p commit: %d" , trn, commit)); |
411 | |
412 | /* if a rollback, all UNDO records should have been executed */ |
413 | DBUG_ASSERT(commit || trn->undo_lsn == 0); |
414 | DBUG_ASSERT(trn != &dummy_transaction_object); |
415 | DBUG_PRINT("info" , ("mysql_mutex_lock LOCK_trn_list" )); |
416 | |
417 | mysql_mutex_lock(&LOCK_trn_list); |
418 | |
419 | /* remove from active list */ |
420 | trn->next->prev= trn->prev; |
421 | trn->prev->next= trn->next; |
422 | |
423 | /* |
424 | if trn was the oldest active transaction, now that it goes away there |
425 | may be committed transactions in the list which no active transaction |
426 | needs to bother about - clean up the committed list |
427 | */ |
428 | if (trn->prev == &active_list_min) |
429 | { |
430 | uint free_me_count; |
431 | TRN *t; |
432 | for (t= committed_list_min.next, free_me_count= 0; |
433 | t->commit_trid < active_list_min.next->min_read_from; |
434 | t= t->next, free_me_count++) /* no-op */; |
435 | |
436 | DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) || |
437 | (t == committed_list_min.next && free_me_count == 0)); |
438 | /* found transactions committed before the oldest active one */ |
439 | if (t != committed_list_min.next) |
440 | { |
441 | free_me= committed_list_min.next; |
442 | committed_list_min.next= t; |
443 | t->prev->next= 0; |
444 | t->prev= &committed_list_min; |
445 | trnman_committed_transactions-= free_me_count; |
446 | } |
447 | } |
448 | |
449 | mysql_mutex_lock(&trn->state_lock); |
450 | if (commit) |
451 | trn->commit_trid= global_trid_generator; |
452 | wt_thd_release_self(trn); |
453 | mysql_mutex_unlock(&trn->state_lock); |
454 | |
455 | /* |
456 | if transaction is committed and it was not the only active transaction - |
457 | add it to the committed list |
458 | */ |
459 | if (commit && active_list_min.next != &active_list_max) |
460 | { |
461 | trn->next= &committed_list_max; |
462 | trn->prev= committed_list_max.prev; |
463 | trnman_committed_transactions++; |
464 | committed_list_max.prev= trn->prev->next= trn; |
465 | } |
466 | else |
467 | { |
468 | trn->next= free_me; |
469 | free_me= trn; |
470 | } |
471 | trid_min_read_from= active_list_min.next->min_read_from; |
472 | |
473 | if ((*trnman_end_trans_hook)(trn, commit, |
474 | active_list_min.next != &active_list_max)) |
475 | res= -1; |
476 | trnman_active_transactions--; |
477 | |
478 | DBUG_PRINT("info" , ("mysql_mutex_unlock LOCK_trn_list" )); |
479 | mysql_mutex_unlock(&LOCK_trn_list); |
480 | |
481 | /* |
482 | the rest is done outside of a critical section |
483 | |
484 | note that we don't own trn anymore, it may be in a shared list now. |
485 | Thus, we cannot dereference it, and must use cached_short_id below. |
486 | */ |
487 | my_atomic_storeptr((void **)&short_trid_to_active_trn[cached_short_id], 0); |
488 | |
489 | /* |
490 | we, under the mutex, removed going-in-free_me transactions from the |
491 | active and committed lists, thus nobody else may see them when it scans |
492 | those lists, and thus nobody may want to free them. Now we don't |
493 | need a mutex to access free_me list |
494 | */ |
495 | /* QQ: send them to the purge thread */ |
496 | while (free_me) |
497 | { |
498 | TRN *t= free_me; |
499 | free_me= free_me->next; |
500 | |
501 | /* ignore OOM. it's harmless, and we can do nothing here anyway */ |
502 | (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID)); |
503 | |
504 | trnman_free_trn(t); |
505 | } |
506 | |
507 | lf_hash_put_pins(pins); |
508 | |
509 | DBUG_RETURN(res < 0); |
510 | } |
511 | |
512 | /* |
513 | free a trn (add to the pool, that is) |
514 | note - we can never really free() a TRN if there's at least one other |
515 | running transaction - see, e.g., how lock waits are implemented in |
516 | lockman.c |
517 | The same is true for other lock-free data structures too. We may need some |
518 | kind of FLUSH command to reset them all - ensuring that no transactions are |
519 | running. It may even be called automatically on checkpoints if no |
520 | transactions are running. |
521 | */ |
522 | static void trnman_free_trn(TRN *trn) |
523 | { |
524 | /* |
525 | union is to solve strict aliasing issue. |
526 | without it gcc 3.4.3 doesn't notice that updating *(void **)&tmp |
527 | modifies the value of tmp. |
528 | */ |
529 | union { TRN *trn; void *v; } tmp; |
530 | |
531 | mysql_mutex_lock(&trn->state_lock); |
532 | trn->short_id= 0; |
533 | mysql_mutex_unlock(&trn->state_lock); |
534 | |
535 | tmp.trn= pool; |
536 | |
537 | do |
538 | { |
539 | /* |
540 | without this volatile cast gcc-3.4.4 moves the assignment |
541 | down after the loop at -O2 |
542 | */ |
543 | *(TRN * volatile *)&(trn->next)= tmp.trn; |
544 | } while (!my_atomic_casptr((void **)(char*)&pool, &tmp.v, trn)); |
545 | } |
546 | |
547 | /* |
548 | NOTE |
549 | here we access the hash in a lock-free manner. |
550 | It's safe, a 'found' TRN can never be freed/reused before we access it. |
551 | In fact, it cannot be freed before 'trn' ends, because a 'found' TRN |
552 | can only be removed from the hash when: |
553 | found->commit_trid < ALL (trn->min_read_from) |
554 | that is, at least |
555 | found->commit_trid < trn->min_read_from |
556 | but |
557 | found->trid >= trn->min_read_from |
558 | and |
559 | found->commit_trid > found->trid |
560 | |
561 | RETURN |
562 | 1 can |
563 | 0 cannot |
564 | -1 error (OOM) |
565 | */ |
566 | int trnman_can_read_from(TRN *trn, TrID trid) |
567 | { |
568 | TRN **found; |
569 | my_bool can; |
570 | |
571 | if (trid < trn->min_read_from) |
572 | return 1; /* Row is visible by all transactions in the system */ |
573 | |
574 | if (trid >= trn->trid) |
575 | { |
576 | /* |
577 | We have now two cases |
578 | trid > trn->trid, in which case the row is from a new transaction |
579 | and not visible, in which case we should return 0. |
580 | trid == trn->trid in which case the row is from the current transaction |
581 | and we should return 1 |
582 | */ |
583 | return trid == trn->trid; |
584 | } |
585 | |
586 | found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); |
587 | if (found == NULL) |
588 | return 0; /* not in the hash of transactions = cannot read */ |
589 | if (found == MY_ERRPTR) |
590 | return -1; |
591 | |
592 | can= (*found)->commit_trid < trn->trid; |
593 | lf_hash_search_unpin(trn->pins); |
594 | return can; |
595 | } |
596 | |
597 | /** |
598 | Finds a TRN by its TrID |
599 | |
600 | @param trn current trn. Needed for pinning pointers (see lf_pin) |
601 | @param trid trid to search for |
602 | |
603 | @return found trn or 0 |
604 | |
605 | @note that trn is returned with its state locked! |
606 | */ |
607 | TRN *trnman_trid_to_trn(TRN *trn, TrID trid) |
608 | { |
609 | TRN **found; |
610 | |
611 | if (trid < trn->min_read_from) |
612 | return 0; /* it's committed eons ago */ |
613 | |
614 | found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); |
615 | if (found == NULL || found == MY_ERRPTR) |
616 | return 0; /* no luck */ |
617 | |
618 | /* we've found something */ |
619 | mysql_mutex_lock(&(*found)->state_lock); |
620 | |
621 | if ((*found)->short_id == 0) |
622 | { |
623 | mysql_mutex_unlock(&(*found)->state_lock); |
624 | lf_hash_search_unpin(trn->pins); |
625 | return 0; /* but it was a ghost */ |
626 | } |
627 | lf_hash_search_unpin(trn->pins); |
628 | |
629 | /* Gotcha! */ |
630 | return *found; |
631 | } |
632 | |
633 | /* TODO: the stubs below are waiting for savepoints to be implemented */ |
634 | |
635 | void trnman_new_statement(TRN *trn __attribute__ ((unused))) |
636 | { |
637 | } |
638 | |
639 | void trnman_rollback_statement(TRN *trn __attribute__ ((unused))) |
640 | { |
641 | } |
642 | |
643 | |
644 | /** |
645 | @brief Allocates buffers and stores in them some info about transactions |
646 | |
647 | Does the allocation because the caller cannot know the size itself. |
648 | Memory freeing is to be done by the caller (if the "str" member of the |
649 | LEX_STRING is not NULL). |
650 | The caller has the intention of doing checkpoints. |
651 | |
652 | @param[out] str_act pointer to where the allocated buffer, |
653 | and its size, will be put; buffer will be filled |
654 | with info about active transactions |
655 | @param[out] str_com pointer to where the allocated buffer, |
656 | and its size, will be put; buffer will be filled |
657 | with info about committed transactions |
658 | @param[out] min_first_undo_lsn pointer to where the minimum |
659 | first_undo_lsn of all transactions will be put |
660 | |
661 | @return Operation status |
662 | @retval 0 OK |
663 | @retval 1 Error |
664 | */ |
665 | |
666 | my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com, |
667 | LSN *min_rec_lsn, LSN *min_first_undo_lsn) |
668 | { |
669 | my_bool error; |
670 | TRN *trn; |
671 | char *ptr; |
672 | uint stored_transactions= 0; |
673 | LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX; |
674 | DBUG_ENTER("trnman_collect_transactions" ); |
675 | |
676 | DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str)); |
677 | |
678 | /* validate the use of read_non_atomic() in general: */ |
679 | compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8)); |
680 | mysql_mutex_lock(&LOCK_trn_list); |
681 | str_act->length= 2 + /* number of active transactions */ |
682 | LSN_STORE_SIZE + /* minimum of their rec_lsn */ |
683 | TRANSID_SIZE + /* current TrID generator value */ |
684 | (2 + /* short id */ |
685 | 6 + /* long id */ |
686 | LSN_STORE_SIZE + /* undo_lsn */ |
687 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
688 | LSN_STORE_SIZE + /* undo_purge_lsn */ |
689 | #endif |
690 | LSN_STORE_SIZE /* first_undo_lsn */ |
691 | ) * trnman_active_transactions; |
692 | str_com->length= 4 + /* number of committed transactions */ |
693 | (6 + /* long id */ |
694 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
695 | LSN_STORE_SIZE + /* undo_purge_lsn */ |
696 | #endif |
697 | LSN_STORE_SIZE /* first_undo_lsn */ |
698 | ) * trnman_committed_transactions; |
699 | if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) || |
700 | (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME))))) |
701 | goto err; |
702 | /* First, the active transactions */ |
703 | ptr= str_act->str + 2 + LSN_STORE_SIZE; |
704 | transid_store(ptr, global_trid_generator); |
705 | ptr+= TRANSID_SIZE; |
706 | for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) |
707 | { |
708 | uint sid; |
709 | LSN rec_lsn, undo_lsn, first_undo_lsn; |
710 | mysql_mutex_lock(&trn->state_lock); |
711 | sid= trn->short_id; |
712 | mysql_mutex_unlock(&trn->state_lock); |
713 | if (sid == 0) |
714 | { |
715 | /* |
716 | Not even inited, has done nothing. Or it is the |
717 | dummy_transaction_object, which does only non-transactional |
718 | immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so |
719 | can be forgotten for Checkpoint. |
720 | */ |
721 | continue; |
722 | } |
723 | /* needed for low-water mark calculation */ |
724 | if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) && |
725 | (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0)) |
726 | minimum_rec_lsn= rec_lsn; |
727 | /* |
728 | If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be |
729 | discovered when seeing that log record which is for sure located after |
730 | checkpoint_start_log_horizon. |
731 | */ |
732 | if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) & |
733 | TRANSACTION_LOGGED_LONG_ID) == 0) |
734 | continue; |
735 | /* |
736 | On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log |
737 | records; so we must include trn in the checkpoint now, because we cannot |
738 | count on LOGREC_LONG_TRANSACTION_ID (as we are already past it). |
739 | */ |
740 | undo_lsn= trn->undo_lsn; |
741 | stored_transactions++; |
742 | int2store(ptr, sid); |
743 | ptr+= 2; |
744 | int6store(ptr, trn->trid); |
745 | ptr+= 6; |
746 | lsn_store(ptr, undo_lsn); /* needed for rollback */ |
747 | ptr+= LSN_STORE_SIZE; |
748 | /* needed for low-water mark calculation */ |
749 | if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) && |
750 | (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)) |
751 | minimum_first_undo_lsn= first_undo_lsn; |
752 | lsn_store(ptr, first_undo_lsn); |
753 | ptr+= LSN_STORE_SIZE; |
754 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
755 | /* to know where purging should start (last delete of this trn) */ |
756 | lsn_store(ptr, trn->undo_purge_lsn); |
757 | ptr+= LSN_STORE_SIZE; |
758 | #endif |
759 | /** |
760 | @todo RECOVERY: add a comment explaining why we can dirtily read some |
761 | vars, inspired by the text of "assumption 8" in WL#3072 |
762 | */ |
763 | } |
764 | str_act->length= ptr - str_act->str; /* as we maybe over-estimated */ |
765 | ptr= str_act->str; |
766 | DBUG_PRINT("info" ,("collected %u active transactions" , |
767 | (uint)stored_transactions)); |
768 | int2store(ptr, stored_transactions); |
769 | ptr+= 2; |
770 | /* this LSN influences how REDOs for any page can be ignored by Recovery */ |
771 | lsn_store(ptr, minimum_rec_lsn); |
772 | /* one day there will also be a list of prepared transactions */ |
773 | /* do the same for committed ones */ |
774 | ptr= str_com->str; |
775 | int4store(ptr, trnman_committed_transactions); |
776 | ptr+= 4; |
777 | DBUG_PRINT("info" ,("collected %u committed transactions" , |
778 | (uint)trnman_committed_transactions)); |
779 | for (trn= committed_list_min.next; trn != &committed_list_max; |
780 | trn= trn->next) |
781 | { |
782 | LSN first_undo_lsn; |
783 | int6store(ptr, trn->trid); |
784 | ptr+= 6; |
785 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
786 | lsn_store(ptr, trn->undo_purge_lsn); |
787 | ptr+= LSN_STORE_SIZE; |
788 | #endif |
789 | first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn); |
790 | if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0) |
791 | minimum_first_undo_lsn= first_undo_lsn; |
792 | lsn_store(ptr, first_undo_lsn); |
793 | ptr+= LSN_STORE_SIZE; |
794 | } |
795 | /* |
796 | TODO: if we see there exists no transaction (active and committed) we can |
797 | tell the lock-free structures to do some freeing (my_free()). |
798 | */ |
799 | error= 0; |
800 | *min_rec_lsn= minimum_rec_lsn; |
801 | *min_first_undo_lsn= minimum_first_undo_lsn; |
802 | goto end; |
803 | err: |
804 | error= 1; |
805 | end: |
806 | mysql_mutex_unlock(&LOCK_trn_list); |
807 | DBUG_RETURN(error); |
808 | } |
809 | |
810 | |
811 | TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid) |
812 | { |
813 | TrID old_trid_generator= global_trid_generator; |
814 | TRN *trn; |
815 | DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded); |
816 | global_trid_generator= longid-1; /* force a correct trid in the new trn */ |
817 | if (unlikely((trn= trnman_new_trn(NULL)) == NULL)) |
818 | return NULL; |
819 | /* deallocate excessive allocations of trnman_new_trn() */ |
820 | global_trid_generator= old_trid_generator; |
821 | set_if_bigger(global_trid_generator, longid); |
822 | short_trid_to_active_trn[trn->short_id]= 0; |
823 | DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL); |
824 | short_trid_to_active_trn[shortid]= trn; |
825 | trn->short_id= shortid; |
826 | return trn; |
827 | } |
828 | |
829 | |
830 | TRN *trnman_get_any_trn() |
831 | { |
832 | TRN *trn= active_list_min.next; |
833 | return (trn != &active_list_max) ? trn : NULL; |
834 | } |
835 | |
836 | |
837 | /** |
838 | Returns the minimum existing transaction id. May return a too small |
839 | number in race conditions, but this is ok as the value is used to |
840 | remove not visible transid from index/rows. |
841 | */ |
842 | |
843 | TrID trnman_get_min_trid() |
844 | { |
845 | return trid_min_read_from; |
846 | } |
847 | |
848 | |
849 | /** |
850 | Returns the minimum possible transaction id |
851 | |
852 | @notes |
853 | If there is no transactions running, returns number for next running |
854 | transaction. |
855 | If one has an active transaction, the returned number will be less or |
856 | equal to this. If one is not running in a transaction one will ge the |
857 | number for the next started transaction. This is used in create table |
858 | to get a safe minimum trid to use. |
859 | */ |
860 | |
861 | TrID trnman_get_min_safe_trid() |
862 | { |
863 | TrID trid; |
864 | mysql_mutex_lock(&LOCK_trn_list); |
865 | trid= MY_MIN(active_list_min.next->min_read_from, |
866 | global_trid_generator); |
867 | mysql_mutex_unlock(&LOCK_trn_list); |
868 | return trid; |
869 | } |
870 | |
871 | |
872 | /** |
873 | Returns maximum transaction id given to a transaction so far. |
874 | */ |
875 | |
876 | TrID trnman_get_max_trid() |
877 | { |
878 | TrID id; |
879 | if (short_trid_to_active_trn == NULL) |
880 | return 0; |
881 | mysql_mutex_lock(&LOCK_trn_list); |
882 | id= global_trid_generator; |
883 | mysql_mutex_unlock(&LOCK_trn_list); |
884 | return id; |
885 | } |
886 | |
887 | /** |
888 | @brief Check if there exist an active transaction between two commit_id's |
889 | |
890 | @todo |
891 | Improve speed of this. |
892 | - Store transactions in tree or skip list |
893 | - Have function to copying all active transaction id's to b-tree |
894 | and use b-tree for checking states. This could be a big win |
895 | for checkpoint that will call this function for a lot of objects. |
896 | |
897 | @return |
898 | 0 No transaction exists |
899 | 1 There is at least on active transaction in the given range |
900 | */ |
901 | |
902 | my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id, |
903 | my_bool trnman_is_locked) |
904 | { |
905 | TRN *trn; |
906 | my_bool ret= 0; |
907 | |
908 | if (!trnman_is_locked) |
909 | mysql_mutex_lock(&LOCK_trn_list); |
910 | mysql_mutex_assert_owner(&LOCK_trn_list); |
911 | for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) |
912 | { |
913 | /* |
914 | We use <= for max_id as max_id is a commit_trid and trn->trid |
915 | is transaction id. When calculating commit_trid we use the |
916 | current value of global_trid_generator. global_trid_generator is |
917 | incremented for each new transaction. |
918 | |
919 | For example, assuming we have |
920 | min_id = 5 |
921 | max_id = 10 |
922 | |
923 | A trid of value 5 can't see the history event between 5 & 10 |
924 | at it vas started before min_id 5 was committed. |
925 | A trid of value 10 can't see the next history event (max_id = 10) |
926 | as it started before this was committed. In this case it must use |
927 | the this event. |
928 | */ |
929 | if (trn->trid > min_id && trn->trid <= max_id) |
930 | { |
931 | ret= 1; |
932 | break; |
933 | } |
934 | } |
935 | if (!trnman_is_locked) |
936 | mysql_mutex_unlock(&LOCK_trn_list); |
937 | return ret; |
938 | } |
939 | |
940 | |
941 | /** |
942 | lock transaction list |
943 | */ |
944 | |
945 | void trnman_lock() |
946 | { |
947 | mysql_mutex_lock(&LOCK_trn_list); |
948 | } |
949 | |
950 | |
951 | /** |
952 | unlock transaction list |
953 | */ |
954 | |
955 | void trnman_unlock() |
956 | { |
957 | mysql_mutex_unlock(&LOCK_trn_list); |
958 | } |
959 | |
960 | |
961 | /** |
962 | Is trman initialized |
963 | */ |
964 | |
965 | my_bool trman_is_inited() |
966 | { |
967 | return (short_trid_to_active_trn != NULL); |
968 | } |
969 | |