1/* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
2 Copyright (c) 2010, 2011 Monty Program Ab
3 Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
17
18/**
19 @file
20 Table definition cache and table cache implementation.
21
22 Table definition cache actions:
23 - add new TABLE_SHARE object to cache (tdc_acquire_share())
24 - acquire TABLE_SHARE object from cache (tdc_acquire_share())
25 - release TABLE_SHARE object to cache (tdc_release_share())
26 - purge unused TABLE_SHARE objects from cache (tdc_purge())
27 - remove TABLE_SHARE object from cache (tdc_remove_table())
28 - get number of TABLE_SHARE objects in cache (tdc_records())
29
30 Table cache actions:
31 - add new TABLE object to cache (tc_add_table())
32 - acquire TABLE object from cache (tc_acquire_table())
33 - release TABLE object to cache (tc_release_table())
34 - purge unused TABLE objects from cache (tc_purge())
35 - purge unused TABLE objects of a table from cache (tdc_remove_table())
36 - get number of TABLE objects in cache (tc_records())
37
38 Dependencies:
39 - close_cached_tables(): flush tables on shutdown
40 - alloc_table_share()
41 - free_table_share()
42
43 Table cache invariants:
44 - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0
45 - TABLE_SHARE::free_tables shall not receive new objects if
46 TABLE_SHARE::tdc.flushed is true
47*/
48
49#include "mariadb.h"
50#include "lf.h"
51#include "table.h"
52#include "sql_base.h"
53
54
55/** Configuration. */
ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */
ulong tc_size; /**< Table cache threshold for LRU eviction. */
/**
  Number of Table_cache_instance slots. Used as the bound of every loop over
  tc[] and over TDC_element::free_tables[] in this file.
*/
uint32 tc_instances;
/**
  Number of instances threads are currently distributed over
  (THD::thread_id modulo this value). Starts at 1 and is raised one step at a
  time by Table_cache_instance::lock_and_check_contention(), never above
  tc_instances. Accessed with relaxed atomic operations.
*/
uint32 tc_active_instances= 1;
/**
  Set to 1 (atomically) by the first thread that reports that no further
  instance can be activated, so that the warning is printed only once.
*/
static uint32 tc_contention_warning_reported;
61
62/** Data collections. */
/**
  Collection of TABLE_SHARE objects: a lock-free hash of TDC_element entries
  keyed by TDC_element::m_key (see tdc_hash_key()).
*/
static LF_HASH tdc_hash;
/**
  Collection of unused TABLE_SHARE objects: elements whose ref_count dropped
  to zero are appended here by tdc_release_share() and popped from the front
  by tdc_purge().
*/
static
I_P_List <TDC_element,
          I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>,
          I_P_List_null_counter,
          I_P_List_fast_push_back<TDC_element> > unused_shares;

static tdc_version_t tdc_version;  /* Increments on each reload */
/** True between a successful tdc_init() and tdc_deinit(). */
static bool tdc_inited;


/**
  Protects unused shares list.

  TDC_element::prev
  TDC_element::next
  unused_shares

  Where both are needed, this mutex is taken before
  TDC_element::LOCK_table_share (see tdc_purge() and tdc_release_share()).
*/

static mysql_mutex_t LOCK_unused_shares;
84
#ifdef HAVE_PSI_INTERFACE
/*
  Performance schema instrumentation keys for the mutexes and the condition
  variable used by this file. They are registered under the "sql" category
  in tdc_init().
*/
static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share,
                     key_LOCK_table_cache;
static PSI_mutex_info all_tc_mutexes[]=
{
  { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL },
  { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 },
  { &key_LOCK_table_cache, "LOCK_table_cache", 0 }
};

static PSI_cond_key key_TABLE_SHARE_COND_release;
static PSI_cond_info all_tc_conds[]=
{
  { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 }
};
#endif
101
102
103static int fix_thd_pins(THD *thd)
104{
105 return thd->tdc_hash_pins ? 0 :
106 (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0;
107}
108
109
/*
  Auxiliary routines for manipulating the per-share all/unused lists
  and the tc_count counter.
  They are responsible for preserving the invariants between those lists,
  the counter and the TABLE::in_use member.
  In effect, these routines implement a sort of implicit table cache as
  part of the table definition cache.
*/
118
/**
  One partition of the table cache.

  Each instance has its own mutex, its own list of unused TABLE objects and
  its own counter of TABLE objects. Threads are mapped to an instance by
  THD::thread_id modulo tc_active_instances.
*/
struct Table_cache_instance
{
  /**
    Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
    records, Share_free_tables::List (TABLE::prev and TABLE::next),
    TABLE::in_use.
  */
  mysql_mutex_t LOCK_table_cache;
  /** Unused TABLE objects of this instance; eviction pops from the front. */
  I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
                                    &TABLE::global_free_prev>,
            I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
    free_tables;
  /** Number of TABLE objects (used and unused) accounted to this instance. */
  ulong records;
  /** Lock acquisitions that had to block since the counters were reset. */
  uint mutex_waits;
  /** Lock acquisitions served by trylock since the counters were reset. */
  uint mutex_nowaits;
  /** Avoid false sharing between instances */
  char pad[CPU_LEVEL1_DCACHE_LINESIZE];

  Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0)
  {
    mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
                     MY_MUTEX_INIT_FAST);
  }

  ~Table_cache_instance()
  {
    mysql_mutex_destroy(&LOCK_table_cache);
    /* All TABLE objects must have been released before destruction. */
    DBUG_ASSERT(free_tables.is_empty());
    DBUG_ASSERT(records == 0);
  }

  /**
    Lock table cache mutex and check contention.

    Instance is considered contested if more than 20% of mutex acquisitions
    can't be served immediately. Up to 100 000 probes may be performed to avoid
    instance activation on short sporadic peaks. 100 000 is estimated maximum
    number of queries one instance can serve in one second.

    These numbers work well on a 2 socket / 20 core / 40 threads Intel Broadwell
    system, that is expected number of instances is activated within reasonable
    warmup time. It may have to be adjusted for other systems.

    Only TABLE object acquisition is instrumented. We intentionally avoid this
    overhead on TABLE object release. All other table cache mutex acquisitions
    are considered out of hot path and are not instrumented either.

    On return LOCK_table_cache is held; the caller must unlock it.

    @param n_instances  value of tc_active_instances as loaded by the caller
    @param instance     zero-based index of this instance (used in messages)
  */
  void lock_and_check_contention(uint32 n_instances, uint32 instance)
  {
    if (mysql_mutex_trylock(&LOCK_table_cache))
    {
      /* trylock failed: fall back to a blocking lock and count the wait. */
      mysql_mutex_lock(&LOCK_table_cache);
      if (++mutex_waits == 20000)
      {
        if (n_instances < tc_instances)
        {
          /*
            Activate one more instance, but only if tc_active_instances still
            equals the value the caller observed; a concurrent activation
            makes the compare-and-swap fail and nothing is reported.
          */
          if (my_atomic_cas32_weak_explicit((int32*) &tc_active_instances,
                                            (int32*) &n_instances,
                                            (int32) n_instances + 1,
                                            MY_MEMORY_ORDER_RELAXED,
                                            MY_MEMORY_ORDER_RELAXED))
          {
            sql_print_information("Detected table cache mutex contention at instance %d: "
                                  "%d%% waits. Additional table cache instance "
                                  "activated. Number of instances after "
                                  "activation: %d.",
                                  instance + 1,
                                  mutex_waits * 100 / (mutex_nowaits + mutex_waits),
                                  n_instances + 1);
          }
        }
        /* All instances are active already: warn once per server run. */
        else if (!my_atomic_fas32_explicit((int32*) &tc_contention_warning_reported,
                                           1, MY_MEMORY_ORDER_RELAXED))
        {
          sql_print_warning("Detected table cache mutex contention at instance %d: "
                            "%d%% waits. Additional table cache instance "
                            "cannot be activated: consider raising "
                            "table_open_cache_instances. Number of active "
                            "instances: %d.",
                            instance + 1,
                            mutex_waits * 100 / (mutex_nowaits + mutex_waits),
                            n_instances);
        }
        /* Start a new measurement window. */
        mutex_waits= 0;
        mutex_nowaits= 0;
      }
    }
    /*
      80 000 uncontended acquisitions before 20 000 waits were seen: the
      instance is not contested, start a new measurement window.
    */
    else if (++mutex_nowaits == 80000)
    {
      mutex_waits= 0;
      mutex_nowaits= 0;
    }
  }
};
213
214
/**
  Array of table cache instances. Allocated in tdc_init() with
  tc_instances + 1 elements and freed in tdc_deinit().
*/
static Table_cache_instance *tc;
216
217
/**
  Close and free a TABLE object that is no longer linked in the table cache.

  Deletes the table's triggers, closes the table with closefrm(), releases
  the reference on its TABLE_SHARE and frees the TABLE memory.
*/
static void intern_close_table(TABLE *table)
{
  delete table->triggers;
  DBUG_ASSERT(table->file);
  closefrm(table);
  /* table->s is read here, so the share is released before the TABLE is freed. */
  tdc_release_share(table->s);
  my_free(table);
}
226
227
228/**
229 Get number of TABLE objects (used and unused) in table cache.
230*/
231
232uint tc_records(void)
233{
234 ulong total= 0;
235 for (ulong i= 0; i < tc_instances; i++)
236 {
237 mysql_mutex_lock(&tc[i].LOCK_table_cache);
238 total+= tc[i].records;
239 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
240 }
241 return total;
242}
243
244
245/**
246 Remove TABLE object from table cache.
247*/
248
static void tc_remove_table(TABLE *table)
{
  TDC_element *element= table->s->tdc;

  mysql_mutex_lock(&element->LOCK_table_share);
  /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
  while (element->all_tables_refs)
    mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
  element->all_tables.remove(table);
  mysql_mutex_unlock(&element->LOCK_table_share);

  /* Close and free the object outside of LOCK_table_share. */
  intern_close_table(table);
}
262
263
/**
  Unlink all unused TABLE objects of a share from the table cache.

  For every instance, the share's unused TABLE objects are removed from the
  per-share free list, from the instance-wide free list and from
  TDC_element::all_tables, and the instance's record counter is decremented.
  The objects are not closed here: they are collected in purge_tables so that
  the caller can close them later.

  @param element       share whose unused tables are removed
  @param purge_tables  [out] list receiving the unlinked TABLE objects
  @param mark_flushed  if true, mark the share flushed first

  @note Both callers visible in this file hold element->LOCK_table_share.
*/
static void tc_remove_all_unused_tables(TDC_element *element,
                                        Share_free_tables::List *purge_tables,
                                        bool mark_flushed)
{
  TABLE *table;

  /*
    Mark share flushed in order to ensure that it gets
    automatically deleted once it is no longer referenced.

    Note that code in TABLE_SHARE::wait_for_old_version() assumes that
    marking share flushed is followed by purge of unused table
    shares.
  */
  if (mark_flushed)
    element->flushed= true;
  for (ulong i= 0; i < tc_instances; i++)
  {
    mysql_mutex_lock(&tc[i].LOCK_table_cache);
    while ((table= element->free_tables[i].list.pop_front()))
    {
      tc[i].records--;
      tc[i].free_tables.remove(table);
      /* all_tables must not be modified while it is being traversed. */
      DBUG_ASSERT(element->all_tables_refs == 0);
      element->all_tables.remove(table);
      purge_tables->push_front(table);
    }
    mysql_mutex_unlock(&tc[i].LOCK_table_cache);
  }
}
294
295
296/**
297 Free all unused TABLE objects.
298
299 While locked:
300 - remove unused objects from TABLE_SHARE::tdc.free_tables and
301 TABLE_SHARE::tdc.all_tables
302 - decrement tc_count
303
304 While unlocked:
305 - free resources related to unused objects
306
  @note This is called by 'handle_manager' when one wants to
        periodically flush all unused tables.
309*/
310
/** Argument passed from tc_purge() to tc_purge_callback() via tdc_iterate(). */
struct tc_purge_arg
{
  /** Unused TABLE objects unlinked from the cache, to be closed by tc_purge(). */
  Share_free_tables::List purge_tables;
  /** Whether every visited share shall be marked flushed. */
  bool mark_flushed;
};
316
317
/**
  tdc_iterate() callback for tc_purge(): unlink the unused TABLE objects of
  one share while holding its LOCK_table_share.

  @return always FALSE
*/
static my_bool tc_purge_callback(TDC_element *element, tc_purge_arg *arg)
{
  mysql_mutex_lock(&element->LOCK_table_share);
  tc_remove_all_unused_tables(element, &arg->purge_tables, arg->mark_flushed);
  mysql_mutex_unlock(&element->LOCK_table_share);
  return FALSE;
}
325
326
327void tc_purge(bool mark_flushed)
328{
329 tc_purge_arg argument;
330 TABLE *table;
331
332 argument.mark_flushed= mark_flushed;
333 tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &argument);
334 while ((table= argument.purge_tables.pop_front()))
335 intern_close_table(table);
336}
337
338
339/**
340 Add new TABLE object to table cache.
341
342 @pre TABLE object is used by caller.
343
344 Added object cannot be evicted or acquired.
345
346 While locked:
347 - add object to TABLE_SHARE::tdc.all_tables
348 - increment tc_count
349 - evict LRU object from table cache if we reached threshold
350
351 While unlocked:
352 - free evicted object
353*/
354
void tc_add_table(THD *thd, TABLE *table)
{
  /* The instance is chosen by thread id among the currently active ones. */
  uint32 i= thd->thread_id % my_atomic_load32_explicit((int32*) &tc_active_instances,
                                                       MY_MEMORY_ORDER_RELAXED);
  TABLE *LRU_table= 0;
  TDC_element *element= table->s->tdc;

  DBUG_ASSERT(table->in_use == thd);
  /* Remembered so that tc_release_table() returns the object to the same instance. */
  table->instance= i;
  mysql_mutex_lock(&element->LOCK_table_share);
  /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
  while (element->all_tables_refs)
    mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
  element->all_tables.push_front(table);
  mysql_mutex_unlock(&element->LOCK_table_share);

  mysql_mutex_lock(&tc[i].LOCK_table_cache);
  if (tc[i].records == tc_size)
  {
    if ((LRU_table= tc[i].free_tables.pop_front()))
    {
      /*
        Evict the object at the front of the instance's free list. The
        record counter stays unchanged: one object leaves, one is added.
      */
      LRU_table->s->tdc->free_tables[i].list.remove(LRU_table);
      /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
      LRU_table->in_use= thd;
      mysql_mutex_unlock(&tc[i].LOCK_table_cache);
      /* Keep out of locked LOCK_table_cache */
      tc_remove_table(LRU_table);
    }
    else
    {
      /* Nothing to evict: the instance is allowed to exceed the threshold. */
      tc[i].records++;
      mysql_mutex_unlock(&tc[i].LOCK_table_cache);
    }
    /* Keep out of locked LOCK_table_cache */
    status_var_increment(thd->status_var.table_open_cache_overflows);
  }
  else
  {
    tc[i].records++;
    mysql_mutex_unlock(&tc[i].LOCK_table_cache);
  }
}
397
398
399/**
400 Acquire TABLE object from table cache.
401
402 @pre share must be protected against removal.
403
404 Acquired object cannot be evicted or acquired again.
405
406 @return TABLE object, or NULL if no unused objects.
407*/
408
static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
{
  /*
    Load the number of active instances once, so that the instance index and
    the value handed to the contention check are consistent with each other.
  */
  uint32 n_instances=
    my_atomic_load32_explicit((int32*) &tc_active_instances,
                              MY_MEMORY_ORDER_RELAXED);
  uint32 i= thd->thread_id % n_instances;
  TABLE *table;

  /* Returns with tc[i].LOCK_table_cache held. */
  tc[i].lock_and_check_contention(n_instances, i);
  /* Only the free list of this thread's instance is examined. */
  table= element->free_tables[i].list.pop_front();
  if (table)
  {
    DBUG_ASSERT(!table->in_use);
    table->in_use= thd;
    /* The ex-unused table must be fully functional. */
    DBUG_ASSERT(table->db_stat && table->file);
    /* The children must be detached from the table. */
    DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
    tc[i].free_tables.remove(table);
  }
  mysql_mutex_unlock(&tc[i].LOCK_table_cache);
  return table;
}
432
433
434/**
435 Release TABLE object to table cache.
436
437 @pre object is used by caller.
438
439 Released object may be evicted or acquired again.
440
441 While locked:
442 - if object is marked for purge, decrement tc_count
443 - add object to TABLE_SHARE::tdc.free_tables
444 - evict LRU object from table cache if we reached threshold
445
446 While unlocked:
447 - mark object not in use by any thread
448 - free evicted/purged object
449
450 @note Another thread may mark share for purge any moment (even
451 after version check). It means to-be-purged object may go to
452 unused lists. This other thread is expected to call tc_purge(),
453 which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share.
454
  The function returns no value: the object is either purged (removed
  from the table cache and closed) or released to the unused lists.
458*/
459
460void tc_release_table(TABLE *table)
461{
462 uint32 i= table->instance;
463 DBUG_ENTER("tc_release_table");
464 DBUG_ASSERT(table->in_use);
465 DBUG_ASSERT(table->file);
466
467 mysql_mutex_lock(&tc[i].LOCK_table_cache);
468 if (table->needs_reopen() || table->s->tdc->flushed ||
469 tc[i].records > tc_size)
470 {
471 tc[i].records--;
472 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
473 tc_remove_table(table);
474 }
475 else
476 {
477 table->in_use= 0;
478 table->s->tdc->free_tables[i].list.push_front(table);
479 tc[i].free_tables.push_back(table);
480 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
481 }
482 DBUG_VOID_RETURN;
483}
484
485
/**
  Debug check that a TDC_element is in its pristine state: no share attached,
  no references, no flush tickets, no TABLE objects, not linked in the
  unused shares list. Consists of assertions only.
*/
static void tdc_assert_clean_share(TDC_element *element)
{
  DBUG_ASSERT(element->share == 0);
  DBUG_ASSERT(element->ref_count == 0);
  DBUG_ASSERT(element->m_flush_tickets.is_empty());
  DBUG_ASSERT(element->all_tables.is_empty());
  /* The loop is compiled only when assertions are enabled. */
#ifndef DBUG_OFF
  for (ulong i= 0; i < tc_instances; i++)
    DBUG_ASSERT(element->free_tables[i].list.is_empty());
#endif
  DBUG_ASSERT(element->all_tables_refs == 0);
  DBUG_ASSERT(element->next == 0);
  DBUG_ASSERT(element->prev == 0);
}
500
501
502/**
503 Delete share from hash and free share object.
504*/
505
/*
  Pre-condition:  the caller holds element->LOCK_table_share (asserted).
  Post-condition: the mutex has been released, the element has been deleted
                  from tdc_hash and the TABLE_SHARE has been freed.
*/
static void tdc_delete_share_from_hash(TDC_element *element)
{
  THD *thd= current_thd;
  LF_PINS *pins;
  TABLE_SHARE *share;
  DBUG_ENTER("tdc_delete_share_from_hash");

  mysql_mutex_assert_owner(&element->LOCK_table_share);
  share= element->share;
  DBUG_ASSERT(share);
  /*
    Detach the share first: tdc_lock_share() and tdc_acquire_share() treat an
    element without a share as not usable.
  */
  element->share= 0;
  PSI_CALL_release_table_share(share->m_psi);
  share->m_psi= 0;

  if (!element->m_flush_tickets.is_empty())
  {
    /* Grant every pending flush ticket ... */
    Wait_for_flush_list::Iterator it(element->m_flush_tickets);
    Wait_for_flush *ticket;
    while ((ticket= it++))
      (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);

    /* ... and wait on COND_release until the ticket list has drained. */
    do
    {
      mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
    } while (!element->m_flush_tickets.is_empty());
  }

  mysql_mutex_unlock(&element->LOCK_table_share);

  /*
    Use the thread's own pins when a THD is available, otherwise borrow
    temporary pins and give them back after the delete.
    NOTE(review): the result of fix_thd_pins() is not checked; a failure is
    only caught by the assertion below.
  */
  if (thd)
  {
    fix_thd_pins(thd);
    pins= thd->tdc_hash_pins;
  }
  else
    pins= lf_hash_get_pins(&tdc_hash);

  DBUG_ASSERT(pins); // What can we do about it?
  tdc_assert_clean_share(element);
  lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length);
  if (!thd)
    lf_hash_put_pins(pins);
  free_table_share(share);
  DBUG_VOID_RETURN;
}
551
552
553/**
  Prepare table share for use with table definition cache.
555*/
556
/*
  Installed as tdc_hash.alloc.constructor in tdc_init().

  @param arg  start of the allocated node; the TDC_element is located
              LF_HASH_OVERHEAD bytes into it
*/
static void lf_alloc_constructor(uchar *arg)
{
  TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
  DBUG_ENTER("lf_alloc_constructor");
  mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share,
                   &element->LOCK_table_share, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0);
  /* Bring the element into the state checked by tdc_assert_clean_share(). */
  element->m_flush_tickets.empty();
  element->all_tables.empty();
  for (ulong i= 0; i < tc_instances; i++)
    element->free_tables[i].list.empty();
  element->all_tables_refs= 0;
  element->share= 0;
  element->ref_count= 0;
  element->next= 0;
  element->prev= 0;
  DBUG_VOID_RETURN;
}
575
576
577/**
578 Release table definition cache specific resources of table share.
579*/
580
/*
  Installed as tdc_hash.alloc.destructor in tdc_init().

  @param arg  start of the allocated node; the TDC_element is located
              LF_HASH_OVERHEAD bytes into it
*/
static void lf_alloc_destructor(uchar *arg)
{
  TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
  DBUG_ENTER("lf_alloc_destructor");
  /* The element must be unused by the time its memory is released. */
  tdc_assert_clean_share(element);
  mysql_cond_destroy(&element->COND_release);
  mysql_mutex_destroy(&element->LOCK_table_share);
  DBUG_VOID_RETURN;
}
590
591
/**
  Initialize a TDC_element being inserted into tdc_hash: copy the key into
  the element. Installed as tdc_hash.initializer in tdc_init().

  @param element  element being inserted
  @param key      key bytes and length, as passed to lf_hash_insert()

  @note No bounds check is made here; presumably the key never exceeds the
        size of TDC_element::m_key -- TODO confirm against its declaration.
*/
static void tdc_hash_initializer(LF_HASH *,
                                 TDC_element *element, LEX_STRING *key)
{
  memcpy(element->m_key, key->str, key->length);
  element->m_key_length= (uint)key->length;
  tdc_assert_clean_share(element);
}
599
600
601static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
602 my_bool)
603{
604 *length= element->m_key_length;
605 return (uchar*) element->m_key;
606}
607
608
609/**
610 Initialize table definition cache.
611*/
612
/*
  @retval false  success
  @retval true   the table cache instances could not be allocated
*/
bool tdc_init(void)
{
  DBUG_ENTER("tdc_init");
#ifdef HAVE_PSI_INTERFACE
  mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
  mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
#endif
  /* Extra instance is allocated to avoid false sharing */
  if (!(tc= new Table_cache_instance[tc_instances + 1]))
    DBUG_RETURN(true);
  tdc_inited= true;
  mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
                   MY_MUTEX_INIT_FAST);
  tdc_version= 1L;                              /* Increments on each reload */
  /*
    Each hash element is a TDC_element followed by room for tc_instances - 1
    additional Share_free_tables entries, so that free_tables[] can be indexed
    by instance. NOTE(review): presumably TDC_element itself already contains
    one Share_free_tables entry -- confirm against its declaration.
  */
  lf_hash_init(&tdc_hash, sizeof(TDC_element) +
                          sizeof(Share_free_tables) * (tc_instances - 1),
               LF_HASH_UNIQUE, 0, 0,
               (my_hash_get_key) tdc_hash_key,
               &my_charset_bin);
  tdc_hash.alloc.constructor= lf_alloc_constructor;
  tdc_hash.alloc.destructor= lf_alloc_destructor;
  tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
  DBUG_RETURN(false);
}
637
638
639/**
640 Notify table definition cache that process of shutting down server
641 has started so it has to keep number of TABLE and TABLE_SHARE objects
642 minimal in order to reduce number of references to pluggable engines.
643*/
644
645void tdc_start_shutdown(void)
646{
647 DBUG_ENTER("table_def_start_shutdown");
648 if (tdc_inited)
649 {
650 /*
651 Ensure that TABLE and TABLE_SHARE objects which are created for
652 tables that are open during process of plugins' shutdown are
653 immediately released. This keeps number of references to engine
654 plugins minimal and allows shutdown to proceed smoothly.
655 */
656 tdc_size= 0;
657 tc_size= 0;
658 /* Free all cached but unused TABLEs and TABLE_SHAREs. */
659 close_cached_tables(NULL, NULL, FALSE, LONG_TIMEOUT);
660 }
661 DBUG_VOID_RETURN;
662}
663
664
665/**
666 Deinitialize table definition cache.
667*/
668
669void tdc_deinit(void)
670{
671 DBUG_ENTER("tdc_deinit");
672 if (tdc_inited)
673 {
674 tdc_inited= false;
675 lf_hash_destroy(&tdc_hash);
676 mysql_mutex_destroy(&LOCK_unused_shares);
677 delete [] tc;
678 }
679 DBUG_VOID_RETURN;
680}
681
682
683/**
684 Get number of cached table definitions.
685
686 @return Number of cached table definitions
687*/
688
689ulong tdc_records(void)
690{
691 return my_atomic_load32_explicit(&tdc_hash.count, MY_MEMORY_ORDER_RELAXED);
692}
693
694
/**
  Purge unused TABLE_SHARE objects from the table definition cache.

  Shares are taken from the front of the unused shares list and deleted,
  until the list is empty or, unless 'all' is set, the number of cached
  definitions is no longer above tdc_size.

  @param all  if true, purge every unused share regardless of tdc_size
*/
void tdc_purge(bool all)
{
  DBUG_ENTER("tdc_purge");
  while (all || tdc_records() > tdc_size)
  {
    TDC_element *element;

    mysql_mutex_lock(&LOCK_unused_shares);
    if (!(element= unused_shares.pop_front()))
    {
      /* No unused shares left. */
      mysql_mutex_unlock(&LOCK_unused_shares);
      break;
    }

    /* Concurrent thread may start using share again, reset prev and next. */
    element->prev= 0;
    element->next= 0;
    mysql_mutex_lock(&element->LOCK_table_share);
    if (element->ref_count)
    {
      /*
        The share got referenced again; it is already unlinked from the
        unused list, so just leave it alone and try the next one.
      */
      mysql_mutex_unlock(&element->LOCK_table_share);
      mysql_mutex_unlock(&LOCK_unused_shares);
      continue;
    }
    mysql_mutex_unlock(&LOCK_unused_shares);

    /* Expects LOCK_table_share to be held and releases it. */
    tdc_delete_share_from_hash(element);
  }
  DBUG_VOID_RETURN;
}
725
726
727/**
728 Lock table share.
729
730 Find table share with given db.table_name in table definition cache. Return
731 locked table share if found.
732
733 Locked table share means:
734 - table share is protected against removal from table definition cache
735 - no other thread can acquire/release table share
736
737 Caller is expected to unlock table share with tdc_unlock_share().
738
739 @retval 0 Share not found
740 @retval MY_ERRPTR OOM
741 @retval ptr Pointer to locked table share
742*/
743
744TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name)
745{
746 TDC_element *element;
747 char key[MAX_DBKEY_LENGTH];
748
749 DBUG_ENTER("tdc_lock_share");
750 if (unlikely(fix_thd_pins(thd)))
751 DBUG_RETURN((TDC_element*) MY_ERRPTR);
752
753 element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins,
754 (uchar*) key,
755 tdc_create_key(key, db, table_name));
756 if (element)
757 {
758 mysql_mutex_lock(&element->LOCK_table_share);
759 if (unlikely(!element->share || element->share->error))
760 {
761 mysql_mutex_unlock(&element->LOCK_table_share);
762 element= 0;
763 }
764 lf_hash_search_unpin(thd->tdc_hash_pins);
765 }
766
767 DBUG_RETURN(element);
768}
769
770
771/**
772 Unlock share locked by tdc_lock_share().
773*/
774
void tdc_unlock_share(TDC_element *element)
{
  DBUG_ENTER("tdc_unlock_share");
  /* Only the mutex is released; the element's reference count is untouched. */
  mysql_mutex_unlock(&element->LOCK_table_share);
  DBUG_VOID_RETURN;
}
781
782
783/*
784 Get TABLE_SHARE for a table.
785
786 tdc_acquire_share()
787 thd Thread handle
788 tl Table that should be opened
    flags            operation: what to open (table, view or both)
790 out_table TABLE for the requested table
791
792 IMPLEMENTATION
793 Get a table definition from the table definition cache.
794 If it doesn't exist, create a new from the table definition file.
795
796 RETURN
797 0 Error
798 # Share for table
799*/
800
TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags,
                               TABLE **out_table)
{
  TABLE_SHARE *share;
  TDC_element *element;
  const char *key;
  uint key_length= get_table_def_key(tl, &key);
  my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value();
  bool was_unused;
  DBUG_ENTER("tdc_acquire_share");

  if (fix_thd_pins(thd))
    DBUG_RETURN(0);

retry:
  /*
    Look the element up; as long as it is not there, try to insert a new one.
    The loop body is the "share not cached yet" path and always leaves the
    loop through return, continue or goto end.
  */
  while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
                    thd->tdc_hash_pins, hash_value, (uchar*) key, key_length)))
  {
    LEX_STRING tmp= { const_cast<char*>(key), key_length };
    int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp);

    if (res == -1)
      DBUG_RETURN(0);
    /*
      NOTE(review): presumably 1 means the key is already present (inserted
      by a concurrent thread); search again.
    */
    else if (res == 1)
      continue;

    /* Fetch the element that was just inserted. */
    element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
             thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
    lf_hash_search_unpin(thd->tdc_hash_pins);
    DBUG_ASSERT(element);

    if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length)))
    {
      /* Undo the insert of the still share-less element. */
      lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
      DBUG_RETURN(0);
    }

    /* note that tdc_acquire_share() *always* uses discovery */
    open_table_def(thd, share, flags | GTS_USE_DISCOVERY);

    if (checked_unlikely(share->error))
    {
      free_table_share(share);
      lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
      DBUG_RETURN(0);
    }

    /* Publish the share in the element and take the first reference. */
    mysql_mutex_lock(&element->LOCK_table_share);
    element->share= share;
    share->tdc= element;
    element->ref_count++;
    element->version= tdc_refresh_version();
    element->flushed= false;
    mysql_mutex_unlock(&element->LOCK_table_share);

    /* A share was added: evict unused shares if the cache is above tdc_size. */
    tdc_purge(false);
    if (out_table)
    {
      /* A new share has no cached TABLE objects. */
      status_var_increment(thd->status_var.table_open_cache_misses);
      *out_table= 0;
    }
    share->m_psi= PSI_CALL_get_table_share(false, share);
    goto end;
  }

  /* cannot force discovery of a cached share */
  DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY));

  if (out_table && (flags & GTS_TABLE))
  {
    if ((*out_table= tc_acquire_table(thd, element)))
    {
      /*
        Cache hit: an unused TABLE object was found. The share is returned
        without taking LOCK_table_share and without changing ref_count.
      */
      lf_hash_search_unpin(thd->tdc_hash_pins);
      DBUG_ASSERT(!(flags & GTS_NOLOCK));
      DBUG_ASSERT(element->share);
      DBUG_ASSERT(!element->share->error);
      DBUG_ASSERT(!element->share->is_view);
      status_var_increment(thd->status_var.table_open_cache_hits);
      DBUG_RETURN(element->share);
    }
    status_var_increment(thd->status_var.table_open_cache_misses);
  }

  mysql_mutex_lock(&element->LOCK_table_share);
  if (!(share= element->share))
  {
    /*
      The element has no share attached (tdc_delete_share_from_hash() clears
      it before deleting the element, and a freshly inserted element has none
      yet): start over.
    */
    mysql_mutex_unlock(&element->LOCK_table_share);
    lf_hash_search_unpin(thd->tdc_hash_pins);
    goto retry;
  }
  /* The mutex is held from here on, the pin is no longer needed. */
  lf_hash_search_unpin(thd->tdc_hash_pins);

  /*
     We found an existing table definition. Return it if we didn't get
     an error when reading the table definition from file.
  */
  if (unlikely(share->error))
  {
    open_table_error(share, share->error, share->open_errno);
    goto err;
  }

  /* The cached object must be of a kind the caller asked for. */
  if (share->is_view && !(flags & GTS_VIEW))
  {
    open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT);
    goto err;
  }
  if (!share->is_view && !(flags & GTS_TABLE))
  {
    open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT);
    goto err;
  }

  was_unused= !element->ref_count;
  element->ref_count++;
  mysql_mutex_unlock(&element->LOCK_table_share);
  if (was_unused)
  {
    /*
      The list is updated only after LOCK_table_share was released, and
      element->prev is re-checked under LOCK_unused_shares.
    */
    mysql_mutex_lock(&LOCK_unused_shares);
    if (element->prev)
    {
      /*
        Share was not used before and it was in the old_unused_share list
        Unlink share from this list
      */
      DBUG_PRINT("info", ("Unlinking from not used list"));
      unused_shares.remove(element);
      element->next= 0;
      element->prev= 0;
    }
    mysql_mutex_unlock(&LOCK_unused_shares);
  }

end:
  DBUG_PRINT("exit", ("share: %p ref_count: %u",
                      share, share->tdc->ref_count));
  if (flags & GTS_NOLOCK)
  {
    tdc_release_share(share);
    /*
      if GTS_NOLOCK is requested, the returned share pointer cannot be used,
      the share it points to may go away any moment.
      But perhaps the caller is only interested to know whether a share or
      table existed?
      Let's return an invalid pointer here to catch dereferencing attempts.
    */
    share= (TABLE_SHARE*) 1;
  }
  DBUG_RETURN(share);

err:
  /* Reached with element->LOCK_table_share held. */
  mysql_mutex_unlock(&element->LOCK_table_share);
  DBUG_RETURN(0);
}
955
956
957/**
958 Release table share acquired by tdc_acquire_share().
959*/
960
void tdc_release_share(TABLE_SHARE *share)
{
  DBUG_ENTER("tdc_release_share");

  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  DBUG_PRINT("enter",
             ("share: %p  table: %s.%s  ref_count: %u  version: %lld",
              share, share->db.str, share->table_name.str,
              share->tdc->ref_count, share->tdc->version));
  DBUG_ASSERT(share->tdc->ref_count);

  /* Fast path: not the last reference, LOCK_unused_shares is not needed. */
  if (share->tdc->ref_count > 1)
  {
    share->tdc->ref_count--;
    if (!share->is_view)
      mysql_cond_broadcast(&share->tdc->COND_release);
    mysql_mutex_unlock(&share->tdc->LOCK_table_share);
    DBUG_VOID_RETURN;
  }
  mysql_mutex_unlock(&share->tdc->LOCK_table_share);

  /*
    Possibly the last reference. Take the mutexes again in the order
    LOCK_unused_shares, LOCK_table_share and decide anew: another thread may
    have acquired the share while no mutex was held.
  */
  mysql_mutex_lock(&LOCK_unused_shares);
  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  if (--share->tdc->ref_count)
  {
    if (!share->is_view)
      mysql_cond_broadcast(&share->tdc->COND_release);
    mysql_mutex_unlock(&share->tdc->LOCK_table_share);
    mysql_mutex_unlock(&LOCK_unused_shares);
    DBUG_VOID_RETURN;
  }
  /* No references left: delete flushed shares and shares above the threshold. */
  if (share->tdc->flushed || tdc_records() > tdc_size)
  {
    mysql_mutex_unlock(&LOCK_unused_shares);
    /* Releases LOCK_table_share. */
    tdc_delete_share_from_hash(share->tdc);
    DBUG_VOID_RETURN;
  }
  /* Link share last in used_table_share list */
  DBUG_PRINT("info", ("moving share to unused list"));
  DBUG_ASSERT(share->tdc->next == 0);
  unused_shares.push_back(share->tdc);
  mysql_mutex_unlock(&share->tdc->LOCK_table_share);
  mysql_mutex_unlock(&LOCK_unused_shares);
  DBUG_VOID_RETURN;
}
1006
1007
1008/**
1009 Auxiliary function which allows to kill delayed threads for
1010 particular table identified by its share.
1011
1012 @param share Table share.
1013
1014 @pre Caller should have TABLE_SHARE::tdc.LOCK_table_share mutex.
1015*/
1016
1017static void kill_delayed_threads_for_table(TDC_element *element)
1018{
1019 All_share_tables_list::Iterator it(element->all_tables);
1020 TABLE *tab;
1021
1022 mysql_mutex_assert_owner(&element->LOCK_table_share);
1023
1024 if (!delayed_insert_threads)
1025 return;
1026
1027 while ((tab= it++))
1028 {
1029 THD *in_use= tab->in_use;
1030
1031 DBUG_ASSERT(in_use && tab->s->tdc->flushed);
1032 if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) &&
1033 ! in_use->killed)
1034 {
1035 in_use->killed= KILL_SYSTEM_THREAD;
1036 mysql_mutex_lock(&in_use->mysys_var->mutex);
1037 if (in_use->mysys_var->current_cond)
1038 {
1039 mysql_mutex_lock(in_use->mysys_var->current_mutex);
1040 mysql_cond_broadcast(in_use->mysys_var->current_cond);
1041 mysql_mutex_unlock(in_use->mysys_var->current_mutex);
1042 }
1043 mysql_mutex_unlock(&in_use->mysys_var->mutex);
1044 }
1045 }
1046}
1047
1048
/**
  Remove all or some (depending on parameter) instances of TABLE and
  TABLE_SHARE from the table definition cache.

  @param  thd          Thread context
  @param  remove_type  Type of removal:
    TDC_RT_REMOVE_ALL     - remove all TABLE instances and
                            TABLE_SHARE instance. There
                            should be no used TABLE objects
                            and caller should have exclusive
                            metadata lock on the table.
    TDC_RT_REMOVE_NOT_OWN - remove all TABLE instances
                            except those that belong to
                            this thread. There should be
                            no TABLE objects used by other
                            threads and caller should have
                            exclusive metadata lock on the
                            table.
    TDC_RT_REMOVE_UNUSED  - remove all unused TABLE
                            instances (if there are no
                            used instances will also
                            remove TABLE_SHARE).
    TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE -
                            remove all TABLE instances
                            except those that belong to
                            this thread, but don't mark
                            TABLE_SHARE as old. There
                            should be no TABLE objects
                            used by other threads and
                            caller should have exclusive
                            metadata lock on the table.
  @param  db           Name of database
  @param  table_name   Name of table
  @param  kill_delayed_threads  If TRUE, kill INSERT DELAYED threads

  @note It assumes that table instances are already not used by any
  (other) thread (this should be achieved by using meta-data locks).

  @retval false  tdc_lock_share() found no element for db.table_name,
                 nothing was removed.
  @retval true   An element was found and processed.
*/

bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type,
                      const char *db, const char *table_name,
                      bool kill_delayed_threads)
{
  Share_free_tables::List purge_tables;
  TABLE *table;
  TDC_element *element;
  /* Starts at 1 to account for the reference this function takes below. */
  uint my_refs= 1;
  DBUG_ENTER("tdc_remove_table");
  DBUG_PRINT("enter",("name: %s remove_type: %d", table_name, remove_type));

  /* Every removal type except TDC_RT_REMOVE_UNUSED requires an exclusive MDL. */
  DBUG_ASSERT(remove_type == TDC_RT_REMOVE_UNUSED ||
              thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name,
                                             MDL_EXCLUSIVE));


  /*
    LOCK_unused_shares is taken before the share is looked up and is released
    on every path before any further work on the element is done.
  */
  mysql_mutex_lock(&LOCK_unused_shares);
  if (!(element= tdc_lock_share(thd, db, table_name)))
  {
    mysql_mutex_unlock(&LOCK_unused_shares);
    DBUG_ASSERT(remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE);
    DBUG_RETURN(false);
  }

  /*
    NOTE(review): this is only checked in debug builds; with assertions
    compiled out an MY_ERRPTR result would be dereferenced below.
  */
  DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it?

  if (!element->ref_count)
  {
    /*
      Nobody references the share. Unlink it from unused_shares if it is
      linked there (prev is set) and drop it from the hash right away.
    */
    if (element->prev)
    {
      unused_shares.remove(element);
      element->prev= 0;
      element->next= 0;
    }
    mysql_mutex_unlock(&LOCK_unused_shares);

    tdc_delete_share_from_hash(element);
    DBUG_RETURN(true);
  }
  mysql_mutex_unlock(&LOCK_unused_shares);

  /* Take our own reference; it is given back by tdc_release_share() below. */
  element->ref_count++;

  /*
    Collect unused TABLE instances into purge_tables. The last argument is
    false only for TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE, the removal type that
    must not mark the share as old.
  */
  tc_remove_all_unused_tables(element, &purge_tables,
                              remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE);

  if (kill_delayed_threads)
    kill_delayed_threads_for_table(element);

  if (remove_type == TDC_RT_REMOVE_NOT_OWN ||
      remove_type == TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE)
  {
    /*
      Count the TABLE instances that remain in all_tables; each one is
      expected to belong to this thread and is added to my_refs, the
      ref_count level the wait loop below waits for.
    */
    All_share_tables_list::Iterator it(element->all_tables);
    while ((table= it++))
    {
      my_refs++;
      DBUG_ASSERT(table->in_use == thd);
    }
  }
  DBUG_ASSERT(element->all_tables.is_empty() || remove_type != TDC_RT_REMOVE_ALL);
  /*
    LOCK_table_share is not acquired explicitly in this function before this
    point. NOTE(review): presumably tdc_lock_share() returns with it held.
  */
  mysql_mutex_unlock(&element->LOCK_table_share);

  /* Close the collected instances after LOCK_table_share has been released. */
  while ((table= purge_tables.pop_front()))
    intern_close_table(table);

  if (remove_type != TDC_RT_REMOVE_UNUSED)
  {
    /*
      Even though current thread holds exclusive metadata lock on this share
      (asserted above), concurrent FLUSH TABLES threads may be in process of
      closing unused table instances belonging to this share. E.g.:
      thr1 (FLUSH TABLES): table= share->tdc.free_tables.pop_front();
      thr1 (FLUSH TABLES): share->tdc.all_tables.remove(table);
      thr2 (ALTER TABLE): tdc_remove_table();
      thr1 (FLUSH TABLES): intern_close_table(table);

      Current remove type assumes that all table instances (except for those
      that are owned by current thread) must be closed before
      tdc_remove_table() returns. Wait for such tables now.

      intern_close_table() decrements ref_count and signals COND_release. When
      ref_count drops down to number of references owned by current thread
      waiting is completed.

      Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here
      because it waits for all table instances, whereas we have to wait only
      for those that are not owned by current thread.
    */
    mysql_mutex_lock(&element->LOCK_table_share);
    while (element->ref_count > my_refs)
      mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
    mysql_mutex_unlock(&element->LOCK_table_share);
  }

  /* Give back the reference taken by element->ref_count++ above. */
  tdc_release_share(element->share);

  DBUG_RETURN(true);
}
1186
1187
1188/**
1189 Check if table's share is being removed from the table definition
1190 cache and, if yes, wait until the flush is complete.
1191
1192 @param thd Thread context.
1193 @param table_list Table which share should be checked.
1194 @param timeout Timeout for waiting.
1195 @param deadlock_weight Weight of this wait for deadlock detector.
1196
1197 @retval 0 Success. Share is up to date or has been flushed.
1198 @retval 1 Error (OOM, was killed, the wait resulted
1199 in a deadlock or timeout). Reported.
1200*/
1201
1202int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name,
1203 ulong wait_timeout, uint deadlock_weight, tdc_version_t refresh_version)
1204{
1205 TDC_element *element;
1206
1207 if (!(element= tdc_lock_share(thd, db, table_name)))
1208 return FALSE;
1209 else if (element == MY_ERRPTR)
1210 return TRUE;
1211 else if (element->flushed && refresh_version > element->version)
1212 {
1213 struct timespec abstime;
1214 set_timespec(abstime, wait_timeout);
1215 return element->share->wait_for_old_version(thd, &abstime, deadlock_weight);
1216 }
1217 tdc_unlock_share(element);
1218 return FALSE;
1219}
1220
1221
1222tdc_version_t tdc_refresh_version(void)
1223{
1224 return (tdc_version_t)my_atomic_load64_explicit(&tdc_version, MY_MEMORY_ORDER_RELAXED);
1225}
1226
1227
1228tdc_version_t tdc_increment_refresh_version(void)
1229{
1230 tdc_version_t v= (tdc_version_t)my_atomic_add64_explicit(&tdc_version, 1, MY_MEMORY_ORDER_RELAXED);
1231 DBUG_PRINT("tcache", ("incremented global refresh_version to: %lld", v));
1232 return v + 1;
1233}
1234
1235
1236/**
1237 Iterate table definition cache.
1238
1239 Object is protected against removal from table definition cache.
1240
  @note Returned TABLE_SHARE is not guaranteed to be fully initialized:
        tdc_acquire_share() added new share, but didn't open it yet. If caller
        needs a fully initialized share, it must lock the table share mutex.
1244*/
1245
/*
  State carried through a tdc_iterate() walk when duplicate elimination is
  requested. It wraps the caller's callback so that eliminate_duplicates()
  can filter elements before forwarding them.
*/
struct eliminate_duplicates_arg
{
  /* Keys recorded so far; set up by tdc_iterate() with HASH_UNIQUE. */
  HASH hash;
  /* Memory root the recorded key copies are allocated from. */
  MEM_ROOT root;
  /* Caller's original callback, invoked by eliminate_duplicates(). */
  my_hash_walk_action action;
  /* Caller's original callback argument, passed through to action. */
  void *argument;
};
1253
1254
1255static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length,
1256 my_bool not_used __attribute__((unused)))
1257{
1258 LEX_STRING *key= (LEX_STRING *) element;
1259 *length= key->length;
1260 return (uchar *) key->str;
1261}
1262
1263
1264static my_bool eliminate_duplicates(TDC_element *element,
1265 eliminate_duplicates_arg *arg)
1266{
1267 LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING));
1268
1269 if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key,
1270 element->m_key_length)))
1271 return TRUE;
1272
1273 key->length= element->m_key_length;
1274
1275 if (my_hash_insert(&arg->hash, (uchar *) key))
1276 return FALSE;
1277
1278 return arg->action(element, arg->argument);
1279}
1280
1281
1282int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument,
1283 bool no_dups)
1284{
1285 eliminate_duplicates_arg no_dups_argument;
1286 LF_PINS *pins;
1287 myf alloc_flags= 0;
1288 uint hash_flags= HASH_UNIQUE;
1289 int res;
1290
1291 if (thd)
1292 {
1293 fix_thd_pins(thd);
1294 pins= thd->tdc_hash_pins;
1295 alloc_flags= MY_THREAD_SPECIFIC;
1296 hash_flags|= HASH_THREAD_SPECIFIC;
1297 }
1298 else
1299 pins= lf_hash_get_pins(&tdc_hash);
1300
1301 if (!pins)
1302 return ER_OUTOFMEMORY;
1303
1304 if (no_dups)
1305 {
1306 init_alloc_root(&no_dups_argument.root, "no_dups", 4096, 4096,
1307 MYF(alloc_flags));
1308 my_hash_init(&no_dups_argument.hash, &my_charset_bin, tdc_records(), 0, 0,
1309 eliminate_duplicates_get_key, 0, hash_flags);
1310 no_dups_argument.action= action;
1311 no_dups_argument.argument= argument;
1312 action= (my_hash_walk_action) eliminate_duplicates;
1313 argument= &no_dups_argument;
1314 }
1315
1316 res= lf_hash_iterate(&tdc_hash, pins, action, argument);
1317
1318 if (!thd)
1319 lf_hash_put_pins(pins);
1320
1321 if (no_dups)
1322 {
1323 my_hash_free(&no_dups_argument.hash);
1324 free_root(&no_dups_argument.root, MYF(0));
1325 }
1326 return res;
1327}
1328