1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #pragma once |
40 | |
41 | #include "cachetable/background_job_manager.h" |
42 | #include <portability/toku_random.h> |
43 | #include <util/frwlock.h> |
44 | #include <util/kibbutz.h> |
45 | #include <util/nb_mutex.h> |
46 | #include <util/partitioned_counter.h> |
47 | |
48 | ////////////////////////////////////////////////////////////////////////////// |
49 | // |
50 | // This file contains the classes and structs that make up the cachetable. |
51 | // The structs are: |
52 | // - cachefile |
53 | // - ctpair |
54 | // - pair_list |
55 | // - cachefile_list |
56 | // - checkpointer |
57 | // - evictor |
58 | // - cleaner |
59 | // |
60 | // The rest of this comment assumes familiarity with the locks used in these |
61 | // classes/structs and what the locks protect. Nevertheless, here is |
62 | // a list of the locks that we have: |
63 | // - pair_list->list_lock |
64 | // - pair_list->pending_lock_expensive |
65 | // - pair_list->pending_lock_cheap |
66 | // - cachefile_list->lock |
67 | // - PAIR->mutex |
68 | // - PAIR->value_rwlock |
69 | // - PAIR->disk_nb_mutex |
70 | // |
71 | // Here are rules for how the locks interact: |
72 | // - To grab any of the pair_list's locks, or the cachefile_list's lock, |
73 | // the cachetable must be in existence |
// - To grab the PAIR mutex, we must know the PAIR will not disappear:
75 | // - the PAIR must be pinned (value_rwlock or disk_nb_mutex is held) |
76 | // - OR, the pair_list's list lock is held |
77 | // - As a result, to get rid of a PAIR from the pair_list, we must hold |
78 | // both the pair_list's list_lock and the PAIR's mutex |
79 | // - To grab PAIR->value_rwlock, we must hold the PAIR's mutex |
80 | // - To grab PAIR->disk_nb_mutex, we must hold the PAIR's mutex |
81 | // and hold PAIR->value_rwlock |
82 | // |
83 | // Now let's talk about ordering. Here is an order from outer to inner (top locks must be grabbed first) |
84 | // - pair_list->pending_lock_expensive |
85 | // - pair_list->list_lock |
86 | // - cachefile_list->lock |
87 | // - PAIR->mutex |
88 | // - pair_list->pending_lock_cheap <-- after grabbing this lock, |
89 | // NO other locks |
90 | // should be grabbed. |
91 | // - when grabbing PAIR->value_rwlock or PAIR->disk_nb_mutex, |
// if the acquisition will not block, then it does not matter if any other locks are held,
93 | // BUT if the acquisition will block, then NO other locks may be held besides |
94 | // PAIR->mutex. |
95 | // |
96 | // HERE ARE TWO EXAMPLES: |
97 | // To pin a PAIR on a client thread, the following must be done: |
98 | // - first grab the list lock and find the PAIR |
99 | // - with the list lock grabbed, grab PAIR->mutex |
100 | // - with PAIR->mutex held: |
101 | // - release list lock |
102 | // - pin PAIR |
103 | // - with PAIR pinned, grab pending_lock_cheap, |
104 | // - copy and clear PAIR->checkpoint_pending, |
105 | // - resolve checkpointing if necessary |
106 | // - return to user. |
107 | // The list lock may be held while pinning the PAIR if |
// the PAIR has no contention. Otherwise, we may deadlock with
// another thread that has the PAIR pinned, is trying to pin some
// other PAIR, and in doing so, grabs the list lock.
111 | // |
112 | // To unpin a PAIR on a client thread: |
113 | // - because the PAIR is pinned, we don't need the pair_list's list_lock |
114 | // - so, simply acquire PAIR->mutex |
115 | // - unpin the PAIR |
116 | // - return |
117 | // |
118 | ////////////////////////////////////////////////////////////////////////////// |
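
//////////////////////////////////////////////////////////////////////////////
//
// Illustrative sketch of the pin sequence described above. This is NOT the
// real implementation (that lives in cachetable.cc); pin_value_rwlock() and
// resolve_checkpointing() are hypothetical stand-ins. Only the lock order
// shown here is what matters:
//
//    // running on a client thread; pl is the cachetable's pair_list
//    pl->read_list_lock();
//    PAIR p = pl->find_pair(cf, key, fullhash);
//    pl->pair_lock_by_fullhash(fullhash);    // grab PAIR->mutex
//    pl->read_list_unlock();                 // drop the list lock before a
//                                            // possibly blocking acquisition
//    pin_value_rwlock(p);                    // hypothetical: pin by taking
//                                            // PAIR->value_rwlock
//    pl->pair_unlock_by_fullhash(fullhash);  // PAIR->mutex not needed once pinned
//    pl->read_pending_cheap_lock();
//    bool pending = p->checkpoint_pending;   // legal: pin + a pending lock held
//    p->checkpoint_pending = false;
//    pl->read_pending_cheap_unlock();
//    if (pending) {
//        resolve_checkpointing(p);           // hypothetical: write/clone the
//                                            // old version for the checkpoint
//    }
//    // ... hand the pinned PAIR back to the caller
//
//////////////////////////////////////////////////////////////////////////////
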
119 | class evictor; |
120 | class pair_list; |
121 | |
122 | /////////////////////////////////////////////////////////////////////////////// |
123 | // |
124 | // Maps to a file on disk. |
125 | // |
126 | struct cachefile { |
    // these next two fields are protected by the cachetable's list lock;
    // they are maintained whenever we add or remove a pair from
129 | // the cachetable. As of Riddler, this linked list is only used to |
130 | // make cachetable_flush_cachefile more efficient |
131 | PAIR cf_head; // doubly linked list that is NOT circular |
    uint32_t num_pairs; // number of pairs in the cachetable that belong to this cachefile
133 | |
134 | bool for_checkpoint; //True if part of the in-progress checkpoint |
135 | |
136 | // If set and the cachefile closes, the file will be removed. |
137 | // Clients must not operate on the cachefile after setting this, |
138 | // nor attempt to open any cachefile with the same fname (dname) |
139 | // until this cachefile has been fully closed and unlinked. |
140 | bool unlink_on_close; |
141 | // If set then fclose will not be logged in recovery log. |
142 | bool skip_log_recover_on_close; |
143 | int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */ |
144 | CACHETABLE cachetable; |
145 | struct fileid fileid; |
    // the filenum is used as an identifier of the cachefile
147 | // for logging and recovery |
148 | FILENUM filenum; |
149 | // number used to generate hashes for blocks in the cachefile |
150 | // used in toku_cachetable_hash |
151 | // this used to be the filenum.fileid, but now it is separate |
152 | uint32_t hash_id; |
153 | char *fname_in_env; /* Used for logging */ |
154 | |
155 | void *userdata; |
156 | void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files. |
157 | void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function. |
    void (*free_userdata)(CACHEFILE cf, void *userdata); // when closing the last reference to a cachefile, call this function to free the userdata.
    void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles, call this function.
    void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
    void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles, call this function.
162 | void (*note_pin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory |
    void (*note_unpin_by_checkpoint)(CACHEFILE cf, void *userdata); // release the reference taken by note_pin_by_checkpoint, allowing the userdata to be removed from memory
164 | BACKGROUND_JOB_MANAGER bjm; |
165 | }; |
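
// A rough sketch, inferred from the comments above, of the order in which the
// checkpointer presumably drives these callbacks (illustrative only; the real
// sequencing lives in the cachetable/checkpointer implementation):
//
//    begin checkpoint:
//        note_pin_by_checkpoint(cf, userdata);           // keep userdata alive
//        log_fassociate_during_checkpoint(cf, userdata); // log every open file
//        begin_checkpoint_userdata(lsn_of_checkpoint, userdata);
//    end checkpoint:
//        checkpoint_userdata(cf, fd, userdata);          // write the checkpointed state
//        end_checkpoint_userdata(cf, fd, userdata);
//        note_unpin_by_checkpoint(cf, userdata);         // release the reference
//    closing the last reference:
//        close_userdata(cf, fd, userdata, lsnvalid, lsn);
//        free_userdata(cf, userdata);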
166 | |
167 | |
168 | /////////////////////////////////////////////////////////////////////////////// |
169 | // |
170 | // The pair represents the data stored in the cachetable. |
171 | // |
172 | struct ctpair { |
173 | // these fields are essentially constants. They do not change. |
174 | CACHEFILE cachefile; |
175 | CACHEKEY key; |
176 | uint32_t fullhash; |
177 | CACHETABLE_FLUSH_CALLBACK flush_callback; |
178 | CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback; |
179 | CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback; |
180 | CACHETABLE_CLEANER_CALLBACK cleaner_callback; |
181 | CACHETABLE_CLONE_CALLBACK clone_callback; |
182 | CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback; |
    void *write_extraargs; // extra argument passed to the callbacks above
184 | |
    // access to these fields is protected by disk_nb_mutex
186 | void* cloned_value_data; // cloned copy of value_data used for checkpointing |
187 | long cloned_value_size; // size of cloned_value_data, used for accounting of size_current |
188 | void* disk_data; // data used to fetch/flush value_data to and from disk. |
189 | |
    // access to these fields is protected by value_rwlock
191 | void* value_data; // data used by client threads, FTNODEs and ROLLBACK_LOG_NODEs |
192 | PAIR_ATTR attr; |
193 | enum cachetable_dirty dirty; |
194 | |
195 | // protected by PAIR->mutex |
196 | uint32_t count; // clock count |
197 | uint32_t refcount; // if > 0, then this PAIR is referenced by |
198 | // callers to the cachetable, and therefore cannot |
199 | // be evicted |
200 | uint32_t num_waiting_on_refs; // number of threads waiting on refcount to go to zero |
201 | toku_cond_t refcount_wait; // cond used to wait for refcount to go to zero |
202 | |
203 | // locks |
204 | toku::frwlock value_rwlock; |
205 | struct nb_mutex disk_nb_mutex; // single writer, protects disk_data, is used for writing cloned nodes for checkpoint |
206 | toku_mutex_t* mutex; // gotten from the pair list |
207 | |
208 | // Access to checkpoint_pending is protected by two mechanisms, |
209 | // the value_rwlock and the pair_list's pending locks (expensive and cheap). |
    // checkpoint_pending may be true or false.
211 | // Here are the rules for reading/modifying this bit. |
212 | // - To transition this field from false to true during begin_checkpoint, |
213 | // we must be holding both of the pair_list's pending locks. |
214 | // - To transition this field from true to false during end_checkpoint, |
215 | // we must be holding the value_rwlock. |
216 | // - For a non-checkpoint thread to read the value, we must hold both the |
217 | // value_rwlock and one of the pair_list's pending locks |
218 | // - For the checkpoint thread to read the value, we must |
219 | // hold the value_rwlock |
220 | // |
    bool checkpoint_pending; // if set, then checkpointing of this PAIR must be resolved before modifying it.
222 | |
    // these variables are used only to transfer information to background threads;
    // we cache them here to avoid a malloc. In the future, we should investigate
    // whether this is worthwhile, as having these fields here is not technically
    // necessary.
226 | long size_evicting_estimate; |
227 | evictor* ev; |
228 | pair_list* list; |
229 | |
230 | // A PAIR is stored in a pair_list (which happens to be PAIR->list). |
231 | // These variables are protected by the list lock in the pair_list |
232 | // |
233 | // clock_next,clock_prev represent a circular doubly-linked list. |
234 | PAIR clock_next,clock_prev; // In clock. |
235 | PAIR hash_chain; |
236 | |
    // pending_next, pending_prev represent a non-circular doubly-linked list.
238 | PAIR pending_next; |
239 | PAIR pending_prev; |
240 | |
    // cf_next and cf_prev represent a non-circular doubly-linked list of the
    // PAIRs that belong to a cachefile. These fields are protected by the
    // list lock of the PAIR's pair_list. They make cachetable_flush_cachefile
    // cheaper, so that we don't need to search the entire cachetable to find
    // a particular cachefile's PAIRs.
247 | PAIR cf_next; |
248 | PAIR cf_prev; |
249 | }; |
250 | |
251 | // |
252 | // This initializes the fields and members of the pair. |
253 | // |
254 | void pair_init(PAIR p, |
255 | CACHEFILE cachefile, |
256 | CACHEKEY key, |
257 | void *value, |
258 | PAIR_ATTR attr, |
259 | enum cachetable_dirty dirty, |
260 | uint32_t fullhash, |
261 | CACHETABLE_WRITE_CALLBACK write_callback, |
262 | evictor *ev, |
263 | pair_list *list); |
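
// Illustrative sketch of a typical call site (hypothetical caller, not the
// real cachetable put path; XMALLOC is assumed to be the usual toku
// allocation macro):
//
//    PAIR XMALLOC(p);
//    pair_init(p, cf, key, value, attr, CACHETABLE_DIRTY, fullhash,
//              write_callback, &ct->ev, &ct->list);
//    ct->list.put(p);    // pair_list::put, declared below
//    // (the real code presumably holds the pair_list's write list lock
//    //  around put())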
264 | |
265 | |
266 | /////////////////////////////////////////////////////////////////////////////// |
267 | // |
268 | // The pair list maintains the set of PAIR's that make up |
269 | // the cachetable. |
270 | // |
271 | class pair_list { |
272 | public: |
273 | // |
274 | // the following fields are protected by the list lock |
275 | // |
276 | uint32_t m_n_in_table; // number of pairs in the hash table |
277 | uint32_t m_table_size; // number of buckets in the hash table |
    uint32_t m_num_locks; // number of PAIR mutexes in the m_mutexes array
279 | PAIR *m_table; // hash table |
    toku_mutex_aligned_t *m_mutexes; // array of mutexes handed out as PAIR->mutex, selected by fullhash
281 | // |
282 | // The following fields are the heads of various linked lists. |
    // They are also protected by the list lock, but their
    // usage is not as straightforward. For each of them,
    // only ONE thread is allowed to iterate over it while
    // holding a read lock on the list lock. All other threads
    // that want to modify elements in the lists or iterate over
    // the lists must hold the write list lock. Here is which
    // thread may iterate each list while holding only a read
    // lock on the list lock:
291 | // - clock_head -> eviction thread (evictor) |
292 | // - cleaner_head -> cleaner thread (cleaner) |
293 | // - pending_head -> checkpoint thread (checkpointer) |
294 | // |
    PAIR m_clock_head; // head of the clock list. the head is the next PAIR up for clock decrement.
296 | PAIR m_cleaner_head; // for cleaner thread. head is the next thing to look at for possible cleaning. |
297 | PAIR m_checkpoint_head; // for begin checkpoint to iterate over PAIRs and mark as pending_checkpoint |
298 | PAIR m_pending_head; // list of pairs marked with checkpoint_pending |
299 | |
    // this field is public so we are still POD
    // usage of this lock is described above
    toku_pthread_rwlock_t m_list_lock;
304 | // |
305 | // these locks are the "pending locks" referenced |
306 | // in comments about PAIR->checkpoint_pending. There |
307 | // are two of them, but both serve the same purpose, which |
308 | // is to protect the transition of a PAIR's checkpoint pending |
309 | // value from false to true during begin_checkpoint. |
310 | // We use two locks, because threads that want to read the |
311 | // checkpoint_pending value may hold a lock for varying periods of time. |
    // Threads running eviction may need to protect checkpoint_pending
    // while writing a node to disk, which is an expensive operation,
    // so they use pending_lock_expensive. Client threads that
315 | // want to pin PAIRs will want to protect checkpoint_pending |
316 | // just long enough to read the value and wipe it out. This is |
317 | // a cheap operation, and as a result, uses pending_lock_cheap. |
318 | // |
319 | // By having two locks, and making begin_checkpoint first |
320 | // grab pending_lock_expensive and then pending_lock_cheap, |
321 | // we ensure that threads that want to pin nodes can grab |
322 | // only pending_lock_cheap, and never block behind threads |
    // holding pending_lock_expensive and writing a node out to disk.
    // (An illustrative sketch of how begin_checkpoint presumably uses these
    // locks follows this class.)
    //
325 | toku_pthread_rwlock_t m_pending_lock_expensive; |
326 | toku_pthread_rwlock_t m_pending_lock_cheap; |
327 | void init(); |
328 | void destroy(); |
329 | void evict_completely(PAIR pair); |
330 | void evict_from_cachetable(PAIR pair); |
331 | void evict_from_cachefile(PAIR pair); |
332 | void add_to_cachetable_only(PAIR p); |
333 | void put(PAIR pair); |
334 | PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash); |
335 | void pending_pairs_remove (PAIR p); |
336 | void verify(); |
337 | void get_state(int *num_entries, int *hash_size); |
338 | void read_list_lock(); |
339 | void read_list_unlock(); |
340 | void write_list_lock(); |
341 | void write_list_unlock(); |
342 | void read_pending_exp_lock(); |
343 | void read_pending_exp_unlock(); |
344 | void write_pending_exp_lock(); |
345 | void write_pending_exp_unlock(); |
346 | void read_pending_cheap_lock(); |
347 | void read_pending_cheap_unlock(); |
348 | void write_pending_cheap_lock(); |
349 | void write_pending_cheap_unlock(); |
350 | toku_mutex_t* get_mutex_for_pair(uint32_t fullhash); |
351 | void pair_lock_by_fullhash(uint32_t fullhash); |
352 | void pair_unlock_by_fullhash(uint32_t fullhash); |
353 | |
354 | private: |
355 | void pair_remove (PAIR p); |
356 | void remove_from_hash_chain(PAIR p); |
357 | void add_to_cf_list (PAIR p); |
358 | void add_to_clock (PAIR p); |
359 | void add_to_hash_chain(PAIR p); |
360 | }; |
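
// Illustrative sketch (not the actual implementation) of how begin_checkpoint
// presumably uses the pending locks and the checkpoint head described above to
// turn on the pending bits; the lock order follows the rules at the top of
// this file, and the pending-list helper is omitted:
//
//    pl->write_pending_exp_lock();    // keep evictors from racing with us
//    pl->write_list_lock();           // presumably needed to walk the clock list
//    pl->write_pending_cheap_lock();  // keep client threads from racing with us
//    PAIR p = pl->m_checkpoint_head;
//    if (p != NULL) {
//        do {
//            p->checkpoint_pending = true;  // legal: both pending locks are held
//            // link p into the m_pending_head list here (helper omitted)
//            p = p->clock_next;
//        } while (p != pl->m_checkpoint_head);
//    }
//    pl->write_pending_cheap_unlock();
//    pl->write_list_unlock();
//    pl->write_pending_exp_unlock();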
361 | |
362 | /////////////////////////////////////////////////////////////////////////////// |
363 | // |
364 | // Wrapper for the head of our cachefile list. |
365 | // |
366 | class cachefile_list { |
367 | public: |
368 | void init(); |
369 | void destroy(); |
370 | void read_lock(); |
371 | void read_unlock(); |
372 | void write_lock(); |
373 | void write_unlock(); |
374 | int cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf); |
375 | int cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf); |
376 | void add_cf_unlocked(CACHEFILE newcf); |
377 | void add_stale_cf(CACHEFILE newcf); |
378 | void remove_cf(CACHEFILE cf); |
379 | void remove_stale_cf_unlocked(CACHEFILE cf); |
380 | FILENUM reserve_filenum(); |
381 | uint32_t get_new_hash_id_unlocked(); |
382 | CACHEFILE find_cachefile_unlocked(struct fileid* fileid); |
383 | CACHEFILE find_stale_cachefile_unlocked(struct fileid* fileid); |
384 | void verify_unused_filenum(FILENUM filenum); |
385 | bool evict_some_stale_pair(evictor* ev); |
386 | void free_stale_data(evictor* ev); |
    // access to these fields is protected by m_lock
    // (see the locking sketch after this class)
388 | FILENUM m_next_filenum_to_use; |
389 | uint32_t m_next_hash_id_to_use; |
    toku_pthread_rwlock_t m_lock; // this field is public so we are still POD
391 | toku::omt<CACHEFILE> m_active_filenum; |
392 | toku::omt<CACHEFILE> m_active_fileid; |
393 | toku::omt<CACHEFILE> m_stale_fileid; |
394 | private: |
395 | CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid); |
396 | }; |
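
// Illustrative sketch (not the actual implementation) of how the *_unlocked
// methods are presumably meant to be used: the caller takes m_lock itself and
// can then batch several operations under it:
//
//    cf_list->write_lock();
//    CACHEFILE cf = cf_list->find_cachefile_unlocked(&fileid);
//    if (cf == NULL) {
//        // ... set up a new CACHEFILE (hypothetical helper) ...
//        cf_list->add_cf_unlocked(newcf);
//    }
//    cf_list->write_unlock();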
397 | |
398 | |
399 | /////////////////////////////////////////////////////////////////////////////// |
400 | // |
401 | // The checkpointer handles starting and finishing checkpoints of the |
402 | // cachetable's data. |
403 | // |
404 | class checkpointer { |
405 | public: |
406 | int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files); |
407 | void destroy(); |
408 | void set_checkpoint_period(uint32_t new_period); |
409 | uint32_t get_checkpoint_period(); |
410 | int shutdown(); |
411 | bool has_been_shutdown(); |
412 | void begin_checkpoint(); |
413 | void add_background_job(); |
414 | void remove_background_job(); |
    void end_checkpoint(void (*testcallback_f)(void*), void *testextra);
416 | TOKULOGGER get_logger(); |
417 | // used during begin_checkpoint |
418 | void increment_num_txns(); |
419 | private: |
420 | uint32_t m_checkpoint_num_txns; // how many transactions are in the checkpoint |
421 | TOKULOGGER m_logger; |
422 | LSN m_lsn_of_checkpoint_in_progress; |
423 | uint32_t m_checkpoint_num_files; // how many cachefiles are in the checkpoint |
424 | struct minicron m_checkpointer_cron; // the periodic checkpointing thread |
425 | cachefile_list *m_cf_list; |
426 | pair_list *m_list; |
427 | evictor *m_ev; |
428 | bool m_checkpointer_cron_init; |
429 | bool m_checkpointer_init; |
430 | |
431 | // variable used by the checkpoint thread to know |
432 | // when all work induced by cloning on client threads is done |
433 | BACKGROUND_JOB_MANAGER m_checkpoint_clones_bjm; |
    // private methods for begin_checkpoint (a sketch of the overall checkpoint flow follows this class)
435 | void update_cachefiles(); |
436 | void log_begin_checkpoint(); |
437 | void turn_on_pending_bits(); |
438 | // private methods for end_checkpoint |
439 | void fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs); |
440 | void checkpoint_pending_pairs(); |
441 | void checkpoint_userdata(CACHEFILE* checkpoint_cfs); |
442 | void log_end_checkpoint(); |
443 | void end_checkpoint_userdata(CACHEFILE* checkpoint_cfs); |
444 | void remove_cachefiles(CACHEFILE* checkpoint_cfs); |
445 | |
446 | // Unit test struct needs access to private members. |
447 | friend struct checkpointer_test; |
448 | }; |
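
// A rough sketch of the checkpoint flow, assuming the private helpers above
// run in the order they are declared (illustrative only; the real sequencing
// and locking live in the checkpointer's implementation):
//
//    begin_checkpoint():
//        update_cachefiles();       // log fassociates, pin the open cachefiles
//        log_begin_checkpoint();
//        turn_on_pending_bits();    // mark every PAIR checkpoint_pending
//    end_checkpoint(testcallback_f, testextra):
//        fill_checkpoint_cfs(checkpoint_cfs);
//        checkpoint_pending_pairs();            // write/clone every pending PAIR
//        checkpoint_userdata(checkpoint_cfs);
//        log_end_checkpoint();
//        end_checkpoint_userdata(checkpoint_cfs);
//        remove_cachefiles(checkpoint_cfs);     // unpin the cachefiles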
449 | |
450 | // |
451 | // This is how often we want the eviction thread |
452 | // to run, in seconds. |
453 | // |
454 | const int EVICTION_PERIOD = 1; |
455 | |
456 | /////////////////////////////////////////////////////////////////////////////// |
457 | // |
458 | // The evictor handles the removal of pairs from the pair list/cachetable. |
459 | // |
460 | class evictor { |
461 | public: |
462 | int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period); |
463 | void destroy(); |
464 | void add_pair_attr(PAIR_ATTR attr); |
465 | void remove_pair_attr(PAIR_ATTR attr); |
466 | void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr); |
467 | void add_cloned_data_size(long size); |
468 | void remove_cloned_data_size(long size); |
469 | uint64_t reserve_memory(double fraction, uint64_t upper_bound); |
470 | void release_reserved_memory(uint64_t reserved_memory); |
471 | void run_eviction_thread(); |
472 | void do_partial_eviction(PAIR p); |
473 | void evict_pair(PAIR p, bool checkpoint_pending); |
474 | void wait_for_cache_pressure_to_subside(); |
475 | void signal_eviction_thread(); |
476 | void signal_eviction_thread_locked(); |
477 | bool should_client_thread_sleep(); |
478 | bool should_client_wake_eviction_thread(); |
479 | // function needed for testing |
480 | void get_state(long *size_current_ptr, long *size_limit_ptr); |
481 | void fill_engine_status(); |
482 | void set_enable_partial_eviction(bool enabled); |
483 | bool get_enable_partial_eviction(void) const; |
484 | private: |
485 | void add_to_size_current(long size); |
486 | void remove_from_size_current(long size); |
487 | void run_eviction(); |
488 | bool run_eviction_on_pair(PAIR p); |
489 | void try_evict_pair(PAIR p); |
490 | void decrease_size_evicting(long size_evicting_estimate); |
491 | bool should_sleeping_clients_wakeup(); |
492 | bool eviction_needed(); |
493 | |
494 | // We have some intentional races with these variables because we're ok with reading something a little bit old. |
495 | // Provide some hooks for reading variables in an unsafe way so that there are function names we can stick in a valgrind suppression. |
496 | int64_t unsafe_read_size_current(void) const; |
497 | int64_t unsafe_read_size_evicting(void) const; |
498 | |
499 | pair_list* m_pl; |
500 | cachefile_list* m_cf_list; |
501 | int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable |
502 | int64_t m_size_cloned_data; // stores amount of cloned data we have, only used for engine status |
503 | // changes to these two values are protected |
504 | // by ev_thread_lock |
505 | int64_t m_size_reserved; // How much memory is reserved (e.g., by the loader) |
506 | int64_t m_size_evicting; // the sum of the sizes of the pairs being written |
507 | |
508 | // these are constants |
509 | int64_t m_low_size_watermark; // target max size of cachetable that eviction thread aims for |
510 | int64_t m_low_size_hysteresis; // if cachetable grows to this size, client threads wake up eviction thread upon adding data |
511 | int64_t m_high_size_watermark; // if cachetable grows to this size, client threads sleep upon adding data |
512 | int64_t m_high_size_hysteresis; // if > cachetable size, then sleeping client threads may wake up |
513 | |
514 | bool m_enable_partial_eviction; // true if partial evictions are permitted |
515 | |
516 | // used to calculate random numbers |
517 | struct random_data m_random_data; |
518 | char m_random_statebuf[64]; |
519 | |
    // mutex that protects the fields listed immediately below
521 | toku_mutex_t m_ev_thread_lock; |
522 | // the eviction thread |
523 | toku_pthread_t m_ev_thread; |
524 | // condition variable that controls the sleeping period |
525 | // of the eviction thread |
526 | toku_cond_t m_ev_thread_cond; |
527 | // number of client threads that are currently sleeping |
528 | // due to an over-subscribed cachetable |
529 | uint32_t m_num_sleepers; |
530 | // states if the eviction thread should run. set to true |
531 | // in init, set to false during destroy |
532 | bool m_run_thread; |
533 | // bool that states if the eviction thread is currently running |
534 | bool m_ev_thread_is_running; |
535 | // period which the eviction thread sleeps |
536 | uint32_t m_period_in_seconds; |
    // condition variable on which client threads wait when sleeping
    // due to an over-subscribed cachetable (see the flow-control
    // sketch after this class)
539 | toku_cond_t m_flow_control_cond; |
540 | |
541 | // variables for engine status |
542 | PARTITIONED_COUNTER m_size_nonleaf; |
543 | PARTITIONED_COUNTER m_size_leaf; |
544 | PARTITIONED_COUNTER m_size_rollback; |
545 | PARTITIONED_COUNTER m_size_cachepressure; |
546 | PARTITIONED_COUNTER m_wait_pressure_count; |
547 | PARTITIONED_COUNTER m_wait_pressure_time; |
548 | PARTITIONED_COUNTER m_long_wait_pressure_count; |
549 | PARTITIONED_COUNTER m_long_wait_pressure_time; |
550 | |
551 | KIBBUTZ m_kibbutz; |
552 | |
553 | // this variable is ONLY used for testing purposes |
554 | uint64_t m_num_eviction_thread_runs; |
555 | |
556 | bool m_ev_thread_init; |
557 | bool m_evictor_init; |
558 | |
559 | friend class evictor_test_helpers; |
560 | friend class evictor_unit_test; |
561 | }; |
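
// Illustrative sketch (not the actual implementation) of how a client thread
// presumably interacts with the evictor's flow control before adding new data
// to the cachetable; the real logic lives in cachetable.cc:
//
//    if (ev->should_client_thread_sleep()) {
//        ev->wait_for_cache_pressure_to_subside();  // blocks on m_flow_control_cond
//    }
//    if (ev->should_client_wake_eviction_thread()) {
//        ev->signal_eviction_thread();
//    }
//    // ... insert the data, then account for its size:
//    ev->add_pair_attr(attr);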
562 | |
563 | /////////////////////////////////////////////////////////////////////////////// |
564 | // |
// Iterates over the pair list starting at the cleaner head, calling the
// cleaner callback on each node it visits.
567 | // |
568 | class cleaner { |
569 | public: |
570 | int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct); |
571 | void destroy(void); |
572 | uint32_t get_iterations(void); |
573 | void set_iterations(uint32_t new_iterations); |
574 | uint32_t get_period_unlocked(void); |
575 | void set_period(uint32_t new_period); |
576 | int run_cleaner(void); |
577 | |
578 | private: |
579 | pair_list* m_pl; |
580 | CACHETABLE m_ct; |
581 | struct minicron m_cleaner_cron; // the periodic cleaner thread |
582 | uint32_t m_cleaner_iterations; // how many times to run the cleaner per |
583 | // cleaner period (minicron has a |
584 | // minimum period of 1s so if you want |
585 | // more frequent cleaner runs you must |
586 | // use this) |
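    // For example: set_period(2) together with set_iterations(8)
    // presumably yields 8 cleaner passes every 2 seconds, i.e.
    // roughly 4 cleaner callbacks per second.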
587 | bool m_cleaner_cron_init; |
588 | bool m_cleaner_init; |
589 | }; |
590 | |
591 | /////////////////////////////////////////////////////////////////////////////// |
592 | // |
593 | // The cachetable is as close to an ENV as we get. |
594 | // |
595 | struct cachetable { |
596 | pair_list list; |
597 | cleaner cl; |
598 | evictor ev; |
599 | checkpointer cp; |
600 | cachefile_list cf_list; |
601 | |
602 | KIBBUTZ client_kibbutz; // pool of worker threads and jobs to do asynchronously for the client. |
603 | KIBBUTZ ct_kibbutz; // pool of worker threads and jobs to do asynchronously for the cachetable |
604 | KIBBUTZ checkpointing_kibbutz; // small pool for checkpointing cloned pairs |
605 | |
606 | char *env_dir; |
607 | }; |
608 | |