1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #pragma once |
40 | |
41 | #include <fcntl.h> |
42 | |
43 | #include "ft/logger/logger.h" |
44 | #include "ft/serialize/block_table.h" |
45 | #include "ft/txn/txn.h" |
46 | #include "ft/ft-status.h" |
47 | #include "util/minicron.h" |
48 | |
49 | // Maintain a cache mapping from cachekeys to values (void*) |
50 | // Some of the keys can be pinned. Don't pin too many or for too long. |
51 | // If the cachetable is too full, it will call the flush_callback() function with the key, the value, and the otherargs |
52 | // and then remove the key-value pair from the cache. |
53 | // The callback won't be any of the currently pinned keys. |
54 | // Also when flushing an object, the cachetable drops all references to it, |
55 | // so you may need to free() it. |
56 | // Note: The cachetable should use a common pool of memory, flushing things across cachetables. |
57 | // (The first implementation doesn't) |
58 | // If you pin something twice, you must unpin it twice. |
59 | // table_size is the initial size of the cache table hash table (in number of entries) |
60 | // size limit is the upper bound of the sum of size of the entries in the cache table (total number of bytes) |
61 | |
62 | typedef BLOCKNUM CACHEKEY; |
63 | |
64 | class checkpointer; |
65 | typedef class checkpointer *CHECKPOINTER; |
66 | typedef struct cachetable *CACHETABLE; |
67 | typedef struct cachefile *CACHEFILE; |
68 | typedef struct ctpair *PAIR; |
69 | |
// This struct holds information about values stored in the cachetable.
// As one can tell from the field names, we are probably violating an
// abstraction layer by placing these names here.
//
// The purpose of having this struct is to give the cachetable a way
// to accumulate some totals we are interested in.
// Breaking this abstraction layer by having these names was the
// easiest way.
78 | // |
// Size attributes the cachetable tracks for each PAIR.
typedef struct pair_attr_s {
    // size PAIR's value takes in memory
    long size;
    // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status
    long nonleaf_size;
    // size if PAIR is a leaf node, 0 otherwise, used only for engine status
    long leaf_size;
    // size if PAIR is a rollback node, 0 otherwise, used only for engine status
    long rollback_size;
    // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts
    long cache_pressure_size;
    // make_pair_attr() always sets this to true
    bool is_valid;
} PAIR_ATTR;

// Build a PAIR_ATTR for a value occupying `size` bytes in memory.
// Every other size field is zero and is_valid is true.
static inline PAIR_ATTR make_pair_attr(long size) {
    // Initializers are listed in member declaration order.
    PAIR_ATTR attr = {
        size,   // size
        0,      // nonleaf_size
        0,      // leaf_size
        0,      // rollback_size
        0,      // cache_pressure_size
        true    // is_valid
    };
    return attr;
}
99 | |
100 | void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period); |
101 | uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct); |
102 | void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations); |
103 | uint32_t toku_get_cleaner_iterations (CACHETABLE ct); |
104 | uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct); |
105 | void toku_set_enable_partial_eviction (CACHETABLE ct, bool enabled); |
106 | bool toku_get_enable_partial_eviction (CACHETABLE ct); |
107 | |
108 | // cachetable operations |
109 | |
110 | // create and initialize a cache table |
// size_limit is the upper limit on the total size of the values in the table
112 | // pass 0 if you want the default |
113 | int toku_cachetable_create_ex(CACHETABLE *result, long size_limit, |
114 | unsigned long client_pool_threads, |
115 | unsigned long cachetable_pool_threads, |
116 | unsigned long checkpoint_pool_threads, |
117 | LSN initial_lsn, struct tokulogger *logger); |
118 | |
// Convenience form of toku_cachetable_create_ex() that passes 0 for the three
// thread-pool sizes.
//   r: CACHETABLE *result, s: size_limit, l: initial_lsn, o: logger
// The expansion deliberately has no trailing semicolon, so the macro can be
// used as an expression (e.g. `int rc = toku_cachetable_create(...);`) and in
// an unbraced if/else; the caller supplies the semicolon.
#define toku_cachetable_create(r, s, l, o) \
    toku_cachetable_create_ex(r, s, 0, 0, 0, l, o)
121 | |
122 | // Create a new cachetable. |
123 | // Effects: a new cachetable is created and initialized. |
124 | // The cachetable pointer is stored into result. |
125 | // The sum of the sizes of the memory objects is set to size_limit, in whatever |
126 | // units make sense to the user of the cachetable. |
127 | // Returns: If success, returns 0 and result points to the new cachetable. Otherwise, |
128 | // returns an error number. |
129 | |
130 | // Returns a pointer to the checkpointer within the given cachetable. |
131 | CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct); |
132 | |
133 | // What is the cachefile that goes with a particular filenum? |
134 | // During a transaction, we cannot reuse a filenum. |
135 | int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf); |
136 | |
137 | // What is the cachefile that goes with a particular iname (relative to env)? |
138 | // During a transaction, we cannot reuse an iname. |
139 | int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf); |
140 | |
141 | // Get the iname (within the cwd) associated with the cachefile |
142 | // Return the filename |
143 | char *toku_cachefile_fname_in_cwd (CACHEFILE cf); |
144 | |
145 | void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger); |
146 | |
147 | void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, |
148 | void (*testcallback_f)(void*), void * ); |
149 | |
150 | |
151 | // Shuts down checkpoint thread |
152 | // Requires no locks be held that are taken by the checkpoint function |
153 | void toku_cachetable_minicron_shutdown(CACHETABLE ct); |
154 | |
155 | // Prepare to close the cachetable. This informs the cachetable that it is about to be closed |
156 | // so that it can tune its checkpoint resource use. |
157 | void toku_cachetable_prepare_close(CACHETABLE ct); |
158 | |
159 | // Close the cachetable. |
160 | // Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed. |
161 | void toku_cachetable_close(CACHETABLE *ct); |
162 | |
163 | // Open a file and bind the file to a new cachefile object. (For use by test programs only.) |
164 | int toku_cachetable_openf(CACHEFILE *,CACHETABLE, const char *fname_in_env, int flags, mode_t mode); |
165 | |
166 | // Bind a file to a new cachefile object. |
167 | int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd, |
168 | const char *fname_relative_to_env); |
169 | int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd, |
170 | const char *fname_in_env, |
171 | FILENUM filenum, bool* was_open); |
172 | |
173 | // reserve a unique filenum |
174 | FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct); |
175 | |
176 | // Effect: Reserve a fraction of the cachetable memory. |
177 | // Returns the amount reserved. |
178 | // To return the memory to the cachetable, call toku_cachetable_release_reserved_memory |
179 | // Requires 0<fraction<1. |
180 | uint64_t toku_cachetable_reserve_memory(CACHETABLE, double fraction, uint64_t upper_bound); |
181 | void toku_cachetable_release_reserved_memory(CACHETABLE, uint64_t); |
182 | |
183 | // cachefile operations |
184 | |
185 | // Does an fsync of a cachefile. |
186 | void toku_cachefile_fsync(CACHEFILE cf); |
187 | |
// Cost classification for running partial eviction on a PAIR.
enum partial_eviction_cost {
    // running partial eviction is cheap, and can be done on the client thread
    PE_CHEAP = 0,
    // running partial eviction is expensive, and should not be done on the client thread
    PE_EXPENSIVE = 1
};
192 | |
// Whether a cachetable pair is clean or dirty with respect to external memory.
enum cachetable_dirty {
    // the cached object is clean WRT the cachefile
    CACHETABLE_CLEAN = 0,
    // the cached object is dirty WRT the cachefile
    CACHETABLE_DIRTY = 1
};
198 | |
199 | // The flush callback is called when a key value pair is being written to storage and possibly removed from the cachetable. |
200 | // When write_me is true, the value should be written to storage. |
201 | // When keep_me is false, the value should be freed. |
202 | // When for_checkpoint is true, this was a 'pending' write |
// Returns: nothing (the callback type is declared void).
204 | // Can access fd (fd is protected by a readlock during call) |
205 | typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, void *value, void **disk_data, void *, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); |
206 | |
207 | // The fetch callback is called when a thread is attempting to get and pin a memory |
208 | // object and it is not in the cachetable. |
209 | // Returns: 0 if success, otherwise an error number. The address and size of the object |
210 | // associated with the key are returned. |
211 | // Can access fd (fd is protected by a readlock during call) |
212 | typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, PAIR p, int fd, CACHEKEY key, uint32_t fullhash, void **value_data, void **disk_data, PAIR_ATTR *sizep, int *dirtyp, void *); |
213 | |
214 | // The cachetable calls the partial eviction estimate callback to determine if |
// partial eviction is a cheap operation that may be called on the client thread
216 | // or whether partial eviction is expensive and should be done on a background (writer) thread. |
217 | // The callback conveys this information by setting cost to either PE_CHEAP or PE_EXPENSIVE. |
218 | // If cost is PE_EXPENSIVE, then the callback also sets bytes_freed_estimate |
219 | // to return an estimate of the number of bytes it will free |
220 | // so that the cachetable can estimate how much data is being evicted on background threads. |
221 | // If cost is PE_CHEAP, then the callback does not set bytes_freed_estimate. |
222 | typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void *); |
223 | |
// The cachetable calls the partial eviction callback to possibly try and partially evict pieces
225 | // of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free |
226 | // nothing, or may choose to free as much as possible. When the partial eviction callback is finished, |
227 | // it must call finalize with the new PAIR_ATTR and the given finalize_extra. After this point, the |
228 | // write lock will be released on the PAIR and it is no longer safe to operate on any of the passed arguments. |
229 | // This is useful for doing expensive cleanup work outside of the PAIR's write lock (such as destroying objects, etc) |
230 | // |
231 | // on entry, requires a write lock to be held on the PAIR in the cachetable while this function is called |
232 | // on exit, the finalize continuation is called |
233 | typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, void *, |
234 | void (*finalize)(PAIR_ATTR new_attr, void *), void *); |
235 | |
236 | // The cachetable calls this function to determine if get_and_pin call requires a partial fetch. If this function returns true, |
237 | // then the cachetable will subsequently call CACHETABLE_PARTIAL_FETCH_CALLBACK to perform |
238 | // a partial fetch. If this function returns false, then the PAIR's value is returned to the caller as is. |
239 | // |
240 | // An alternative to having this callback is to always call CACHETABLE_PARTIAL_FETCH_CALLBACK, and let |
241 | // CACHETABLE_PARTIAL_FETCH_CALLBACK decide whether to do any partial fetching or not. |
242 | // There is no particular reason why this alternative was not chosen. |
243 | // Requires: a read lock to be held on the PAIR |
244 | typedef bool (*CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK)(void *ftnode_pv, void *); |
245 | |
246 | // The cachetable calls the partial fetch callback when a thread needs to read or decompress a subset of a PAIR into memory. |
247 | // An example is needing to read a basement node into memory. Another example is decompressing an internal node's |
248 | // message buffer. The cachetable determines if a partial fetch is necessary by first calling CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK. |
249 | // The new PAIR_ATTR of the PAIR is returned in sizep |
250 | // Can access fd (fd is protected by a readlock during call) |
251 | // Returns: 0 if success, otherwise an error number. |
252 | typedef int (*CACHETABLE_PARTIAL_FETCH_CALLBACK)(void *value_data, void* disk_data, void *, int fd, PAIR_ATTR *sizep); |
253 | |
254 | // The cachetable calls the put callback during a cachetable_put command to provide the opaque PAIR. |
255 | // The PAIR can then be used to later unpin the pair. |
// Returns: nothing (the callback type is declared void).
257 | typedef void (*CACHETABLE_PUT_CALLBACK)(CACHEKEY key, void *value_data, PAIR p); |
258 | |
259 | // TODO(leif) XXX TODO XXX |
260 | typedef int (*CACHETABLE_CLEANER_CALLBACK)(void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *); |
261 | |
262 | typedef void (*CACHETABLE_CLONE_CALLBACK)(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* ); |
263 | |
264 | typedef void (*CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK)(void *value_data); |
265 | |
266 | typedef struct { |
267 | CACHETABLE_FLUSH_CALLBACK flush_callback; |
268 | CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback; |
269 | CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback; |
270 | CACHETABLE_CLEANER_CALLBACK cleaner_callback; |
271 | CACHETABLE_CLONE_CALLBACK clone_callback; |
272 | CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback; |
273 | void* ; // parameter for flush_callback, pe_est_callback, pe_callback, and cleaner_callback |
274 | } CACHETABLE_WRITE_CALLBACK; |
275 | |
276 | typedef void (*CACHETABLE_GET_KEY_AND_FULLHASH)(CACHEKEY* cachekey, uint32_t* fullhash, void* ); |
277 | |
278 | typedef void (*CACHETABLE_REMOVE_KEY)(CACHEKEY* cachekey, bool for_checkpoint, void* ); |
279 | |
280 | void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata, |
281 | void (*log_fassociate_during_checkpoint)(CACHEFILE, void*), |
282 | void (*close_userdata)(CACHEFILE, int, void*, bool, LSN), |
283 | void (*free_userdata)(CACHEFILE, void*), |
284 | void (*checkpoint_userdata)(CACHEFILE, int, void*), |
285 | void (*begin_checkpoint_userdata)(LSN, void*), |
286 | void (*end_checkpoint_userdata)(CACHEFILE, int, void*), |
287 | void (*note_pin_by_checkpoint)(CACHEFILE, void*), |
288 | void (*note_unpin_by_checkpoint)(CACHEFILE, void*)); |
289 | // Effect: Store some cachefile-specific user data. When the last reference to a cachefile is closed, we call close_userdata(). |
//         Before starting a checkpoint, we call begin_checkpoint_userdata().
291 | // When the cachefile needs to be checkpointed, we call checkpoint_userdata(). |
292 | // If userdata is already non-NULL, then we simply overwrite it. |
293 | |
294 | void *toku_cachefile_get_userdata(CACHEFILE); |
295 | // Effect: Get the user data. |
296 | |
297 | CACHETABLE toku_cachefile_get_cachetable(CACHEFILE cf); |
298 | // Effect: Get the cachetable. |
299 | |
300 | CACHEFILE toku_pair_get_cachefile(PAIR); |
301 | // Effect: Get the cachefile of the pair |
302 | |
303 | void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair); |
304 | // Effect: Swaps the value_data of old_pair and new_pair. |
305 | // Requires: both old_pair and new_pair to be pinned with write locks. |
306 | |
// Lock type requested through the lock_type parameter of the get_and_pin
// family of functions.
typedef enum {
    PL_READ = 0,
    PL_WRITE_CHEAP = 1,
    PL_WRITE_EXPENSIVE = 2
} pair_lock_type;
312 | |
313 | // put something into the cachetable and checkpoint dependent pairs |
314 | // if the checkpointing is necessary |
315 | void toku_cachetable_put_with_dep_pairs( |
316 | CACHEFILE cachefile, |
317 | CACHETABLE_GET_KEY_AND_FULLHASH get_key_and_fullhash, |
318 | void *value, |
319 | PAIR_ATTR attr, |
320 | CACHETABLE_WRITE_CALLBACK write_callback, |
321 | void *get_key_and_fullhash_extra, |
322 | uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint |
323 | PAIR* dependent_pairs, |
324 | enum cachetable_dirty* dependent_dirty, // array stating dirty/cleanness of dependent pairs |
325 | CACHEKEY* key, |
326 | uint32_t* fullhash, |
327 | CACHETABLE_PUT_CALLBACK put_callback |
328 | ); |
329 | |
330 | // Put a memory object into the cachetable. |
331 | // Effects: Lookup the key in the cachetable. If the key is not in the cachetable, |
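//          then insert the pair and pin it. Otherwise return an error. Some of the key
//          value pairs may be evicted from the cachetable when the cachetable gets too big.
// NOTE(review): the function is declared void, so it cannot return the error
//          described above -- confirm what actually happens when the key is already present.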
334 | void toku_cachetable_put(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, |
335 | void *value, PAIR_ATTR size, |
336 | CACHETABLE_WRITE_CALLBACK write_callback, |
337 | CACHETABLE_PUT_CALLBACK put_callback |
338 | ); |
339 | |
340 | // Get and pin the memory object of a PAIR, and write dependent pairs to disk |
341 | // if the dependent pairs are pending a checkpoint. |
342 | // Effects: If the memory object is in the cachetable, acquire a PAIR lock on it. |
343 | // Otherwise, fetch it from storage by calling the fetch callback. If the fetch |
344 | // succeeded, add the memory object to the cachetable with a PAIR lock on it. |
345 | // Before returning to the user, if the PAIR object being retrieved, or any of the |
346 | // dependent pairs passed in as parameters must be written to disk for checkpoint, |
347 | // then the required PAIRs are written to disk for checkpoint. |
348 | // KEY PROPERTY OF DEPENDENT PAIRS: They are already locked by the client |
349 | // Returns: 0 if the memory object is in memory, otherwise an error number. |
350 | int toku_cachetable_get_and_pin_with_dep_pairs ( |
351 | CACHEFILE cachefile, |
352 | CACHEKEY key, |
353 | uint32_t fullhash, |
354 | void**value, |
355 | long *sizep, |
356 | CACHETABLE_WRITE_CALLBACK write_callback, |
357 | CACHETABLE_FETCH_CALLBACK fetch_callback, |
358 | CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, |
359 | CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, |
360 | pair_lock_type lock_type, |
361 | void* , // parameter for fetch_callback, pf_req_callback, and pf_callback |
362 | uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint |
363 | PAIR* dependent_pairs, |
364 | enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs |
365 | ); |
366 | |
367 | // Get and pin a memory object. |
368 | // Effects: If the memory object is in the cachetable acquire the PAIR lock on it. |
369 | // Otherwise, fetch it from storage by calling the fetch callback. If the fetch |
370 | // succeeded, add the memory object to the cachetable with a read lock on it. |
371 | // Returns: 0 if the memory object is in memory, otherwise an error number. |
372 | int toku_cachetable_get_and_pin ( |
373 | CACHEFILE cachefile, |
374 | CACHEKEY key, |
375 | uint32_t fullhash, |
376 | void**value, |
377 | long *sizep, |
378 | CACHETABLE_WRITE_CALLBACK write_callback, |
379 | CACHETABLE_FETCH_CALLBACK fetch_callback, |
380 | CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, |
381 | CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, |
382 | bool may_modify_value, |
383 | void* // parameter for fetch_callback, pf_req_callback, and pf_callback |
384 | ); |
385 | |
386 | // does partial fetch on a pinned pair |
387 | void toku_cachetable_pf_pinned_pair( |
388 | void* value, |
389 | CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, |
390 | void* , |
391 | CACHEFILE cf, |
392 | CACHEKEY key, |
393 | uint32_t fullhash |
394 | ); |
395 | |
// One node of the singly linked list handed to
// toku_cachetable_get_and_pin_nonblocking(), which calls "the functions in
// unlockers" before it fetches a missing block.
struct unlockers {
    bool locked;
    // Function to call for this node.
    void (*f)(void *extra);
    // NOTE(review): this member had no name in this copy of the header
    // ("void *;" declares nothing). `extra` is believed to be the upstream
    // name; presumably it is the argument passed to f -- confirm.
    void *extra;
    // Next node in the list.
    struct unlockers *next;
};
typedef struct unlockers *UNLOCKERS;
403 | |
404 | // Effect: If the block is in the cachetable, then return it. |
405 | // Otherwise call the functions in unlockers, fetch the data (but don't pin it, since we'll just end up pinning it again later), and return TOKUDB_TRY_AGAIN. |
406 | int toku_cachetable_get_and_pin_nonblocking ( |
407 | CACHEFILE cf, |
408 | CACHEKEY key, |
409 | uint32_t fullhash, |
410 | void**value, |
411 | long *sizep, |
412 | CACHETABLE_WRITE_CALLBACK write_callback, |
413 | CACHETABLE_FETCH_CALLBACK fetch_callback, |
414 | CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, |
415 | CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, |
416 | pair_lock_type lock_type, |
417 | void *, // parameter for fetch_callback, pf_req_callback, and pf_callback |
418 | UNLOCKERS unlockers |
419 | ); |
420 | |
421 | int toku_cachetable_maybe_get_and_pin (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**); |
422 | // Effect: Maybe get and pin a memory object. |
423 | // This function is similar to the get_and_pin function except that it |
424 | // will not attempt to fetch a memory object that is not in the cachetable or requires any kind of blocking to get it. |
// Returns: If the item is already in memory, then return 0 and store it in the
426 | // void**. If the item is not in memory, then return a nonzero error number. |
427 | |
428 | int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**); |
429 | // Effect: Like maybe get and pin, but may pin a clean pair. |
430 | |
431 | int toku_cachetable_unpin(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size); |
432 | // Effect: Unpin a memory object |
433 | // Modifies: If the memory object is in the cachetable, then OR the dirty flag, |
434 | // update the size, and release the read lock on the memory object. |
435 | // Returns: 0 if success, otherwise returns an error number. |
436 | // Requires: The ct is locked. |
437 | |
438 | int toku_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size); |
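// Effect: The same as toku_cachetable_unpin, except that the ct must not be locked.
// Requires: The ct is NOT locked.
// NOTE(review): the function name says "ct_prelocked", which reads as the opposite
// of this requirement -- confirm which one is correct.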
441 | |
442 | int toku_cachetable_unpin_and_remove (CACHEFILE, PAIR, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */ |
443 | // Effect: Remove an object from the cachetable. Don't write it back. |
444 | // Requires: The object must be pinned exactly once. |
445 | |
446 | // test-only wrapper that use CACHEKEY and fullhash |
447 | int toku_test_cachetable_unpin(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size); |
448 | |
449 | // test-only wrapper that use CACHEKEY and fullhash |
450 | int toku_test_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size); |
451 | |
452 | // test-only wrapper that use CACHEKEY |
453 | int toku_test_cachetable_unpin_and_remove (CACHEFILE, CACHEKEY, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */ |
454 | |
455 | int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, |
456 | CACHETABLE_WRITE_CALLBACK write_callback, |
457 | CACHETABLE_FETCH_CALLBACK fetch_callback, |
458 | CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, |
459 | CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, |
460 | void *, // parameter for fetch_callback, pf_req_callback, and pf_callback |
461 | bool *doing_prefetch); |
462 | // Effect: Prefetch a memory object for a given key into the cachetable |
463 | // Precondition: The cachetable mutex is NOT held. |
464 | // Postcondition: The cachetable mutex is NOT held. |
465 | // Returns: 0 if success |
466 | // Implement Note: |
467 | // 1) The pair's rwlock is acquired (for write) (there is not a deadlock here because the rwlock is a pthread_cond_wait using the cachetable mutex). |
468 | // Case A: Single-threaded. |
469 | // A1) Call cachetable_fetch_pair, which |
//         a) Obtains a readlock on the cachefile's fd (to prevent multiple readers at once)
471 | // b) Unlocks the cachetable |
472 | // c) Does the fetch off disk. |
473 | // d) Locks the cachetable |
474 | // e) Unlocks the fd lock. |
475 | // f) Unlocks the pair rwlock. |
476 | // Case B: Multithreaded |
477 | // a) Enqueue a cachetable_reader into the workqueue. |
478 | // b) Unlock the cache table. |
479 | // c) The enqueue'd job later locks the cachetable, and calls cachetable_fetch_pair (doing the steps in A1 above). |
480 | |
481 | int toku_cachetable_assert_all_unpinned (CACHETABLE); |
482 | |
483 | int toku_cachefile_count_pinned (CACHEFILE, int /*printthem*/ ); |
484 | |
485 | // Close the cachefile. |
// Effects: All of the cached objects associated with the cachefile are evicted from
487 | // the cachetable. The flush callback is called for each of these objects. The |
488 | // close function does not return until all of the objects are evicted. The cachefile |
489 | // object is freed. |
490 | // If oplsn_valid is true then use oplsn as the LSN of the close instead of asking the logger. oplsn_valid being true is only allowed during recovery, and requires that you are removing the last reference (otherwise the lsn wouldn't make it in.) |
491 | void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn); |
492 | |
493 | // Return on success (different from pread and pwrite) |
494 | //int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, toku_off_t offset); |
495 | //int cachefile_pread (CACHEFILE, void *buf, size_t count, toku_off_t offset); |
496 | |
497 | // Get the file descriptor associated with the cachefile |
498 | // Return the file descriptor |
499 | // Grabs a read lock protecting the fd |
500 | int toku_cachefile_get_fd (CACHEFILE); |
501 | |
502 | // Get the iname (within the environment) associated with the cachefile |
503 | // Return the filename |
504 | char * toku_cachefile_fname_in_env (CACHEFILE cf); |
505 | |
506 | void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env); |
507 | |
508 | // Make it so when the cachefile closes, the underlying file is unlinked |
509 | void toku_cachefile_unlink_on_close(CACHEFILE cf); |
510 | |
511 | // is this cachefile marked as unlink on close? |
512 | bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); |
513 | |
514 | void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf); |
515 | void toku_cachefile_do_log_recover_on_close(CACHEFILE cf); |
516 | bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf); |
517 | |
518 | // Return the logger associated with the cachefile |
519 | struct tokulogger *toku_cachefile_logger(CACHEFILE cf); |
520 | |
521 | // Return the filenum associated with the cachefile |
522 | FILENUM toku_cachefile_filenum(CACHEFILE cf); |
523 | |
524 | // Effect: Return a 32-bit hash key. The hash key shall be suitable for using with bitmasking for a table of size power-of-two. |
525 | uint32_t toku_cachetable_hash(CACHEFILE cf, CACHEKEY key); |
526 | |
527 | uint32_t (CACHEFILE cf); |
528 | |
529 | // debug functions |
530 | |
531 | // Print the contents of the cachetable. This is mainly used from gdb |
532 | void toku_cachetable_print_state (CACHETABLE ct); |
533 | |
534 | // Get the state of the cachetable. This is used to verify the cachetable |
535 | void toku_cachetable_get_state(CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr); |
536 | |
537 | // Get the state of a cachetable entry by key. This is used to verify the cachetable |
538 | int toku_cachetable_get_key_state(CACHETABLE ct, CACHEKEY key, CACHEFILE cf, |
539 | void **value_ptr, |
540 | int *dirty_ptr, |
541 | long long *pin_ptr, |
542 | long *size_ptr); |
543 | |
544 | // Verify the whole cachetable that the cachefile is in. Slow. |
545 | void toku_cachefile_verify (CACHEFILE cf); |
546 | |
547 | // Verify the cachetable. Slow. |
548 | void toku_cachetable_verify (CACHETABLE t); |
549 | |
550 | // Not for use in production, but useful for testing. |
551 | void toku_cachetable_print_hash_histogram (void) __attribute__((__visibility__("default" ))); |
552 | |
553 | void toku_cachetable_maybe_flush_some(CACHETABLE ct); |
554 | |
555 | // for stat64 |
556 | uint64_t toku_cachefile_size(CACHEFILE cf); |
557 | |
558 | void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s); |
559 | |
560 | void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir); |
561 | char * toku_construct_full_name(int count, ...); |
562 | char * toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env); |
563 | |
564 | void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *); |
565 | // Effect: Add a job to the cachetable's collection of work to do. Note that function f must call remove_background_job_from_cf() |
566 | |
567 | void remove_background_job_from_cf (CACHEFILE cf); |
568 | // Effect: When a kibbutz job or cleaner thread finishes in a cachefile, |
569 | // the cachetable must be notified. |
570 | |
571 | // test-only function |
572 | int toku_cachetable_get_checkpointing_user_data_status(void); |
573 | |
574 | // test-only function |
575 | int toku_cleaner_thread_for_test(CACHETABLE ct); |
576 | int toku_cleaner_thread(void *cleaner_v); |
577 | |
578 | // test function. Exported in the ydb layer and used by tests that want to run DRD |
579 | // The default of 1M is too high for drd tests, so this is a mechanism to set a smaller number. |
580 | void toku_pair_list_set_lock_size(uint32_t num_locks); |
581 | |
582 | // Used by ft-ops.cc to figure out if it has the write lock on a pair. |
583 | // Pretty hacky and not accurate enough, should be improved at the frwlock |
584 | // layer. |
585 | __attribute__((const,nonnull)) |
586 | bool toku_ctpair_is_write_locked(PAIR pair); |
587 | |