1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | /*====== |
4 | This file is part of PerconaFT. |
5 | |
6 | |
7 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
8 | |
9 | PerconaFT is free software: you can redistribute it and/or modify |
10 | it under the terms of the GNU General Public License, version 2, |
11 | as published by the Free Software Foundation. |
12 | |
13 | PerconaFT is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
20 | |
21 | ---------------------------------------- |
22 | |
23 | PerconaFT is free software: you can redistribute it and/or modify |
24 | it under the terms of the GNU Affero General Public License, version 3, |
25 | as published by the Free Software Foundation. |
26 | |
27 | PerconaFT is distributed in the hope that it will be useful, |
28 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
29 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30 | GNU Affero General Public License for more details. |
31 | |
32 | You should have received a copy of the GNU Affero General Public License |
33 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
34 | ======= */ |
35 | |
36 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
37 | |
38 | #include <my_global.h> |
39 | #include "ft/ft-internal.h" |
40 | |
41 | #include "ft/cursor.h" |
42 | #include "ft/leafentry.h" |
43 | #include "ft/txn/txn.h" |
44 | #include "util/dbt.h" |
45 | |
46 | int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, |
47 | enum cursor_read_type read_type, |
48 | bool disable_prefetching, |
49 | bool is_temporary) { |
50 | if (read_type == C_READ_SNAPSHOT) { |
51 | invariant(ttxn != NULL); |
52 | int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn, false); // last parameter is irrelevant |
53 | if (accepted != TOKUDB_ACCEPT) { |
54 | invariant(accepted == 0); |
55 | return TOKUDB_MVCC_DICTIONARY_TOO_NEW; |
56 | } |
57 | } |
58 | |
59 | memset(cursor, 0, sizeof(*cursor)); |
60 | cursor->ft_handle = ft_handle; |
61 | cursor->ttxn = ttxn; |
62 | cursor->read_type = read_type; |
63 | cursor->disable_prefetching = disable_prefetching; |
64 | cursor->is_temporary = is_temporary; |
65 | return 0; |
66 | } |
67 | |
68 | void toku_ft_cursor_destroy(FT_CURSOR cursor) { |
69 | toku_destroy_dbt(&cursor->key); |
70 | toku_destroy_dbt(&cursor->val); |
71 | toku_destroy_dbt(&cursor->range_lock_left_key); |
72 | toku_destroy_dbt(&cursor->range_lock_right_key); |
73 | } |
74 | |
75 | // deprecated, should only be used by tests |
76 | int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, |
77 | bool is_snapshot_read, bool disable_prefetching) { |
78 | FT_CURSOR XCALLOC(cursor); |
79 | enum cursor_read_type read_type = is_snapshot_read ? C_READ_SNAPSHOT : C_READ_ANY; |
80 | int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, read_type, disable_prefetching, false); |
81 | if (r == 0) { |
82 | *cursorptr = cursor; |
83 | } else { |
84 | toku_free(cursor); |
85 | } |
86 | return r; |
87 | } |
88 | |
89 | // deprecated, should only be used by tests |
90 | void toku_ft_cursor_close(FT_CURSOR cursor) { |
91 | toku_ft_cursor_destroy(cursor); |
92 | toku_free(cursor); |
93 | } |
94 | |
95 | void toku_ft_cursor_remove_restriction(FT_CURSOR cursor) { |
96 | cursor->out_of_range_error = 0; |
97 | cursor->direction = 0; |
98 | } |
99 | |
100 | void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *) { |
101 | cursor->interrupt_cb = cb; |
102 | cursor->interrupt_cb_extra = extra; |
103 | } |
104 | |
105 | void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor) { |
106 | cursor->is_leaf_mode = true; |
107 | } |
108 | |
109 | int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor) { |
110 | return cursor->is_leaf_mode; |
111 | } |
112 | |
113 | // TODO: Rename / cleanup - this has nothing to do with locking |
114 | void toku_ft_cursor_set_range_lock(FT_CURSOR cursor, |
115 | const DBT *left, const DBT *right, |
116 | bool left_is_neg_infty, bool right_is_pos_infty, |
117 | int out_of_range_error) { |
118 | // Destroy any existing keys and then clone the given left, right keys |
119 | toku_destroy_dbt(&cursor->range_lock_left_key); |
120 | if (left_is_neg_infty) { |
121 | cursor->left_is_neg_infty = true; |
122 | } else { |
123 | toku_clone_dbt(&cursor->range_lock_left_key, *left); |
124 | } |
125 | |
126 | toku_destroy_dbt(&cursor->range_lock_right_key); |
127 | if (right_is_pos_infty) { |
128 | cursor->right_is_pos_infty = true; |
129 | } else { |
130 | toku_clone_dbt(&cursor->range_lock_right_key, *right); |
131 | } |
132 | |
133 | // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) |
134 | cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; |
135 | cursor->direction = 0; |
136 | } |
137 | |
138 | void toku_ft_cursor_set_prefetching(FT_CURSOR cursor) { |
139 | cursor->prefetching = true; |
140 | } |
141 | |
142 | bool toku_ft_cursor_prefetching(FT_CURSOR cursor) { |
143 | return cursor->prefetching; |
144 | } |
145 | |
146 | //Return true if cursor is uninitialized. false otherwise. |
147 | bool toku_ft_cursor_not_set(FT_CURSOR cursor) { |
148 | assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); |
149 | return (bool)(cursor->key.data == NULL); |
150 | } |
151 | |
152 | struct ft_cursor_search_struct { |
153 | FT_GET_CALLBACK_FUNCTION getf; |
154 | void *getf_v; |
155 | FT_CURSOR cursor; |
156 | ft_search *search; |
157 | }; |
158 | |
159 | /* search for the first kv pair that matches the search object */ |
160 | static int ft_cursor_search(FT_CURSOR cursor, ft_search *search, |
161 | FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) { |
162 | int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); |
163 | return r; |
164 | } |
165 | |
166 | static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { |
167 | return ft_handle->ft->cmp(k, x); |
168 | } |
169 | |
170 | int toku_ft_cursor_compare_one(const ft_search &UU(search), const DBT *UU(x)) { |
171 | return 1; |
172 | } |
173 | |
174 | static int ft_cursor_compare_set(const ft_search &search, const DBT *x) { |
175 | FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); |
176 | return compare_k_x(ft_handle, search.k, x) <= 0; /* return min xy: kv <= xy */ |
177 | } |
178 | |
179 | static int |
180 | ft_cursor_current_getf(uint32_t keylen, const void *key, |
181 | uint32_t vallen, const void *val, |
182 | void *v, bool lock_only) { |
183 | struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); |
184 | int r; |
185 | if (key==NULL) { |
186 | r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); |
187 | } else { |
188 | FT_CURSOR cursor = bcss->cursor; |
189 | DBT newkey; |
190 | toku_fill_dbt(&newkey, key, keylen); |
191 | if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { |
192 | r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY |
193 | if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; |
194 | } |
195 | else |
196 | r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); |
197 | } |
198 | return r; |
199 | } |
200 | |
201 | static int ft_cursor_compare_next(const ft_search &search, const DBT *x) { |
202 | FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); |
203 | return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */ |
204 | } |
205 | |
206 | int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
207 | if (toku_ft_cursor_not_set(cursor)) { |
208 | return EINVAL; |
209 | } |
210 | cursor->direction = 0; |
211 | if (op == DB_CURRENT) { |
212 | struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; |
213 | ft_search search; |
214 | ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); |
215 | int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); |
216 | ft_search_finish(&search); |
217 | return r; |
218 | } |
219 | return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); |
220 | } |
221 | |
222 | int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
223 | cursor->direction = 0; |
224 | ft_search search; |
225 | ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); |
226 | int r = ft_cursor_search(cursor, &search, getf, getf_v, false); |
227 | ft_search_finish(&search); |
228 | return r; |
229 | } |
230 | |
231 | int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
232 | cursor->direction = 0; |
233 | ft_search search; |
234 | ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, cursor->ft_handle); |
235 | int r = ft_cursor_search(cursor, &search, getf, getf_v, false); |
236 | ft_search_finish(&search); |
237 | return r; |
238 | } |
239 | |
240 | int toku_ft_cursor_check_restricted_range(FT_CURSOR c, const void *key, uint32_t keylen) { |
241 | if (c->out_of_range_error) { |
242 | FT ft = c->ft_handle->ft; |
243 | DBT found_key; |
244 | toku_fill_dbt(&found_key, key, keylen); |
245 | if ((!c->left_is_neg_infty && c->direction <= 0 && ft->cmp(&found_key, &c->range_lock_left_key) < 0) || |
246 | (!c->right_is_pos_infty && c->direction >= 0 && ft->cmp(&found_key, &c->range_lock_right_key) > 0)) { |
247 | invariant(c->out_of_range_error); |
248 | return c->out_of_range_error; |
249 | } |
250 | } |
251 | // Reset cursor direction to mitigate risk if some query type doesn't set the direction. |
252 | // It is always correct to check both bounds (which happens when direction==0) but it can be slower. |
253 | c->direction = 0; |
254 | return 0; |
255 | } |
256 | |
257 | int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, |
258 | FT_GET_CALLBACK_FUNCTION getf, void *getf_v, |
259 | uint32_t *keylen, void **key, uint32_t *vallen, void **val) { |
260 | int r = 0; |
261 | // if we are searching towards the end, limit is last element |
262 | // if we are searching towards the beginning, limit is the first element |
263 | uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0; |
264 | |
265 | //Starting with the prev, find the first real (non-provdel) leafentry. |
266 | while (index != limit) { |
267 | index += direction; |
268 | LEAFENTRY le; |
269 | void* foundkey = NULL; |
270 | uint32_t foundkeylen = 0; |
271 | |
272 | r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); |
273 | invariant_zero(r); |
274 | |
275 | if (toku_ft_cursor_is_leaf_mode(cursor) || !le_val_is_del(le, cursor->read_type, cursor->ttxn)) { |
276 | le_extract_val( |
277 | le, |
278 | toku_ft_cursor_is_leaf_mode(cursor), |
279 | cursor->read_type, |
280 | cursor->ttxn, |
281 | vallen, |
282 | val |
283 | ); |
284 | *key = foundkey; |
285 | *keylen = foundkeylen; |
286 | |
287 | cursor->direction = direction; |
288 | r = toku_ft_cursor_check_restricted_range(cursor, *key, *keylen); |
289 | if (r!=0) { |
290 | paranoid_invariant(r == cursor->out_of_range_error); |
291 | // We already got at least one entry from the bulk fetch. |
292 | // Return 0 (instead of out of range error). |
293 | r = 0; |
294 | break; |
295 | } |
296 | r = getf(*keylen, *key, *vallen, *val, getf_v, false); |
297 | if (r == TOKUDB_CURSOR_CONTINUE) { |
298 | continue; |
299 | } |
300 | else { |
301 | break; |
302 | } |
303 | } |
304 | } |
305 | |
306 | return r; |
307 | } |
308 | |
309 | int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
310 | cursor->direction = +1; |
311 | ft_search search; |
312 | ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); |
313 | int r = ft_cursor_search(cursor, &search, getf, getf_v, true); |
314 | ft_search_finish(&search); |
315 | if (r == 0) { |
316 | toku_ft_cursor_set_prefetching(cursor); |
317 | } |
318 | return r; |
319 | } |
320 | |
321 | static int ft_cursor_search_eq_k_x_getf(uint32_t keylen, const void *key, |
322 | uint32_t vallen, const void *val, |
323 | void *v, bool lock_only) { |
324 | struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); |
325 | int r; |
326 | if (key==NULL) { |
327 | r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); |
328 | } else { |
329 | FT_CURSOR cursor = bcss->cursor; |
330 | DBT newkey; |
331 | toku_fill_dbt(&newkey, key, keylen); |
332 | if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { |
333 | r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); |
334 | } else { |
335 | r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); |
336 | if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; |
337 | } |
338 | } |
339 | return r; |
340 | } |
341 | |
342 | /* search for the kv pair that matches the search object and is equal to k */ |
343 | static int ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
344 | struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; |
345 | int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); |
346 | return r; |
347 | } |
348 | |
349 | static int ft_cursor_compare_prev(const ft_search &search, const DBT *x) { |
350 | FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); |
351 | return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */ |
352 | } |
353 | |
354 | int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
355 | cursor->direction = -1; |
356 | ft_search search; |
357 | ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); |
358 | int r = ft_cursor_search(cursor, &search, getf, getf_v, true); |
359 | ft_search_finish(&search); |
360 | return r; |
361 | } |
362 | |
363 | int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x) { |
364 | FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); |
365 | return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */ |
366 | } |
367 | |
368 | int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
369 | cursor->direction = 0; |
370 | ft_search search; |
371 | ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); |
372 | int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); |
373 | ft_search_finish(&search); |
374 | return r; |
375 | } |
376 | |
377 | int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
378 | cursor->direction = 0; |
379 | ft_search search; |
380 | ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); |
381 | int r = ft_cursor_search(cursor, &search, getf, getf_v, false); |
382 | ft_search_finish(&search); |
383 | return r; |
384 | } |
385 | |
386 | static int ft_cursor_compare_set_range_reverse(const ft_search &search, const DBT *x) { |
387 | FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); |
388 | return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */ |
389 | } |
390 | |
391 | int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
392 | cursor->direction = 0; |
393 | ft_search search; |
394 | ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); |
395 | int r = ft_cursor_search(cursor, &search, getf, getf_v, false); |
396 | ft_search_finish(&search); |
397 | return r; |
398 | } |
399 | |
400 | //TODO: When tests have been rewritten, get rid of this function. |
401 | //Only used by tests. |
402 | int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) { |
403 | int op = get_flags & DB_OPFLAGS_MASK; |
404 | if (get_flags & ~DB_OPFLAGS_MASK) |
405 | return EINVAL; |
406 | |
407 | switch (op) { |
408 | case DB_CURRENT: |
409 | case DB_CURRENT_BINDING: |
410 | return toku_ft_cursor_current(cursor, op, getf, getf_v); |
411 | case DB_FIRST: |
412 | return toku_ft_cursor_first(cursor, getf, getf_v); |
413 | case DB_LAST: |
414 | return toku_ft_cursor_last(cursor, getf, getf_v); |
415 | case DB_NEXT: |
416 | if (toku_ft_cursor_not_set(cursor)) { |
417 | return toku_ft_cursor_first(cursor, getf, getf_v); |
418 | } else { |
419 | return toku_ft_cursor_next(cursor, getf, getf_v); |
420 | } |
421 | case DB_PREV: |
422 | if (toku_ft_cursor_not_set(cursor)) { |
423 | return toku_ft_cursor_last(cursor, getf, getf_v); |
424 | } else { |
425 | return toku_ft_cursor_prev(cursor, getf, getf_v); |
426 | } |
427 | case DB_SET: |
428 | return toku_ft_cursor_set(cursor, key, getf, getf_v); |
429 | case DB_SET_RANGE: |
430 | return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); |
431 | default: ;// Fall through |
432 | } |
433 | return EINVAL; |
434 | } |
435 | |
436 | void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) { |
437 | *pkey = &cursor->key; |
438 | *pval = &cursor->val; |
439 | } |
440 | |
441 | bool toku_ft_cursor_uninitialized(FT_CURSOR c) { |
442 | return toku_ft_cursor_not_set(c); |
443 | } |
444 | |
445 | int toku_ft_lookup(FT_HANDLE ft_handle, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { |
446 | FT_CURSOR cursor; |
447 | int r = toku_ft_cursor(ft_handle, &cursor, NULL, false, false); |
448 | if (r != 0) { |
449 | return r; |
450 | } |
451 | |
452 | r = toku_ft_cursor_set(cursor, k, getf, getf_v); |
453 | |
454 | toku_ft_cursor_close(cursor); |
455 | return r; |
456 | } |
457 | |