| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #include "portability/memory.h" |
| 40 | #include "portability/toku_portability.h" |
| 41 | |
| 42 | #include "ft/serialize/block_table.h" |
| 43 | #include "ft/ft-internal.h" |
| 44 | #include "ft/serialize/ft_node-serialize.h" |
| 45 | #include "ft/txn/rollback.h" |
| 46 | #include "ft/txn/rollback-ct-callbacks.h" |
| 47 | |
| 48 | #include "util/memarena.h" |
| 49 | |
// Sentinel object: only its address is meaningful in this file.
// toku_rollback_clone_callback hands out &cloned_rollback as the clone of an
// unused rollback log, and toku_rollback_flush_callback compares against that
// address to recognize such a clone. The object's contents are never read here.
static struct serialized_rollback_log_node cloned_rollback;
| 52 | |
| 53 | // Cleanup the rollback memory |
| 54 | static void |
| 55 | rollback_log_destroy(ROLLBACK_LOG_NODE log) { |
| 56 | make_rollback_log_empty(log); |
| 57 | toku_free(log); |
| 58 | } |
| 59 | |
| 60 | // flush an ununused log to disk, by allocating a size 0 blocknum in |
| 61 | // the blocktable |
| 62 | static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log, |
| 63 | BLOCKNUM logname, |
| 64 | int fd, |
| 65 | FT ft, |
| 66 | bool write_me, |
| 67 | bool keep_me, |
| 68 | bool for_checkpoint, |
| 69 | bool is_clone) { |
| 70 | if (write_me) { |
| 71 | DISKOFF offset; |
| 72 | ft->blocktable.realloc_on_disk( |
| 73 | logname, 0, &offset, ft, fd, for_checkpoint); |
| 74 | } |
| 75 | if (!keep_me && !is_clone) { |
| 76 | toku_free(log); |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | // flush a used log to disk by serializing and writing the node out |
| 81 | static void |
| 82 | toku_rollback_flush_used_log ( |
| 83 | ROLLBACK_LOG_NODE log, |
| 84 | SERIALIZED_ROLLBACK_LOG_NODE serialized, |
| 85 | int fd, |
| 86 | FT ft, |
| 87 | bool write_me, |
| 88 | bool keep_me, |
| 89 | bool for_checkpoint, |
| 90 | bool is_clone |
| 91 | ) |
| 92 | { |
| 93 | |
| 94 | if (write_me) { |
| 95 | int r = toku_serialize_rollback_log_to(fd, log, serialized, is_clone, ft, for_checkpoint); |
| 96 | assert(r == 0); |
| 97 | } |
| 98 | if (!keep_me) { |
| 99 | if (is_clone) { |
| 100 | toku_serialized_rollback_log_destroy(serialized); |
| 101 | } |
| 102 | else { |
| 103 | rollback_log_destroy(log); |
| 104 | } |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | // Write something out. Keep trying even if partial writes occur. |
| 109 | // On error: Return negative with errno set. |
| 110 | // On success return nbytes. |
| 111 | void toku_rollback_flush_callback ( |
| 112 | CACHEFILE UU(cachefile), |
| 113 | int fd, |
| 114 | BLOCKNUM logname, |
| 115 | void *rollback_v, |
| 116 | void** UU(disk_data), |
| 117 | void *, |
| 118 | PAIR_ATTR size, |
| 119 | PAIR_ATTR* new_size, |
| 120 | bool write_me, |
| 121 | bool keep_me, |
| 122 | bool for_checkpoint, |
| 123 | bool is_clone |
| 124 | ) |
| 125 | { |
| 126 | ROLLBACK_LOG_NODE log = nullptr; |
| 127 | SERIALIZED_ROLLBACK_LOG_NODE serialized = nullptr; |
| 128 | bool is_unused = false; |
| 129 | if (is_clone) { |
| 130 | is_unused = (rollback_v == &cloned_rollback); |
| 131 | CAST_FROM_VOIDP(serialized, rollback_v); |
| 132 | } |
| 133 | else { |
| 134 | CAST_FROM_VOIDP(log, rollback_v); |
| 135 | is_unused = rollback_log_is_unused(log); |
| 136 | } |
| 137 | *new_size = size; |
| 138 | FT ft; |
| 139 | CAST_FROM_VOIDP(ft, extraargs); |
| 140 | if (is_unused) { |
| 141 | toku_rollback_flush_unused_log( |
| 142 | log, |
| 143 | logname, |
| 144 | fd, |
| 145 | ft, |
| 146 | write_me, |
| 147 | keep_me, |
| 148 | for_checkpoint, |
| 149 | is_clone |
| 150 | ); |
| 151 | } |
| 152 | else { |
| 153 | toku_rollback_flush_used_log( |
| 154 | log, |
| 155 | serialized, |
| 156 | fd, |
| 157 | ft, |
| 158 | write_me, |
| 159 | keep_me, |
| 160 | for_checkpoint, |
| 161 | is_clone |
| 162 | ); |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | int toku_rollback_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash UU(), |
| 167 | void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *) { |
| 168 | int r; |
| 169 | FT CAST_FROM_VOIDP(h, extraargs); |
| 170 | assert(h->cf == cachefile); |
| 171 | ROLLBACK_LOG_NODE *result = (ROLLBACK_LOG_NODE*)rollback_pv; |
| 172 | r = toku_deserialize_rollback_log_from(fd, logname, result, h); |
| 173 | if (r==0) { |
| 174 | (*result)->ct_pair = p; |
| 175 | *sizep = rollback_memory_size(*result); |
| 176 | } |
| 177 | return r; |
| 178 | } |
| 179 | |
| 180 | void toku_rollback_pe_est_callback( |
| 181 | void* rollback_v, |
| 182 | void* UU(disk_data), |
| 183 | long* bytes_freed_estimate, |
| 184 | enum partial_eviction_cost *cost, |
| 185 | void* UU() |
| 186 | ) |
| 187 | { |
| 188 | assert(rollback_v != NULL); |
| 189 | *bytes_freed_estimate = 0; |
| 190 | *cost = PE_CHEAP; |
| 191 | } |
| 192 | |
| 193 | // callback for partially evicting a cachetable entry |
| 194 | int toku_rollback_pe_callback ( |
| 195 | void *rollback_v, |
| 196 | PAIR_ATTR old_attr, |
| 197 | void* UU(), |
| 198 | void (*finalize)(PAIR_ATTR new_attr, void * ), |
| 199 | void * |
| 200 | ) |
| 201 | { |
| 202 | assert(rollback_v != NULL); |
| 203 | finalize(old_attr, finalize_extra); |
| 204 | return 0; |
| 205 | } |
| 206 | |
| 207 | // partial fetch is never required for a rollback log node |
| 208 | bool toku_rollback_pf_req_callback(void* UU(ftnode_pv), void* UU()) { |
| 209 | return false; |
| 210 | } |
| 211 | |
| 212 | // a rollback node should never be partial fetched, |
| 213 | // because we always say it is not required. |
| 214 | // (pf req callback always returns false) |
| 215 | int toku_rollback_pf_callback(void* UU(ftnode_pv), void* UU(disk_data), void* UU(), int UU(fd), PAIR_ATTR* UU(sizep)) { |
| 216 | assert(false); |
| 217 | return 0; |
| 218 | } |
| 219 | |
| 220 | // the cleaner thread should never choose a rollback node for cleaning |
| 221 | int toku_rollback_cleaner_callback ( |
| 222 | void* UU(ftnode_pv), |
| 223 | BLOCKNUM UU(blocknum), |
| 224 | uint32_t UU(fullhash), |
| 225 | void* UU() |
| 226 | ) |
| 227 | { |
| 228 | assert(false); |
| 229 | return 0; |
| 230 | } |
| 231 | |
| 232 | void toku_rollback_clone_callback( |
| 233 | void* value_data, |
| 234 | void** cloned_value_data, |
| 235 | long* clone_size, |
| 236 | PAIR_ATTR* new_attr, |
| 237 | bool UU(for_checkpoint), |
| 238 | void* UU() |
| 239 | ) |
| 240 | { |
| 241 | ROLLBACK_LOG_NODE CAST_FROM_VOIDP(log, value_data); |
| 242 | SERIALIZED_ROLLBACK_LOG_NODE serialized = nullptr; |
| 243 | if (!rollback_log_is_unused(log)) { |
| 244 | XMALLOC(serialized); |
| 245 | toku_serialize_rollback_log_to_memory_uncompressed(log, serialized); |
| 246 | *cloned_value_data = serialized; |
| 247 | *clone_size = sizeof(struct serialized_rollback_log_node) + serialized->len; |
| 248 | } |
| 249 | else { |
| 250 | *cloned_value_data = &cloned_rollback; |
| 251 | *clone_size = sizeof(cloned_rollback); |
| 252 | } |
| 253 | // clear the dirty bit, because the node has been cloned |
| 254 | log->dirty = 0; |
| 255 | new_attr->is_valid = false; |
| 256 | } |
| 257 | |
| 258 | |