| 1 | /* |
| 2 | Portions Copyright (c) 2015-Present, Facebook, Inc. |
| 3 | Portions Copyright (c) 2012, Monty Program Ab |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify |
| 6 | it under the terms of the GNU General Public License as published by |
| 7 | the Free Software Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License |
| 15 | along with this program; if not, write to the Free Software |
| 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ |
| 17 | |
| 18 | #include <my_global.h> |
| 19 | |
| 20 | #include "rdb_mariadb_port.h" |
| 21 | /* This C++ file's header file */ |
| 22 | #include "./rdb_perf_context.h" |
| 23 | |
| 24 | /* C++ system header files */ |
| 25 | #include <string> |
| 26 | |
| 27 | /* RocksDB header files */ |
| 28 | #include "rocksdb/iostats_context.h" |
| 29 | #include "rocksdb/perf_context.h" |
| 30 | |
| 31 | /* MyRocks header files */ |
| 32 | #include "./ha_rocksdb_proto.h" |
| 33 | |
| 34 | namespace myrocks { |
| 35 | |
| 36 | // To add a new metric: |
| 37 | // 1. Update the PC enum in rdb_perf_context.h |
| 38 | // 2. Update sections (A), (B), and (C) below |
| 39 | // 3. Update perf_context.test and show_engine.test |
| 40 | |
/*
  Human-readable names reported through SHOW ENGINE ROCKSDB PERF_CONTEXT and
  the perf-context information schema tables.
*/
std::string rdb_pc_stat_types[] = {
    // (A) These should be in the same order as the PC enum
    "USER_KEY_COMPARISON_COUNT",
    "BLOCK_CACHE_HIT_COUNT",
    "BLOCK_READ_COUNT",
    "BLOCK_READ_BYTE",
    "BLOCK_READ_TIME",
    "BLOCK_CHECKSUM_TIME",
    "BLOCK_DECOMPRESS_TIME",
    "GET_READ_BYTES",
    "MULTIGET_READ_BYTES",
    "ITER_READ_BYTES",
    "INTERNAL_KEY_SKIPPED_COUNT",
    "INTERNAL_DELETE_SKIPPED_COUNT",
    "INTERNAL_RECENT_SKIPPED_COUNT",
    "INTERNAL_MERGE_COUNT",
    "GET_SNAPSHOT_TIME",
    "GET_FROM_MEMTABLE_TIME",
    "GET_FROM_MEMTABLE_COUNT",
    "GET_POST_PROCESS_TIME",
    "GET_FROM_OUTPUT_FILES_TIME",
    "SEEK_ON_MEMTABLE_TIME",
    "SEEK_ON_MEMTABLE_COUNT",
    "NEXT_ON_MEMTABLE_COUNT",
    "PREV_ON_MEMTABLE_COUNT",
    "SEEK_CHILD_SEEK_TIME",
    "SEEK_CHILD_SEEK_COUNT",
    "SEEK_MIN_HEAP_TIME",
    "SEEK_MAX_HEAP_TIME",
    "SEEK_INTERNAL_SEEK_TIME",
    "FIND_NEXT_USER_ENTRY_TIME",
    "WRITE_WAL_TIME",
    "WRITE_MEMTABLE_TIME",
    "WRITE_DELAY_TIME",
    "WRITE_PRE_AND_POST_PROCESS_TIME",
    "DB_MUTEX_LOCK_NANOS",
    "DB_CONDITION_WAIT_NANOS",
    "MERGE_OPERATOR_TIME_NANOS",
    "READ_INDEX_BLOCK_NANOS",
    "READ_FILTER_BLOCK_NANOS",
    "NEW_TABLE_BLOCK_ITER_NANOS",
    "NEW_TABLE_ITERATOR_NANOS",
    "BLOCK_SEEK_NANOS",
    "FIND_TABLE_NANOS",
    "BLOOM_MEMTABLE_HIT_COUNT",
    "BLOOM_MEMTABLE_MISS_COUNT",
    "BLOOM_SST_HIT_COUNT",
    "BLOOM_SST_MISS_COUNT",
    "KEY_LOCK_WAIT_TIME",
    "KEY_LOCK_WAIT_COUNT",
    "IO_THREAD_POOL_ID",
    "IO_BYTES_WRITTEN",
    "IO_BYTES_READ",
    "IO_OPEN_NANOS",
    "IO_ALLOCATE_NANOS",
    "IO_WRITE_NANOS",
    "IO_READ_NANOS",
    "IO_RANGE_SYNC_NANOS",
    "IO_LOGGER_NANOS",
};
| 100 | |
/*
  (B) Accumulate one rocksdb::PerfContext field into counters->m_value[idx]
  and advance idx.  idx always advances (even when the field is zero) so the
  slot index stays aligned with the PC enum / rdb_pc_stat_types ordering.
*/
#define IO_PERF_RECORD(_field_) \
  do { \
    if (rocksdb::get_perf_context()->_field_ > 0) \
      counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \
    idx++; \
  } while (0)
/*
  Same as IO_PERF_RECORD but reads from the thread-local
  rocksdb::IOStatsContext instead of the PerfContext.
*/
#define IO_STAT_RECORD(_field_) \
  do { \
    if (rocksdb::get_iostats_context()->_field_ > 0) \
      counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \
    idx++; \
  } while (0)
| 113 | |
/*
  Fold the current thread's RocksDB PerfContext and IOStatsContext values
  into the given atomic counter set.  The invocation order below defines the
  slot index for each metric, so it must match the PC enum and the
  rdb_pc_stat_types array exactly.
*/
static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
  // (C) These should be in the same order as the PC enum
  size_t idx = 0;
  // PerfContext-sourced metrics.
  IO_PERF_RECORD(user_key_comparison_count);
  IO_PERF_RECORD(block_cache_hit_count);
  IO_PERF_RECORD(block_read_count);
  IO_PERF_RECORD(block_read_byte);
  IO_PERF_RECORD(block_read_time);
  IO_PERF_RECORD(block_checksum_time);
  IO_PERF_RECORD(block_decompress_time);
  IO_PERF_RECORD(get_read_bytes);
  IO_PERF_RECORD(multiget_read_bytes);
  IO_PERF_RECORD(iter_read_bytes);
  IO_PERF_RECORD(internal_key_skipped_count);
  IO_PERF_RECORD(internal_delete_skipped_count);
  IO_PERF_RECORD(internal_recent_skipped_count);
  IO_PERF_RECORD(internal_merge_count);
  IO_PERF_RECORD(get_snapshot_time);
  IO_PERF_RECORD(get_from_memtable_time);
  IO_PERF_RECORD(get_from_memtable_count);
  IO_PERF_RECORD(get_post_process_time);
  IO_PERF_RECORD(get_from_output_files_time);
  IO_PERF_RECORD(seek_on_memtable_time);
  IO_PERF_RECORD(seek_on_memtable_count);
  IO_PERF_RECORD(next_on_memtable_count);
  IO_PERF_RECORD(prev_on_memtable_count);
  IO_PERF_RECORD(seek_child_seek_time);
  IO_PERF_RECORD(seek_child_seek_count);
  IO_PERF_RECORD(seek_min_heap_time);
  IO_PERF_RECORD(seek_max_heap_time);
  IO_PERF_RECORD(seek_internal_seek_time);
  IO_PERF_RECORD(find_next_user_entry_time);
  IO_PERF_RECORD(write_wal_time);
  IO_PERF_RECORD(write_memtable_time);
  IO_PERF_RECORD(write_delay_time);
  IO_PERF_RECORD(write_pre_and_post_process_time);
  IO_PERF_RECORD(db_mutex_lock_nanos);
  IO_PERF_RECORD(db_condition_wait_nanos);
  IO_PERF_RECORD(merge_operator_time_nanos);
  IO_PERF_RECORD(read_index_block_nanos);
  IO_PERF_RECORD(read_filter_block_nanos);
  IO_PERF_RECORD(new_table_block_iter_nanos);
  IO_PERF_RECORD(new_table_iterator_nanos);
  IO_PERF_RECORD(block_seek_nanos);
  IO_PERF_RECORD(find_table_nanos);
  IO_PERF_RECORD(bloom_memtable_hit_count);
  IO_PERF_RECORD(bloom_memtable_miss_count);
  IO_PERF_RECORD(bloom_sst_hit_count);
  IO_PERF_RECORD(bloom_sst_miss_count);
  IO_PERF_RECORD(key_lock_wait_time);
  IO_PERF_RECORD(key_lock_wait_count);

  // IOStatsContext-sourced metrics.
  IO_STAT_RECORD(thread_pool_id);
  IO_STAT_RECORD(bytes_written);
  IO_STAT_RECORD(bytes_read);
  IO_STAT_RECORD(open_nanos);
  IO_STAT_RECORD(allocate_nanos);
  IO_STAT_RECORD(write_nanos);
  IO_STAT_RECORD(read_nanos);
  IO_STAT_RECORD(range_sync_nanos);
  IO_STAT_RECORD(logger_nanos);
}
| 176 | |
/*
  Scope the helper macros to harvest_diffs().  The previous #undef names
  (IO_PERF_DIFF / IO_STAT_DIFF) were stale — those macros are never defined
  in this file, so IO_PERF_RECORD / IO_STAT_RECORD leaked past this point.
*/
#undef IO_PERF_RECORD
#undef IO_STAT_RECORD
| 179 | |
// Process-wide accumulator: every Rdb_io_perf::end_and_record() call folds
// its per-thread deltas into this set in addition to any table-level set.
static Rdb_atomic_perf_counters rdb_global_perf_counters;
| 181 | |
/*
  Copy a snapshot of the process-wide perf counters into *counters.

  @param[out] counters  destination; must be non-null
*/
void rdb_get_global_perf_counters(Rdb_perf_counters *const counters) {
  DBUG_ASSERT(counters != nullptr);

  counters->load(rdb_global_perf_counters);
}
| 187 | |
| 188 | void Rdb_perf_counters::load(const Rdb_atomic_perf_counters &atomic_counters) { |
| 189 | for (int i = 0; i < PC_MAX_IDX; i++) { |
| 190 | m_value[i] = atomic_counters.m_value[i].load(std::memory_order_relaxed); |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | bool Rdb_io_perf::start(const uint32_t perf_context_level) { |
| 195 | const rocksdb::PerfLevel perf_level = |
| 196 | static_cast<rocksdb::PerfLevel>(perf_context_level); |
| 197 | |
| 198 | if (rocksdb::GetPerfLevel() != perf_level) { |
| 199 | rocksdb::SetPerfLevel(perf_level); |
| 200 | } |
| 201 | |
| 202 | if (perf_level == rocksdb::kDisable) { |
| 203 | return false; |
| 204 | } |
| 205 | |
| 206 | rocksdb::get_perf_context()->Reset(); |
| 207 | rocksdb::get_iostats_context()->Reset(); |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | void Rdb_io_perf::update_bytes_written(const uint32_t perf_context_level, |
| 212 | ulonglong bytes_written) { |
| 213 | const rocksdb::PerfLevel perf_level = |
| 214 | static_cast<rocksdb::PerfLevel>(perf_context_level); |
| 215 | if (perf_level != rocksdb::kDisable && m_shared_io_perf_write) { |
| 216 | io_write_bytes += bytes_written; |
| 217 | io_write_requests += 1; |
| 218 | } |
| 219 | } |
| 220 | |
/*
  Close a measured I/O section started by start(): harvest the thread-local
  RocksDB counters into the attached per-table set (if any) and the global
  set, then (in the upstream build) publish read/write I/O stats to the
  shared table statistics.  No-op when profiling is disabled.

  @param perf_context_level  current rocksdb::PerfLevel (as an integer)
*/
void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
  const rocksdb::PerfLevel perf_level =
      static_cast<rocksdb::PerfLevel>(perf_context_level);

  if (perf_level == rocksdb::kDisable) {
    return;
  }

  // Fold this section's deltas into the table-level counters (when a table
  // is attached) and always into the process-wide counters.
  if (m_atomic_counters) {
    harvest_diffs(m_atomic_counters);
  }
  harvest_diffs(&rdb_global_perf_counters);

  // Publish block-read stats only if this section actually performed reads.
  if (m_shared_io_perf_read &&
      (rocksdb::get_perf_context()->block_read_byte != 0 ||
       rocksdb::get_perf_context()->block_read_count != 0 ||
       rocksdb::get_perf_context()->block_read_time != 0))
  {
    // NOTE(review): my_io_perf_t table-stat plumbing is not ported to
    // MariaDB yet, hence the MARIAROCKS_NOT_YET guard.
#ifdef MARIAROCKS_NOT_YET
    my_io_perf_t io_perf_read;

    io_perf_read.init();
    io_perf_read.bytes = rocksdb::get_perf_context()->block_read_byte;
    io_perf_read.requests = rocksdb::get_perf_context()->block_read_count;

    /*
      Rocksdb does not distinguish between I/O service and wait time, so just
      use svc time.
    */
    io_perf_read.svc_time_max = io_perf_read.svc_time =
        rocksdb::get_perf_context()->block_read_time;

    m_shared_io_perf_read->sum(io_perf_read);
    m_stats->table_io_perf_read.sum(io_perf_read);
#endif
  }

#ifdef MARIAROCKS_NOT_YET
  // Flush the write stats accumulated via update_bytes_written(), then reset
  // them so the next section starts from zero.
  if (m_shared_io_perf_write &&
      (io_write_bytes != 0 || io_write_requests != 0)) {
    my_io_perf_t io_perf_write;
    io_perf_write.init();
    io_perf_write.bytes = io_write_bytes;
    io_perf_write.requests = io_write_requests;
    m_shared_io_perf_write->sum(io_perf_write);
    m_stats->table_io_perf_write.sum(io_perf_write);
    io_write_bytes = 0;
    io_write_requests = 0;
  }

  // Roll tombstone/key-skip counts into the shared table stats.
  if (m_stats) {
    if (rocksdb::get_perf_context()->internal_key_skipped_count != 0) {
      m_stats->key_skipped +=
          rocksdb::get_perf_context()->internal_key_skipped_count;
    }

    if (rocksdb::get_perf_context()->internal_delete_skipped_count != 0) {
      m_stats->delete_skipped +=
          rocksdb::get_perf_context()->internal_delete_skipped_count;
    }
  }
#endif
}
| 284 | |
| 285 | } // namespace myrocks |
| 286 | |