| 1 | /* |
| 2 | * scan_manager.c |
| 3 | * |
| 4 | * Copyright (C) 2019 Aerospike, Inc. |
| 5 | * |
| 6 | * Portions may be licensed to Aerospike, Inc. under one or more contributor |
| 7 | * license agreements. |
| 8 | * |
| 9 | * This program is free software: you can redistribute it and/or modify it under |
| 10 | * the terms of the GNU Affero General Public License as published by the Free |
| 11 | * Software Foundation, either version 3 of the License, or (at your option) any |
| 12 | * later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 16 | * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
| 17 | * details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU Affero General Public License |
| 20 | * along with this program. If not, see http://www.gnu.org/licenses/ |
| 21 | */ |
| 22 | |
| 23 | //========================================================== |
| 24 | // Includes. |
| 25 | // |
| 26 | |
| 27 | #include "base/scan_manager.h" |
| 28 | |
| 29 | #include <stdbool.h> |
| 30 | #include <stdint.h> |
| 31 | |
| 32 | #include "aerospike/as_atomic.h" |
| 33 | #include "citrusleaf/cf_clock.h" |
| 34 | #include "citrusleaf/cf_queue.h" |
| 35 | |
| 36 | #include "cf_mutex.h" |
| 37 | #include "fault.h" |
| 38 | |
| 39 | #include "base/cfg.h" |
| 40 | #include "base/monitor.h" |
| 41 | #include "base/scan_job.h" |
| 42 | |
| 43 | #include "warnings.h" |
| 44 | |
| 45 | |
| 46 | //========================================================== |
| 47 | // Typedefs & constants. |
| 48 | // |
| 49 | |
| 50 | typedef struct find_item_s { |
| 51 | uint64_t trid; |
| 52 | as_scan_job* _job; |
| 53 | bool remove_job; |
| 54 | } find_item; |
| 55 | |
| 56 | typedef struct info_item_s { |
| 57 | as_scan_job** p_job; |
| 58 | } info_item; |
| 59 | |
| 60 | |
| 61 | //========================================================== |
| 62 | // Globals. |
| 63 | // |
| 64 | |
| 65 | uint32_t g_n_threads = 0; |
| 66 | |
| 67 | static as_scan_manager g_mgr; |
| 68 | |
| 69 | |
| 70 | //========================================================== |
| 71 | // Forward declarations. |
| 72 | // |
| 73 | |
| 74 | static void evict_finished_jobs(void); |
| 75 | static int abort_cb(void* buf, void* udata); |
| 76 | static int info_cb(void* buf, void* udata); |
| 77 | static as_scan_job* find_any(uint64_t trid); |
| 78 | static as_scan_job* find_active(uint64_t trid); |
| 79 | static as_scan_job* remove_active(uint64_t trid); |
| 80 | static as_scan_job* find_job(cf_queue* jobs, uint64_t trid, bool remove_job); |
| 81 | static int find_cb(void* buf, void* udata); |
| 82 | |
| 83 | |
| 84 | //========================================================== |
| 85 | // Public API. |
| 86 | // |
| 87 | |
| 88 | void |
| 89 | as_scan_manager_init(void) |
| 90 | { |
| 91 | cf_mutex_init(&g_mgr.lock); |
| 92 | |
| 93 | g_mgr.active_jobs = cf_queue_create(sizeof(as_scan_job*), false); |
| 94 | g_mgr.finished_jobs = cf_queue_create(sizeof(as_scan_job*), false); |
| 95 | } |
| 96 | |
| 97 | int |
| 98 | as_scan_manager_start_job(as_scan_job* _job) |
| 99 | { |
| 100 | cf_mutex_lock(&g_mgr.lock); |
| 101 | |
| 102 | if (g_n_threads >= g_config.n_scan_threads_limit) { |
| 103 | cf_warning(AS_SCAN, "at scan threads limit - can't start new scan" ); |
| 104 | cf_mutex_unlock(&g_mgr.lock); |
| 105 | return AS_SCAN_ERR_FORBIDDEN; |
| 106 | } |
| 107 | |
| 108 | // Make sure trid is unique. |
| 109 | if (find_any(_job->trid)) { |
| 110 | cf_warning(AS_SCAN, "job with trid %lu already active" , _job->trid); |
| 111 | cf_mutex_unlock(&g_mgr.lock); |
| 112 | return AS_SCAN_ERR_PARAMETER; |
| 113 | } |
| 114 | |
| 115 | _job->start_us = cf_getus(); |
| 116 | _job->base_us = _job->start_us; |
| 117 | |
| 118 | cf_queue_push(g_mgr.active_jobs, &_job); |
| 119 | |
| 120 | as_scan_job_add_thread(_job); |
| 121 | |
| 122 | cf_mutex_unlock(&g_mgr.lock); |
| 123 | |
| 124 | return 0; |
| 125 | } |
| 126 | |
| 127 | void |
| 128 | as_scan_manager_add_job_thread(as_scan_job* _job) |
| 129 | { |
| 130 | if (_job->n_threads >= _job->ns->n_single_scan_threads) { |
| 131 | return; |
| 132 | } |
| 133 | |
| 134 | cf_mutex_lock(&g_mgr.lock); |
| 135 | |
| 136 | if (g_n_threads < g_config.n_scan_threads_limit) { |
| 137 | as_scan_job_add_thread(_job); |
| 138 | } |
| 139 | |
| 140 | cf_mutex_unlock(&g_mgr.lock); |
| 141 | } |
| 142 | |
| 143 | void |
| 144 | as_scan_manager_add_max_job_threads(as_scan_job* _job) |
| 145 | { |
| 146 | uint32_t single_max = as_load_uint32(&_job->ns->n_single_scan_threads); |
| 147 | |
| 148 | if (_job->n_threads >= single_max) { |
| 149 | return; |
| 150 | } |
| 151 | |
| 152 | uint32_t = single_max - _job->n_threads; |
| 153 | |
| 154 | cf_mutex_lock(&g_mgr.lock); |
| 155 | |
| 156 | uint32_t all_max = as_load_uint32(&g_config.n_scan_threads_limit); |
| 157 | |
| 158 | if (g_n_threads >= all_max) { |
| 159 | cf_mutex_unlock(&g_mgr.lock); |
| 160 | return; |
| 161 | } |
| 162 | |
| 163 | uint32_t = all_max - g_n_threads; |
| 164 | uint32_t = all_extra > single_extra ? single_extra : all_extra; |
| 165 | |
| 166 | for (uint32_t n = 0; n < n_extra; n++) { |
| 167 | as_scan_job_add_thread(_job); |
| 168 | } |
| 169 | |
| 170 | cf_mutex_unlock(&g_mgr.lock); |
| 171 | } |
| 172 | |
| 173 | void |
| 174 | as_scan_manager_finish_job(as_scan_job* _job) |
| 175 | { |
| 176 | cf_mutex_lock(&g_mgr.lock); |
| 177 | |
| 178 | remove_active(_job->trid); |
| 179 | |
| 180 | _job->finish_us = cf_getus(); |
| 181 | cf_queue_push(g_mgr.finished_jobs, &_job); |
| 182 | evict_finished_jobs(); |
| 183 | |
| 184 | cf_mutex_unlock(&g_mgr.lock); |
| 185 | } |
| 186 | |
| 187 | void |
| 188 | as_scan_manager_abandon_job(as_scan_job* _job, int reason) |
| 189 | { |
| 190 | _job->abandoned = reason; |
| 191 | } |
| 192 | |
| 193 | bool |
| 194 | as_scan_manager_abort_job(uint64_t trid) |
| 195 | { |
| 196 | cf_mutex_lock(&g_mgr.lock); |
| 197 | |
| 198 | as_scan_job* _job = find_active(trid); |
| 199 | |
| 200 | cf_mutex_unlock(&g_mgr.lock); |
| 201 | |
| 202 | if (_job == NULL) { |
| 203 | return false; |
| 204 | } |
| 205 | |
| 206 | _job->abandoned = AS_SCAN_ERR_USER_ABORT; |
| 207 | |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | int |
| 212 | as_scan_manager_abort_all_jobs(void) |
| 213 | { |
| 214 | cf_mutex_lock(&g_mgr.lock); |
| 215 | |
| 216 | int n_jobs = cf_queue_sz(g_mgr.active_jobs); |
| 217 | |
| 218 | if (n_jobs > 0) { |
| 219 | cf_queue_reduce(g_mgr.active_jobs, abort_cb, NULL); |
| 220 | } |
| 221 | |
| 222 | cf_mutex_unlock(&g_mgr.lock); |
| 223 | |
| 224 | return n_jobs; |
| 225 | } |
| 226 | |
| 227 | void |
| 228 | as_scan_manager_limit_finished_jobs(void) |
| 229 | { |
| 230 | cf_mutex_lock(&g_mgr.lock); |
| 231 | |
| 232 | evict_finished_jobs(); |
| 233 | |
| 234 | cf_mutex_unlock(&g_mgr.lock); |
| 235 | } |
| 236 | |
| 237 | as_mon_jobstat* |
| 238 | as_scan_manager_get_job_info(uint64_t trid) |
| 239 | { |
| 240 | cf_mutex_lock(&g_mgr.lock); |
| 241 | |
| 242 | as_scan_job* _job = find_any(trid); |
| 243 | |
| 244 | if (_job == NULL) { |
| 245 | cf_mutex_unlock(&g_mgr.lock); |
| 246 | return NULL; |
| 247 | } |
| 248 | |
| 249 | as_mon_jobstat* stat = cf_malloc(sizeof(as_mon_jobstat)); |
| 250 | |
| 251 | memset(stat, 0, sizeof(as_mon_jobstat)); |
| 252 | as_scan_job_info(_job, stat); |
| 253 | |
| 254 | cf_mutex_unlock(&g_mgr.lock); |
| 255 | |
| 256 | return stat; // caller must free this |
| 257 | } |
| 258 | |
| 259 | as_mon_jobstat* |
| 260 | as_scan_manager_get_info(int* size) |
| 261 | { |
| 262 | *size = 0; |
| 263 | |
| 264 | cf_mutex_lock(&g_mgr.lock); |
| 265 | |
| 266 | int n_jobs = cf_queue_sz(g_mgr.active_jobs) + |
| 267 | cf_queue_sz(g_mgr.finished_jobs); |
| 268 | |
| 269 | if (n_jobs == 0) { |
| 270 | cf_mutex_unlock(&g_mgr.lock); |
| 271 | return NULL; |
| 272 | } |
| 273 | |
| 274 | as_scan_job* _jobs[n_jobs]; |
| 275 | info_item item = { _jobs }; |
| 276 | |
| 277 | cf_queue_reduce_reverse(g_mgr.active_jobs, info_cb, &item); |
| 278 | cf_queue_reduce_reverse(g_mgr.finished_jobs, info_cb, &item); |
| 279 | |
| 280 | size_t stats_size = sizeof(as_mon_jobstat) * (uint32_t)n_jobs; |
| 281 | as_mon_jobstat* stats = cf_malloc(stats_size); |
| 282 | |
| 283 | memset(stats, 0, stats_size); |
| 284 | |
| 285 | for (int i = 0; i < n_jobs; i++) { |
| 286 | as_scan_job_info(_jobs[i], &stats[i]); |
| 287 | } |
| 288 | |
| 289 | cf_mutex_unlock(&g_mgr.lock); |
| 290 | |
| 291 | *size = n_jobs; |
| 292 | |
| 293 | return stats; // caller must free this |
| 294 | } |
| 295 | |
| 296 | int |
| 297 | as_scan_manager_get_active_job_count(void) |
| 298 | { |
| 299 | cf_mutex_lock(&g_mgr.lock); |
| 300 | |
| 301 | int n_jobs = cf_queue_sz(g_mgr.active_jobs); |
| 302 | |
| 303 | cf_mutex_unlock(&g_mgr.lock); |
| 304 | |
| 305 | return n_jobs; |
| 306 | } |
| 307 | |
| 308 | |
| 309 | //========================================================== |
| 310 | // Local helpers. |
| 311 | // |
| 312 | |
| 313 | static void |
| 314 | evict_finished_jobs(void) |
| 315 | { |
| 316 | int max_allowed = (int)as_load_uint32(&g_config.scan_max_done); |
| 317 | |
| 318 | while (cf_queue_sz(g_mgr.finished_jobs) > max_allowed) { |
| 319 | as_scan_job* _job; |
| 320 | |
| 321 | cf_queue_pop(g_mgr.finished_jobs, &_job, 0); |
| 322 | as_scan_job_destroy(_job); |
| 323 | } |
| 324 | } |
| 325 | |
| 326 | static int |
| 327 | abort_cb(void* buf, void* udata) |
| 328 | { |
| 329 | (void)udata; |
| 330 | |
| 331 | as_scan_job* _job = *(as_scan_job**)buf; |
| 332 | |
| 333 | _job->abandoned = AS_SCAN_ERR_USER_ABORT; |
| 334 | |
| 335 | return 0; |
| 336 | } |
| 337 | |
| 338 | static int |
| 339 | info_cb(void* buf, void* udata) |
| 340 | { |
| 341 | as_scan_job* _job = *(as_scan_job**)buf; |
| 342 | info_item* item = (info_item*)udata; |
| 343 | |
| 344 | *item->p_job++ = _job; |
| 345 | |
| 346 | return 0; |
| 347 | } |
| 348 | |
| 349 | static as_scan_job* |
| 350 | find_any(uint64_t trid) |
| 351 | { |
| 352 | as_scan_job* _job = find_job(g_mgr.active_jobs, trid, false); |
| 353 | |
| 354 | if (_job == NULL) { |
| 355 | _job = find_job(g_mgr.finished_jobs, trid, false); |
| 356 | } |
| 357 | |
| 358 | return _job; |
| 359 | } |
| 360 | |
| 361 | static as_scan_job* |
| 362 | find_active(uint64_t trid) |
| 363 | { |
| 364 | return find_job(g_mgr.active_jobs, trid, false); |
| 365 | } |
| 366 | |
| 367 | static as_scan_job* |
| 368 | remove_active(uint64_t trid) |
| 369 | { |
| 370 | return find_job(g_mgr.active_jobs, trid, true); |
| 371 | } |
| 372 | |
| 373 | static as_scan_job* |
| 374 | find_job(cf_queue* jobs, uint64_t trid, bool remove_job) |
| 375 | { |
| 376 | find_item item = { trid, NULL, remove_job }; |
| 377 | |
| 378 | cf_queue_reduce(jobs, find_cb, &item); |
| 379 | |
| 380 | return item._job; |
| 381 | } |
| 382 | |
| 383 | static int |
| 384 | find_cb(void* buf, void* udata) |
| 385 | { |
| 386 | as_scan_job* _job = *(as_scan_job**)buf; |
| 387 | find_item* match = (find_item*)udata; |
| 388 | |
| 389 | if (match->trid == _job->trid) { |
| 390 | match->_job = _job; |
| 391 | return match->remove_job ? -2 : -1; |
| 392 | } |
| 393 | |
| 394 | return 0; |
| 395 | } |
| 396 | |