| 1 | /* |
| 2 | * linear_hist.c |
| 3 | * |
| 4 | * Copyright (C) 2016 Aerospike, Inc. |
| 5 | * |
| 6 | * Portions may be licensed to Aerospike, Inc. under one or more contributor |
| 7 | * license agreements. |
| 8 | * |
| 9 | * This program is free software: you can redistribute it and/or modify it under |
| 10 | * the terms of the GNU Affero General Public License as published by the Free |
| 11 | * Software Foundation, either version 3 of the License, or (at your option) any |
| 12 | * later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 16 | * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
| 17 | * details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU Affero General Public License |
| 20 | * along with this program. If not, see http://www.gnu.org/licenses/ |
| 21 | */ |
| 22 | |
| 23 | //========================================================== |
| 24 | // Includes. |
| 25 | // |
| 26 | |
| 27 | #include "linear_hist.h" |
| 28 | |
| 29 | #include <stddef.h> |
| 30 | #include <stdint.h> |
| 31 | #include <stdio.h> |
| 32 | #include <string.h> |
| 33 | |
| 34 | #include "citrusleaf/alloc.h" |
| 35 | |
| 36 | #include "cf_mutex.h" |
| 37 | #include "dynbuf.h" |
| 38 | #include "fault.h" |
| 39 | |
| 40 | |
| 41 | //========================================================== |
| 42 | // Typedefs & constants. |
| 43 | // |
| 44 | |
// Capacity of a histogram's name buffer, including the terminating null -
// linear_hist_create() asserts that names are strictly shorter than this.
#define LINEAR_HIST_NAME_SIZE 512

// Unit tags written into the info snapshot, one per supported scale.
#define LINEAR_HIST_TAG_SECONDS "seconds"
#define LINEAR_HIST_TAG_SIZE "bytes"
| 49 | |
// State of one linear (fixed bucket width) histogram.
struct linear_hist_s {
	char name[LINEAR_HIST_NAME_SIZE]; // null-terminated copy of the name given at creation
	const char* scale_tag; // one of the LINEAR_HIST_TAG_* literals, chosen from the scale

	cf_mutex info_lock; // held while info_snapshot is rebuilt or read
	char* info_snapshot; // formatted snapshot text - NULL until first saved

	uint32_t num_buckets; // number of elements in counts - never 0
	uint64_t *counts; // per-bucket counters

	uint32_t start; // low edge of bucket 0
	uint32_t bucket_width; // range covered by each bucket - never 0
};
| 63 | |
// Worst-case length of the info snapshot prefix, e.g.
// units=bytes:hist-width=131072:bucket-width=1024:buckets=
//
// The terms are, in order: "units", '=', the longest unit tag ("seconds" - 7
// characters), ':', "hist-width", '=', a 10-digit uint32_t, ':',
// "bucket-width", '=', a 10-digit uint32_t, ':', "buckets", '='.
#define PREFIX_SIZE (5 + 1 + 7 + 1 + 10 + 1 + 10 + 1 + 12 + 1 + 10 + 1 + 7 + 1)
| 66 | |
| 67 | |
| 68 | //========================================================== |
| 69 | // Public API. |
| 70 | // |
| 71 | |
| 72 | //------------------------------------------------ |
| 73 | // Create a linear histogram. |
| 74 | // |
| 75 | linear_hist* |
| 76 | linear_hist_create(const char *name, linear_hist_scale scale, uint32_t start, |
| 77 | uint32_t max_offset, uint32_t num_buckets) |
| 78 | { |
| 79 | cf_assert(name, AS_INFO, "null histogram name" ); |
| 80 | cf_assert(strlen(name) < LINEAR_HIST_NAME_SIZE, AS_INFO, |
| 81 | "bad histogram name %s" , name); |
| 82 | cf_assert(scale >= 0 && scale < LINEAR_HIST_SCALE_MAX_PLUS_1, AS_INFO, |
| 83 | "bad histogram scale %d" , scale); |
| 84 | cf_assert(start + max_offset >= start, AS_INFO, "max_offset overflow" ); |
| 85 | cf_assert(num_buckets != 0, AS_INFO, "num_buckets 0" ); |
| 86 | |
| 87 | linear_hist *h = cf_malloc(sizeof(linear_hist)); |
| 88 | |
| 89 | strcpy(h->name, name); |
| 90 | |
| 91 | switch (scale) { |
| 92 | case LINEAR_HIST_SECONDS: |
| 93 | h->scale_tag = LINEAR_HIST_TAG_SECONDS; |
| 94 | break; |
| 95 | case LINEAR_HIST_SIZE: |
| 96 | h->scale_tag = LINEAR_HIST_TAG_SIZE; |
| 97 | break; |
| 98 | default: |
| 99 | cf_crash(AS_INFO, "%s: unrecognized histogram scale %d" , name, scale); |
| 100 | break; |
| 101 | } |
| 102 | |
| 103 | cf_mutex_init(&h->info_lock); |
| 104 | h->info_snapshot = NULL; |
| 105 | |
| 106 | h->num_buckets = num_buckets; |
| 107 | h->counts = cf_malloc(sizeof(uint64_t) * num_buckets); |
| 108 | |
| 109 | linear_hist_clear(h, start, max_offset); |
| 110 | |
| 111 | return h; |
| 112 | } |
| 113 | |
| 114 | //------------------------------------------------ |
| 115 | // Destroy a linear histogram. |
| 116 | // |
| 117 | void |
| 118 | linear_hist_destroy(linear_hist *h) |
| 119 | { |
| 120 | cf_mutex_destroy(&h->info_lock); |
| 121 | cf_free(h->counts); |
| 122 | cf_free(h); |
| 123 | } |
| 124 | |
| 125 | //------------------------------------------------ |
| 126 | // Clear, re-scale/re-size a linear histogram. |
| 127 | // |
| 128 | void |
| 129 | linear_hist_reset(linear_hist *h, uint32_t start, uint32_t max_offset, |
| 130 | uint32_t num_buckets) |
| 131 | { |
| 132 | cf_assert(num_buckets != 0, AS_INFO, "num_buckets 0" ); |
| 133 | |
| 134 | if (h->num_buckets == num_buckets) { |
| 135 | linear_hist_clear(h, start, max_offset); |
| 136 | return; |
| 137 | } |
| 138 | |
| 139 | h->num_buckets = num_buckets; |
| 140 | h->counts = cf_realloc(h->counts, sizeof(uint64_t) * num_buckets); |
| 141 | linear_hist_clear(h, start, max_offset); |
| 142 | } |
| 143 | |
| 144 | //------------------------------------------------ |
| 145 | // Clear and (re-)scale a linear histogram. |
| 146 | // |
| 147 | void |
| 148 | linear_hist_clear(linear_hist *h, uint32_t start, uint32_t max_offset) |
| 149 | { |
| 150 | h->start = start; |
| 151 | h->bucket_width = (max_offset + (h->num_buckets - 1)) / h->num_buckets; |
| 152 | |
| 153 | // Only needed to protect against max_offset 0. |
| 154 | if (h->bucket_width == 0) { |
| 155 | h->bucket_width = 1; |
| 156 | } |
| 157 | |
| 158 | memset((void *)h->counts, 0, sizeof(uint64_t) * h->num_buckets); |
| 159 | } |
| 160 | |
| 161 | //------------------------------------------------ |
| 162 | // Access method for total count. |
| 163 | // |
| 164 | uint64_t |
| 165 | linear_hist_get_total(linear_hist *h) |
| 166 | { |
| 167 | uint64_t total_count = 0; |
| 168 | |
| 169 | for (uint32_t i = 0; i < h->num_buckets; i++) { |
| 170 | total_count += h->counts[i]; |
| 171 | } |
| 172 | |
| 173 | return total_count; |
| 174 | } |
| 175 | |
| 176 | //------------------------------------------------ |
| 177 | // Merge h2 into h1. |
| 178 | // |
| 179 | void |
| 180 | linear_hist_merge(linear_hist *h1, linear_hist *h2) |
| 181 | { |
| 182 | if (! (h1->num_buckets == h2->num_buckets && h1->start == h2->start && |
| 183 | h1->bucket_width == h2->bucket_width)) { |
| 184 | cf_crash(AS_INFO, "linear_hist_merge - dissimilar histograms" ); |
| 185 | } |
| 186 | |
| 187 | for (uint32_t i = 0; i < h1->num_buckets; i++) { |
| 188 | h1->counts[i] += h2->counts[i]; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | //------------------------------------------------ |
| 193 | // Insert a data point. Points out of range will |
| 194 | // end up in the bucket at the appropriate end. |
| 195 | // |
| 196 | void |
| 197 | linear_hist_insert_data_point(linear_hist *h, uint32_t point) |
| 198 | { |
| 199 | int32_t offset = (int32_t)(point - h->start); |
| 200 | int32_t bucket = 0; |
| 201 | |
| 202 | if (offset > 0) { |
| 203 | bucket = offset / h->bucket_width; |
| 204 | |
| 205 | if (bucket >= (int32_t)h->num_buckets) { |
| 206 | bucket = h->num_buckets - 1; |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | h->counts[bucket]++; |
| 211 | } |
| 212 | |
| 213 | //------------------------------------------------ |
| 214 | // Get the low edge of the "threshold" bucket - |
| 215 | // the bucket in which the specified percentage of |
| 216 | // total count is exceeded (accumulating from low |
| 217 | // bucket). |
| 218 | // |
| 219 | uint64_t |
| 220 | linear_hist_get_threshold_for_fraction(linear_hist *h, uint32_t tenths_pct, |
| 221 | linear_hist_threshold *p_threshold) |
| 222 | { |
| 223 | return linear_hist_get_threshold_for_subtotal(h, |
| 224 | (linear_hist_get_total(h) * (uint64_t)tenths_pct) / 1000, |
| 225 | p_threshold); |
| 226 | } |
| 227 | |
| 228 | //------------------------------------------------ |
| 229 | // Get the low edge of the "threshold" bucket - |
| 230 | // the bucket in which the specified subtotal |
| 231 | // count is exceeded (accumulating from low |
| 232 | // bucket). |
| 233 | // |
| 234 | uint64_t |
| 235 | linear_hist_get_threshold_for_subtotal(linear_hist *h, uint64_t subtotal, |
| 236 | linear_hist_threshold *p_threshold) |
| 237 | { |
| 238 | p_threshold->bucket_width = h->bucket_width; |
| 239 | p_threshold->target_count = subtotal; |
| 240 | |
| 241 | uint64_t count = 0; |
| 242 | uint32_t i; |
| 243 | |
| 244 | for (i = 0; i < h->num_buckets; i++) { |
| 245 | count += h->counts[i]; |
| 246 | |
| 247 | if (count > subtotal) { |
| 248 | break; |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | if (i == h->num_buckets) { |
| 253 | // This means subtotal >= h->total_count. |
| 254 | p_threshold->value = 0xFFFFffff; |
| 255 | p_threshold->bucket_index = 0; // irrelevant |
| 256 | p_threshold->bucket_count = 0; // irrelevant |
| 257 | return count; |
| 258 | } |
| 259 | |
| 260 | p_threshold->value = h->start + (i * h->bucket_width); |
| 261 | p_threshold->bucket_index = i; |
| 262 | p_threshold->bucket_count = h->counts[i]; |
| 263 | |
| 264 | // Return subtotal of everything below "threshold" bucket. |
| 265 | return count - h->counts[i]; |
| 266 | } |
| 267 | |
| 268 | //------------------------------------------------ |
| 269 | // Dump a linear histogram to log. |
| 270 | // |
| 271 | // Note - DO NOT change the log output format in |
| 272 | // this method - public documentation assumes this |
| 273 | // format. |
| 274 | // |
| 275 | void |
| 276 | linear_hist_dump(linear_hist *h) |
| 277 | { |
| 278 | uint32_t i = h->num_buckets; |
| 279 | uint32_t j = 0; |
| 280 | uint32_t k = 0; |
| 281 | uint64_t total_count = 0; |
| 282 | |
| 283 | for (uint32_t b = 0; b < h->num_buckets; b++) { |
| 284 | if (h->counts[b] != 0) { |
| 285 | if (i > b) { |
| 286 | i = b; |
| 287 | } |
| 288 | |
| 289 | j = b; |
| 290 | k++; |
| 291 | total_count += h->counts[b]; |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | char buf[100]; |
| 296 | int pos = 0; |
| 297 | int n = 0; |
| 298 | |
| 299 | buf[0] = '\0'; |
| 300 | |
| 301 | cf_debug(AS_NSUP, "linear histogram dump: %s [%u %u]/[%u] (%lu total)" , |
| 302 | h->name, h->start, h->start + (h->num_buckets * h->bucket_width), |
| 303 | h->bucket_width, total_count); |
| 304 | |
| 305 | if (k > 100) { |
| 306 | // For now, just don't bother if there's too much to dump. |
| 307 | cf_debug(AS_NSUP, "... (%u buckets with non-zero count)" , k); |
| 308 | return; |
| 309 | } |
| 310 | |
| 311 | for ( ; i <= j; i++) { |
| 312 | if (h->counts[i] == 0) { // print only non-zero columns |
| 313 | continue; |
| 314 | } |
| 315 | |
| 316 | int bytes = sprintf(buf + pos, " (%02u: %010lu)" , i, h->counts[i]); |
| 317 | |
| 318 | if (bytes <= 0) { |
| 319 | cf_debug(AS_NSUP, "linear histogram dump error" ); |
| 320 | return; |
| 321 | } |
| 322 | |
| 323 | pos += bytes; |
| 324 | |
| 325 | if ((n & 3) == 3) { // maximum of 4 printed columns per log line |
| 326 | cf_debug(AS_NSUP, "%s" , buf); |
| 327 | pos = 0; |
| 328 | buf[0] = '\0'; |
| 329 | } |
| 330 | |
| 331 | n++; |
| 332 | } |
| 333 | |
| 334 | if (pos > 0) { |
| 335 | cf_debug(AS_NSUP, "%s" , buf); |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | //------------------------------------------------ |
| 340 | // Save a linear histogram "snapshot". |
| 341 | // |
| 342 | void |
| 343 | linear_hist_save_info(linear_hist *h) |
| 344 | { |
| 345 | // For now, just don't bother if there's too much to save. |
| 346 | if (h->num_buckets > 1024) { |
| 347 | return; |
| 348 | } |
| 349 | |
| 350 | cf_mutex_lock(&h->info_lock); |
| 351 | |
| 352 | size_t size = PREFIX_SIZE + (h->num_buckets * (20 + 1)) + 1; |
| 353 | |
| 354 | // Allocate such that all histograms incur minimum penalty. |
| 355 | h->info_snapshot = cf_realloc(h->info_snapshot, size); |
| 356 | |
| 357 | int prefix_len = sprintf(h->info_snapshot, |
| 358 | "units=%s:hist-width=%u:bucket-width=%u:buckets=" , |
| 359 | h->scale_tag, h->num_buckets * h->bucket_width, h->bucket_width); |
| 360 | char *at = h->info_snapshot + prefix_len; |
| 361 | |
| 362 | for (uint32_t b = 0; b < h->num_buckets; b++) { |
| 363 | uint64_t count = h->counts[b]; |
| 364 | |
| 365 | at += sprintf(at, "%lu," , count); |
| 366 | } |
| 367 | |
| 368 | *(at - 1) = 0; |
| 369 | |
| 370 | cf_mutex_unlock(&h->info_lock); |
| 371 | } |
| 372 | |
| 373 | //------------------------------------------------ |
| 374 | // Append a linear histogram "snapshot" to db. |
| 375 | // |
| 376 | void |
| 377 | linear_hist_get_info(linear_hist *h, cf_dyn_buf *db) |
| 378 | { |
| 379 | cf_mutex_lock(&h->info_lock); |
| 380 | cf_dyn_buf_append_string(db, h->info_snapshot ? h->info_snapshot : "" ); |
| 381 | cf_mutex_unlock(&h->info_lock); |
| 382 | } |
| 383 | |