| 1 | /* | 
| 2 |  * linear_hist.c | 
| 3 |  * | 
| 4 |  * Copyright (C) 2016 Aerospike, Inc. | 
| 5 |  * | 
| 6 |  * Portions may be licensed to Aerospike, Inc. under one or more contributor | 
| 7 |  * license agreements. | 
| 8 |  * | 
| 9 |  * This program is free software: you can redistribute it and/or modify it under | 
| 10 |  * the terms of the GNU Affero General Public License as published by the Free | 
| 11 |  * Software Foundation, either version 3 of the License, or (at your option) any | 
| 12 |  * later version. | 
| 13 |  * | 
| 14 |  * This program is distributed in the hope that it will be useful, but WITHOUT | 
| 15 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
| 16 |  * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | 
| 17 |  * details. | 
| 18 |  * | 
| 19 |  * You should have received a copy of the GNU Affero General Public License | 
| 20 |  * along with this program.  If not, see http://www.gnu.org/licenses/ | 
| 21 |  */ | 
| 22 |  | 
| 23 | //========================================================== | 
| 24 | // Includes. | 
| 25 | // | 
| 26 |  | 
| 27 | #include "linear_hist.h" | 
| 28 |  | 
| 29 | #include <stddef.h> | 
| 30 | #include <stdint.h> | 
| 31 | #include <stdio.h> | 
| 32 | #include <string.h> | 
| 33 |  | 
| 34 | #include "citrusleaf/alloc.h" | 
| 35 |  | 
| 36 | #include "cf_mutex.h" | 
| 37 | #include "dynbuf.h" | 
| 38 | #include "fault.h" | 
| 39 |  | 
| 40 |  | 
| 41 | //========================================================== | 
| 42 | // Typedefs & constants. | 
| 43 | // | 
| 44 |  | 
| 45 | #define LINEAR_HIST_NAME_SIZE 512 | 
| 46 |  | 
| 47 | #define LINEAR_HIST_TAG_SECONDS	"seconds" | 
| 48 | #define LINEAR_HIST_TAG_SIZE	"bytes" | 
| 49 |  | 
| 50 | struct linear_hist_s { | 
| 51 | 	char name[LINEAR_HIST_NAME_SIZE]; | 
| 52 | 	const char* scale_tag; | 
| 53 |  | 
| 54 | 	cf_mutex info_lock; | 
| 55 | 	char* info_snapshot; | 
| 56 |  | 
| 57 | 	uint32_t num_buckets; | 
| 58 | 	uint64_t *counts; | 
| 59 |  | 
| 60 | 	uint32_t start; | 
| 61 | 	uint32_t bucket_width; | 
| 62 | }; | 
| 63 |  | 
| 64 | // e.g. units=bytes:hist-width=131072:bucket-width=1024:buckets= | 
| 65 | #define PREFIX_SIZE (5 + 1 + 5 + 1 + 10 + 1 + 10 + 1 + 12 + 1 + 10 + 1 + 7 + 1) | 
| 66 |  | 
| 67 |  | 
| 68 | //========================================================== | 
| 69 | // Public API. | 
| 70 | // | 
| 71 |  | 
| 72 | //------------------------------------------------ | 
| 73 | // Create a linear histogram. | 
| 74 | // | 
| 75 | linear_hist* | 
| 76 | linear_hist_create(const char *name, linear_hist_scale scale, uint32_t start, | 
| 77 | 		uint32_t max_offset, uint32_t num_buckets) | 
| 78 | { | 
| 79 | 	cf_assert(name, AS_INFO, "null histogram name" ); | 
| 80 | 	cf_assert(strlen(name) < LINEAR_HIST_NAME_SIZE, AS_INFO, | 
| 81 | 			"bad histogram name %s" , name); | 
| 82 | 	cf_assert(scale >= 0 && scale < LINEAR_HIST_SCALE_MAX_PLUS_1, AS_INFO, | 
| 83 | 			"bad histogram scale %d" , scale); | 
| 84 | 	cf_assert(start + max_offset >= start, AS_INFO, "max_offset overflow" ); | 
| 85 | 	cf_assert(num_buckets != 0, AS_INFO, "num_buckets 0" ); | 
| 86 |  | 
| 87 | 	linear_hist *h = cf_malloc(sizeof(linear_hist)); | 
| 88 |  | 
| 89 | 	strcpy(h->name, name); | 
| 90 |  | 
| 91 | 	switch (scale) { | 
| 92 | 	case LINEAR_HIST_SECONDS: | 
| 93 | 		h->scale_tag = LINEAR_HIST_TAG_SECONDS; | 
| 94 | 		break; | 
| 95 | 	case LINEAR_HIST_SIZE: | 
| 96 | 		h->scale_tag = LINEAR_HIST_TAG_SIZE; | 
| 97 | 		break; | 
| 98 | 	default: | 
| 99 | 		cf_crash(AS_INFO, "%s: unrecognized histogram scale %d" , name, scale); | 
| 100 | 		break; | 
| 101 | 	} | 
| 102 |  | 
| 103 | 	cf_mutex_init(&h->info_lock); | 
| 104 | 	h->info_snapshot = NULL; | 
| 105 |  | 
| 106 | 	h->num_buckets = num_buckets; | 
| 107 | 	h->counts = cf_malloc(sizeof(uint64_t) * num_buckets); | 
| 108 |  | 
| 109 | 	linear_hist_clear(h, start, max_offset); | 
| 110 |  | 
| 111 | 	return h; | 
| 112 | } | 
| 113 |  | 
| 114 | //------------------------------------------------ | 
| 115 | // Destroy a linear histogram. | 
| 116 | // | 
| 117 | void | 
| 118 | linear_hist_destroy(linear_hist *h) | 
| 119 | { | 
| 120 | 	cf_mutex_destroy(&h->info_lock); | 
| 121 | 	cf_free(h->counts); | 
| 122 | 	cf_free(h); | 
| 123 | } | 
| 124 |  | 
| 125 | //------------------------------------------------ | 
| 126 | // Clear, re-scale/re-size a linear histogram. | 
| 127 | // | 
| 128 | void | 
| 129 | linear_hist_reset(linear_hist *h, uint32_t start, uint32_t max_offset, | 
| 130 | 		uint32_t num_buckets) | 
| 131 | { | 
| 132 | 	cf_assert(num_buckets != 0, AS_INFO, "num_buckets 0" ); | 
| 133 |  | 
| 134 | 	if (h->num_buckets == num_buckets) { | 
| 135 | 		linear_hist_clear(h, start, max_offset); | 
| 136 | 		return; | 
| 137 | 	} | 
| 138 |  | 
| 139 | 	h->num_buckets = num_buckets; | 
| 140 | 	h->counts = cf_realloc(h->counts, sizeof(uint64_t) * num_buckets); | 
| 141 | 	linear_hist_clear(h, start, max_offset); | 
| 142 | } | 
| 143 |  | 
| 144 | //------------------------------------------------ | 
| 145 | // Clear and (re-)scale a linear histogram. | 
| 146 | // | 
| 147 | void | 
| 148 | linear_hist_clear(linear_hist *h, uint32_t start, uint32_t max_offset) | 
| 149 | { | 
| 150 | 	h->start = start; | 
| 151 | 	h->bucket_width = (max_offset + (h->num_buckets - 1)) / h->num_buckets; | 
| 152 |  | 
| 153 | 	// Only needed to protect against max_offset 0. | 
| 154 | 	if (h->bucket_width == 0) { | 
| 155 | 		h->bucket_width = 1; | 
| 156 | 	} | 
| 157 |  | 
| 158 | 	memset((void *)h->counts, 0, sizeof(uint64_t) * h->num_buckets); | 
| 159 | } | 
| 160 |  | 
| 161 | //------------------------------------------------ | 
| 162 | // Access method for total count. | 
| 163 | // | 
| 164 | uint64_t | 
| 165 | linear_hist_get_total(linear_hist *h) | 
| 166 | { | 
| 167 | 	uint64_t total_count = 0; | 
| 168 |  | 
| 169 | 	for (uint32_t i = 0; i < h->num_buckets; i++) { | 
| 170 | 		total_count += h->counts[i]; | 
| 171 | 	} | 
| 172 |  | 
| 173 | 	return total_count; | 
| 174 | } | 
| 175 |  | 
| 176 | //------------------------------------------------ | 
| 177 | // Merge h2 into h1. | 
| 178 | // | 
| 179 | void | 
| 180 | linear_hist_merge(linear_hist *h1, linear_hist *h2) | 
| 181 | { | 
| 182 | 	if (! (h1->num_buckets == h2->num_buckets && h1->start == h2->start && | 
| 183 | 			h1->bucket_width == h2->bucket_width)) { | 
| 184 | 		cf_crash(AS_INFO, "linear_hist_merge - dissimilar histograms" ); | 
| 185 | 	} | 
| 186 |  | 
| 187 | 	for (uint32_t i = 0; i < h1->num_buckets; i++) { | 
| 188 | 		h1->counts[i] += h2->counts[i]; | 
| 189 | 	} | 
| 190 | } | 
| 191 |  | 
| 192 | //------------------------------------------------ | 
| 193 | // Insert a data point. Points out of range will | 
| 194 | // end up in the bucket at the appropriate end. | 
| 195 | // | 
| 196 | void | 
| 197 | linear_hist_insert_data_point(linear_hist *h, uint32_t point) | 
| 198 | { | 
| 199 | 	int32_t offset = (int32_t)(point - h->start); | 
| 200 | 	int32_t bucket = 0; | 
| 201 |  | 
| 202 | 	if (offset > 0) { | 
| 203 | 		bucket = offset / h->bucket_width; | 
| 204 |  | 
| 205 | 		if (bucket >= (int32_t)h->num_buckets) { | 
| 206 | 			bucket = h->num_buckets - 1; | 
| 207 | 		} | 
| 208 | 	} | 
| 209 |  | 
| 210 | 	h->counts[bucket]++; | 
| 211 | } | 
| 212 |  | 
| 213 | //------------------------------------------------ | 
| 214 | // Get the low edge of the "threshold" bucket - | 
| 215 | // the bucket in which the specified percentage of | 
| 216 | // total count is exceeded (accumulating from low | 
| 217 | // bucket). | 
| 218 | // | 
| 219 | uint64_t | 
| 220 | linear_hist_get_threshold_for_fraction(linear_hist *h, uint32_t tenths_pct, | 
| 221 | 		linear_hist_threshold *p_threshold) | 
| 222 | { | 
| 223 | 	return linear_hist_get_threshold_for_subtotal(h, | 
| 224 | 			(linear_hist_get_total(h) * (uint64_t)tenths_pct) / 1000, | 
| 225 | 			p_threshold); | 
| 226 | } | 
| 227 |  | 
| 228 | //------------------------------------------------ | 
| 229 | // Get the low edge of the "threshold" bucket - | 
| 230 | // the bucket in which the specified subtotal | 
| 231 | // count is exceeded (accumulating from low | 
| 232 | // bucket). | 
| 233 | // | 
| 234 | uint64_t | 
| 235 | linear_hist_get_threshold_for_subtotal(linear_hist *h, uint64_t subtotal, | 
| 236 | 		linear_hist_threshold *p_threshold) | 
| 237 | { | 
| 238 | 	p_threshold->bucket_width = h->bucket_width; | 
| 239 | 	p_threshold->target_count = subtotal; | 
| 240 |  | 
| 241 | 	uint64_t count = 0; | 
| 242 | 	uint32_t i; | 
| 243 |  | 
| 244 | 	for (i = 0; i < h->num_buckets; i++) { | 
| 245 | 		count += h->counts[i]; | 
| 246 |  | 
| 247 | 		if (count > subtotal) { | 
| 248 | 			break; | 
| 249 | 		} | 
| 250 | 	} | 
| 251 |  | 
| 252 | 	if (i == h->num_buckets) { | 
| 253 | 		// This means subtotal >= h->total_count. | 
| 254 | 		p_threshold->value = 0xFFFFffff; | 
| 255 | 		p_threshold->bucket_index = 0; // irrelevant | 
| 256 | 		p_threshold->bucket_count = 0; // irrelevant | 
| 257 | 		return count; | 
| 258 | 	} | 
| 259 |  | 
| 260 | 	p_threshold->value = h->start + (i * h->bucket_width); | 
| 261 | 	p_threshold->bucket_index = i; | 
| 262 | 	p_threshold->bucket_count = h->counts[i]; | 
| 263 |  | 
| 264 | 	// Return subtotal of everything below "threshold" bucket. | 
| 265 | 	return count - h->counts[i]; | 
| 266 | } | 
| 267 |  | 
| 268 | //------------------------------------------------ | 
| 269 | // Dump a linear histogram to log. | 
| 270 | // | 
| 271 | // Note - DO NOT change the log output format in | 
| 272 | // this method - public documentation assumes this | 
| 273 | // format. | 
| 274 | // | 
| 275 | void | 
| 276 | linear_hist_dump(linear_hist *h) | 
| 277 | { | 
| 278 | 	uint32_t i = h->num_buckets; | 
| 279 | 	uint32_t j = 0; | 
| 280 | 	uint32_t k = 0; | 
| 281 | 	uint64_t total_count = 0; | 
| 282 |  | 
| 283 | 	for (uint32_t b = 0; b < h->num_buckets; b++) { | 
| 284 | 		if (h->counts[b] != 0) { | 
| 285 | 			if (i > b) { | 
| 286 | 				i = b; | 
| 287 | 			} | 
| 288 |  | 
| 289 | 			j = b; | 
| 290 | 			k++; | 
| 291 | 			total_count += h->counts[b]; | 
| 292 | 		} | 
| 293 | 	} | 
| 294 |  | 
| 295 | 	char buf[100]; | 
| 296 | 	int pos = 0; | 
| 297 | 	int n = 0; | 
| 298 |  | 
| 299 | 	buf[0] = '\0'; | 
| 300 |  | 
| 301 | 	cf_debug(AS_NSUP, "linear histogram dump: %s [%u %u]/[%u] (%lu total)" , | 
| 302 | 			h->name, h->start, h->start + (h->num_buckets * h->bucket_width), | 
| 303 | 			h->bucket_width, total_count); | 
| 304 |  | 
| 305 | 	if (k > 100) { | 
| 306 | 		// For now, just don't bother if there's too much to dump. | 
| 307 | 		cf_debug(AS_NSUP, "... (%u buckets with non-zero count)" , k); | 
| 308 | 		return; | 
| 309 | 	} | 
| 310 |  | 
| 311 | 	for ( ; i <= j; i++) { | 
| 312 | 		if (h->counts[i] == 0) { // print only non-zero columns | 
| 313 | 			continue; | 
| 314 | 		} | 
| 315 |  | 
| 316 | 		int bytes = sprintf(buf + pos, " (%02u: %010lu)" , i, h->counts[i]); | 
| 317 |  | 
| 318 | 		if (bytes <= 0) { | 
| 319 | 			cf_debug(AS_NSUP, "linear histogram dump error" ); | 
| 320 | 			return; | 
| 321 | 		} | 
| 322 |  | 
| 323 | 		pos += bytes; | 
| 324 |  | 
| 325 | 		if ((n & 3) == 3) { // maximum of 4 printed columns per log line | 
| 326 | 			 cf_debug(AS_NSUP, "%s" , buf); | 
| 327 | 			 pos = 0; | 
| 328 | 			 buf[0] = '\0'; | 
| 329 | 		} | 
| 330 |  | 
| 331 | 		n++; | 
| 332 | 	} | 
| 333 |  | 
| 334 | 	if (pos > 0) { | 
| 335 | 		cf_debug(AS_NSUP, "%s" , buf); | 
| 336 | 	} | 
| 337 | } | 
| 338 |  | 
| 339 | //------------------------------------------------ | 
| 340 | // Save a linear histogram "snapshot". | 
| 341 | // | 
| 342 | void | 
| 343 | linear_hist_save_info(linear_hist *h) | 
| 344 | { | 
| 345 | 	// For now, just don't bother if there's too much to save. | 
| 346 | 	if (h->num_buckets > 1024) { | 
| 347 | 		return; | 
| 348 | 	} | 
| 349 |  | 
| 350 | 	cf_mutex_lock(&h->info_lock); | 
| 351 |  | 
| 352 | 	size_t size = PREFIX_SIZE + (h->num_buckets * (20 + 1)) + 1; | 
| 353 |  | 
| 354 | 	// Allocate such that all histograms incur minimum penalty. | 
| 355 | 	h->info_snapshot = cf_realloc(h->info_snapshot, size); | 
| 356 |  | 
| 357 | 	int prefix_len = sprintf(h->info_snapshot, | 
| 358 | 			"units=%s:hist-width=%u:bucket-width=%u:buckets=" , | 
| 359 | 			h->scale_tag, h->num_buckets * h->bucket_width, h->bucket_width); | 
| 360 | 	char *at = h->info_snapshot + prefix_len; | 
| 361 |  | 
| 362 | 	for (uint32_t b = 0; b < h->num_buckets; b++) { | 
| 363 | 		uint64_t count = h->counts[b]; | 
| 364 |  | 
| 365 | 		at += sprintf(at, "%lu," , count); | 
| 366 | 	} | 
| 367 |  | 
| 368 | 	*(at - 1) = 0; | 
| 369 |  | 
| 370 | 	cf_mutex_unlock(&h->info_lock); | 
| 371 | } | 
| 372 |  | 
| 373 | //------------------------------------------------ | 
| 374 | // Append a linear histogram "snapshot" to db. | 
| 375 | // | 
| 376 | void | 
| 377 | linear_hist_get_info(linear_hist *h, cf_dyn_buf *db) | 
| 378 | { | 
| 379 | 	cf_mutex_lock(&h->info_lock); | 
| 380 | 	cf_dyn_buf_append_string(db, h->info_snapshot ? h->info_snapshot : "" ); | 
| 381 | 	cf_mutex_unlock(&h->info_lock); | 
| 382 | } | 
| 383 |  |