1 | /* |
2 | * linear_hist.c |
3 | * |
4 | * Copyright (C) 2016 Aerospike, Inc. |
5 | * |
6 | * Portions may be licensed to Aerospike, Inc. under one or more contributor |
7 | * license agreements. |
8 | * |
9 | * This program is free software: you can redistribute it and/or modify it under |
10 | * the terms of the GNU Affero General Public License as published by the Free |
11 | * Software Foundation, either version 3 of the License, or (at your option) any |
12 | * later version. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, but WITHOUT |
15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
16 | * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
17 | * details. |
18 | * |
19 | * You should have received a copy of the GNU Affero General Public License |
20 | * along with this program. If not, see http://www.gnu.org/licenses/ |
21 | */ |
22 | |
23 | //========================================================== |
24 | // Includes. |
25 | // |
26 | |
27 | #include "linear_hist.h" |
28 | |
29 | #include <stddef.h> |
30 | #include <stdint.h> |
31 | #include <stdio.h> |
32 | #include <string.h> |
33 | |
34 | #include "citrusleaf/alloc.h" |
35 | |
36 | #include "cf_mutex.h" |
37 | #include "dynbuf.h" |
38 | #include "fault.h" |
39 | |
40 | |
41 | //========================================================== |
42 | // Typedefs & constants. |
43 | // |
44 | |
45 | #define LINEAR_HIST_NAME_SIZE 512 |
46 | |
47 | #define LINEAR_HIST_TAG_SECONDS "seconds" |
48 | #define LINEAR_HIST_TAG_SIZE "bytes" |
49 | |
50 | struct linear_hist_s { |
51 | char name[LINEAR_HIST_NAME_SIZE]; |
52 | const char* scale_tag; |
53 | |
54 | cf_mutex info_lock; |
55 | char* info_snapshot; |
56 | |
57 | uint32_t num_buckets; |
58 | uint64_t *counts; |
59 | |
60 | uint32_t start; |
61 | uint32_t bucket_width; |
62 | }; |
63 | |
64 | // e.g. units=bytes:hist-width=131072:bucket-width=1024:buckets= |
65 | #define PREFIX_SIZE (5 + 1 + 5 + 1 + 10 + 1 + 10 + 1 + 12 + 1 + 10 + 1 + 7 + 1) |
66 | |
67 | |
68 | //========================================================== |
69 | // Public API. |
70 | // |
71 | |
72 | //------------------------------------------------ |
73 | // Create a linear histogram. |
74 | // |
75 | linear_hist* |
76 | linear_hist_create(const char *name, linear_hist_scale scale, uint32_t start, |
77 | uint32_t max_offset, uint32_t num_buckets) |
78 | { |
79 | cf_assert(name, AS_INFO, "null histogram name" ); |
80 | cf_assert(strlen(name) < LINEAR_HIST_NAME_SIZE, AS_INFO, |
81 | "bad histogram name %s" , name); |
82 | cf_assert(scale >= 0 && scale < LINEAR_HIST_SCALE_MAX_PLUS_1, AS_INFO, |
83 | "bad histogram scale %d" , scale); |
84 | cf_assert(start + max_offset >= start, AS_INFO, "max_offset overflow" ); |
85 | cf_assert(num_buckets != 0, AS_INFO, "num_buckets 0" ); |
86 | |
87 | linear_hist *h = cf_malloc(sizeof(linear_hist)); |
88 | |
89 | strcpy(h->name, name); |
90 | |
91 | switch (scale) { |
92 | case LINEAR_HIST_SECONDS: |
93 | h->scale_tag = LINEAR_HIST_TAG_SECONDS; |
94 | break; |
95 | case LINEAR_HIST_SIZE: |
96 | h->scale_tag = LINEAR_HIST_TAG_SIZE; |
97 | break; |
98 | default: |
99 | cf_crash(AS_INFO, "%s: unrecognized histogram scale %d" , name, scale); |
100 | break; |
101 | } |
102 | |
103 | cf_mutex_init(&h->info_lock); |
104 | h->info_snapshot = NULL; |
105 | |
106 | h->num_buckets = num_buckets; |
107 | h->counts = cf_malloc(sizeof(uint64_t) * num_buckets); |
108 | |
109 | linear_hist_clear(h, start, max_offset); |
110 | |
111 | return h; |
112 | } |
113 | |
114 | //------------------------------------------------ |
115 | // Destroy a linear histogram. |
116 | // |
117 | void |
118 | linear_hist_destroy(linear_hist *h) |
119 | { |
120 | cf_mutex_destroy(&h->info_lock); |
121 | cf_free(h->counts); |
122 | cf_free(h); |
123 | } |
124 | |
125 | //------------------------------------------------ |
126 | // Clear, re-scale/re-size a linear histogram. |
127 | // |
128 | void |
129 | linear_hist_reset(linear_hist *h, uint32_t start, uint32_t max_offset, |
130 | uint32_t num_buckets) |
131 | { |
132 | cf_assert(num_buckets != 0, AS_INFO, "num_buckets 0" ); |
133 | |
134 | if (h->num_buckets == num_buckets) { |
135 | linear_hist_clear(h, start, max_offset); |
136 | return; |
137 | } |
138 | |
139 | h->num_buckets = num_buckets; |
140 | h->counts = cf_realloc(h->counts, sizeof(uint64_t) * num_buckets); |
141 | linear_hist_clear(h, start, max_offset); |
142 | } |
143 | |
144 | //------------------------------------------------ |
145 | // Clear and (re-)scale a linear histogram. |
146 | // |
147 | void |
148 | linear_hist_clear(linear_hist *h, uint32_t start, uint32_t max_offset) |
149 | { |
150 | h->start = start; |
151 | h->bucket_width = (max_offset + (h->num_buckets - 1)) / h->num_buckets; |
152 | |
153 | // Only needed to protect against max_offset 0. |
154 | if (h->bucket_width == 0) { |
155 | h->bucket_width = 1; |
156 | } |
157 | |
158 | memset((void *)h->counts, 0, sizeof(uint64_t) * h->num_buckets); |
159 | } |
160 | |
161 | //------------------------------------------------ |
162 | // Access method for total count. |
163 | // |
164 | uint64_t |
165 | linear_hist_get_total(linear_hist *h) |
166 | { |
167 | uint64_t total_count = 0; |
168 | |
169 | for (uint32_t i = 0; i < h->num_buckets; i++) { |
170 | total_count += h->counts[i]; |
171 | } |
172 | |
173 | return total_count; |
174 | } |
175 | |
176 | //------------------------------------------------ |
177 | // Merge h2 into h1. |
178 | // |
179 | void |
180 | linear_hist_merge(linear_hist *h1, linear_hist *h2) |
181 | { |
182 | if (! (h1->num_buckets == h2->num_buckets && h1->start == h2->start && |
183 | h1->bucket_width == h2->bucket_width)) { |
184 | cf_crash(AS_INFO, "linear_hist_merge - dissimilar histograms" ); |
185 | } |
186 | |
187 | for (uint32_t i = 0; i < h1->num_buckets; i++) { |
188 | h1->counts[i] += h2->counts[i]; |
189 | } |
190 | } |
191 | |
192 | //------------------------------------------------ |
193 | // Insert a data point. Points out of range will |
194 | // end up in the bucket at the appropriate end. |
195 | // |
196 | void |
197 | linear_hist_insert_data_point(linear_hist *h, uint32_t point) |
198 | { |
199 | int32_t offset = (int32_t)(point - h->start); |
200 | int32_t bucket = 0; |
201 | |
202 | if (offset > 0) { |
203 | bucket = offset / h->bucket_width; |
204 | |
205 | if (bucket >= (int32_t)h->num_buckets) { |
206 | bucket = h->num_buckets - 1; |
207 | } |
208 | } |
209 | |
210 | h->counts[bucket]++; |
211 | } |
212 | |
213 | //------------------------------------------------ |
214 | // Get the low edge of the "threshold" bucket - |
215 | // the bucket in which the specified percentage of |
216 | // total count is exceeded (accumulating from low |
217 | // bucket). |
218 | // |
219 | uint64_t |
220 | linear_hist_get_threshold_for_fraction(linear_hist *h, uint32_t tenths_pct, |
221 | linear_hist_threshold *p_threshold) |
222 | { |
223 | return linear_hist_get_threshold_for_subtotal(h, |
224 | (linear_hist_get_total(h) * (uint64_t)tenths_pct) / 1000, |
225 | p_threshold); |
226 | } |
227 | |
228 | //------------------------------------------------ |
229 | // Get the low edge of the "threshold" bucket - |
230 | // the bucket in which the specified subtotal |
231 | // count is exceeded (accumulating from low |
232 | // bucket). |
233 | // |
234 | uint64_t |
235 | linear_hist_get_threshold_for_subtotal(linear_hist *h, uint64_t subtotal, |
236 | linear_hist_threshold *p_threshold) |
237 | { |
238 | p_threshold->bucket_width = h->bucket_width; |
239 | p_threshold->target_count = subtotal; |
240 | |
241 | uint64_t count = 0; |
242 | uint32_t i; |
243 | |
244 | for (i = 0; i < h->num_buckets; i++) { |
245 | count += h->counts[i]; |
246 | |
247 | if (count > subtotal) { |
248 | break; |
249 | } |
250 | } |
251 | |
252 | if (i == h->num_buckets) { |
253 | // This means subtotal >= h->total_count. |
254 | p_threshold->value = 0xFFFFffff; |
255 | p_threshold->bucket_index = 0; // irrelevant |
256 | p_threshold->bucket_count = 0; // irrelevant |
257 | return count; |
258 | } |
259 | |
260 | p_threshold->value = h->start + (i * h->bucket_width); |
261 | p_threshold->bucket_index = i; |
262 | p_threshold->bucket_count = h->counts[i]; |
263 | |
264 | // Return subtotal of everything below "threshold" bucket. |
265 | return count - h->counts[i]; |
266 | } |
267 | |
268 | //------------------------------------------------ |
269 | // Dump a linear histogram to log. |
270 | // |
271 | // Note - DO NOT change the log output format in |
272 | // this method - public documentation assumes this |
273 | // format. |
274 | // |
275 | void |
276 | linear_hist_dump(linear_hist *h) |
277 | { |
278 | uint32_t i = h->num_buckets; |
279 | uint32_t j = 0; |
280 | uint32_t k = 0; |
281 | uint64_t total_count = 0; |
282 | |
283 | for (uint32_t b = 0; b < h->num_buckets; b++) { |
284 | if (h->counts[b] != 0) { |
285 | if (i > b) { |
286 | i = b; |
287 | } |
288 | |
289 | j = b; |
290 | k++; |
291 | total_count += h->counts[b]; |
292 | } |
293 | } |
294 | |
295 | char buf[100]; |
296 | int pos = 0; |
297 | int n = 0; |
298 | |
299 | buf[0] = '\0'; |
300 | |
301 | cf_debug(AS_NSUP, "linear histogram dump: %s [%u %u]/[%u] (%lu total)" , |
302 | h->name, h->start, h->start + (h->num_buckets * h->bucket_width), |
303 | h->bucket_width, total_count); |
304 | |
305 | if (k > 100) { |
306 | // For now, just don't bother if there's too much to dump. |
307 | cf_debug(AS_NSUP, "... (%u buckets with non-zero count)" , k); |
308 | return; |
309 | } |
310 | |
311 | for ( ; i <= j; i++) { |
312 | if (h->counts[i] == 0) { // print only non-zero columns |
313 | continue; |
314 | } |
315 | |
316 | int bytes = sprintf(buf + pos, " (%02u: %010lu)" , i, h->counts[i]); |
317 | |
318 | if (bytes <= 0) { |
319 | cf_debug(AS_NSUP, "linear histogram dump error" ); |
320 | return; |
321 | } |
322 | |
323 | pos += bytes; |
324 | |
325 | if ((n & 3) == 3) { // maximum of 4 printed columns per log line |
326 | cf_debug(AS_NSUP, "%s" , buf); |
327 | pos = 0; |
328 | buf[0] = '\0'; |
329 | } |
330 | |
331 | n++; |
332 | } |
333 | |
334 | if (pos > 0) { |
335 | cf_debug(AS_NSUP, "%s" , buf); |
336 | } |
337 | } |
338 | |
339 | //------------------------------------------------ |
340 | // Save a linear histogram "snapshot". |
341 | // |
342 | void |
343 | linear_hist_save_info(linear_hist *h) |
344 | { |
345 | // For now, just don't bother if there's too much to save. |
346 | if (h->num_buckets > 1024) { |
347 | return; |
348 | } |
349 | |
350 | cf_mutex_lock(&h->info_lock); |
351 | |
352 | size_t size = PREFIX_SIZE + (h->num_buckets * (20 + 1)) + 1; |
353 | |
354 | // Allocate such that all histograms incur minimum penalty. |
355 | h->info_snapshot = cf_realloc(h->info_snapshot, size); |
356 | |
357 | int prefix_len = sprintf(h->info_snapshot, |
358 | "units=%s:hist-width=%u:bucket-width=%u:buckets=" , |
359 | h->scale_tag, h->num_buckets * h->bucket_width, h->bucket_width); |
360 | char *at = h->info_snapshot + prefix_len; |
361 | |
362 | for (uint32_t b = 0; b < h->num_buckets; b++) { |
363 | uint64_t count = h->counts[b]; |
364 | |
365 | at += sprintf(at, "%lu," , count); |
366 | } |
367 | |
368 | *(at - 1) = 0; |
369 | |
370 | cf_mutex_unlock(&h->info_lock); |
371 | } |
372 | |
373 | //------------------------------------------------ |
374 | // Append a linear histogram "snapshot" to db. |
375 | // |
376 | void |
377 | linear_hist_get_info(linear_hist *h, cf_dyn_buf *db) |
378 | { |
379 | cf_mutex_lock(&h->info_lock); |
380 | cf_dyn_buf_append_string(db, h->info_snapshot ? h->info_snapshot : "" ); |
381 | cf_mutex_unlock(&h->info_lock); |
382 | } |
383 | |