1 | /* |
2 | * hist.c |
3 | * |
4 | * Copyright (C) 2008-2016 Aerospike, Inc. |
5 | * |
6 | * Portions may be licensed to Aerospike, Inc. under one or more contributor |
7 | * license agreements. |
8 | * |
9 | * This program is free software: you can redistribute it and/or modify it under |
10 | * the terms of the GNU Affero General Public License as published by the Free |
11 | * Software Foundation, either version 3 of the License, or (at your option) any |
12 | * later version. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, but WITHOUT |
15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
16 | * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
17 | * details. |
18 | * |
19 | * You should have received a copy of the GNU Affero General Public License |
20 | * along with this program. If not, see http://www.gnu.org/licenses/ |
21 | */ |
22 | |
23 | //========================================================== |
24 | // Includes. |
25 | // |
26 | |
27 | #include "hist.h" |
28 | |
29 | #include <stdbool.h> |
30 | #include <stddef.h> |
31 | #include <stdint.h> |
32 | #include <stdio.h> |
33 | #include <string.h> |
34 | |
35 | #include "aerospike/as_atomic.h" |
36 | #include "citrusleaf/alloc.h" |
37 | #include "citrusleaf/cf_clock.h" |
38 | |
39 | #include "cf_mutex.h" |
40 | #include "dynbuf.h" |
41 | #include "fault.h" |
42 | |
43 | |
44 | //========================================================== |
45 | // Typedefs & constants. |
46 | // |
47 | |
48 | // e.g. units=bytes:[128K-256K)=12345:[256K-512K)=54321 ... \0 |
49 | #define SNAPSHOT_SIZE (5 + 1 + 5 + 1 + (N_BUCKETS * (1 + 11 + 1 + 20)) + 1) |
50 | |
51 | // TODO - may want different tags for units other than bytes. |
52 | static const char *SNAPSHOT_TAGS[] = { |
53 | "0" , |
54 | "1" , "2" , "4" , "8" , "16" , "32" , "64" , "128" , "256" , "512" , |
55 | "1K" , "2K" , "4K" , "8K" , "16K" , "32K" , "64K" , "128K" , "256K" , "512K" , |
56 | "1M" , "2M" , "4M" , "8M" , "16M" , "32M" , "64M" , "128M" , "256M" , "512M" , |
57 | "1G" , "2G" , "4G" , "8G" , "16G" , "32G" , "64G" , "128G" , "256G" , "512G" , |
58 | "1T" , "2T" , "4T" , "8T" , "16T" , "32T" , "64T" , "128T" , "256T" , "512T" , |
59 | "1P" , "2P" , "4P" , "8P" , "16P" , "32P" , "64P" , "128P" , "256P" , "512P" , |
60 | "1E" , "2E" , "4E" , "8E" , "inf" |
61 | }; |
62 | |
63 | COMPILER_ASSERT(sizeof(SNAPSHOT_TAGS) / sizeof(const char *) == N_BUCKETS + 1); |
64 | |
65 | //------------------------------------------------ |
66 | // BYTE_MSB[n] returns the position of the most |
67 | // significant bit. If no bits are set (n = 0) it |
68 | // returns 0. Otherwise the positions are 1 ... 8 |
69 | // from low to high, so e.g. n = 13 returns 4: |
70 | // |
71 | // bits: 0 0 0 0 1 1 0 1 |
72 | // position: 8 7 6 5 [4] 3 2 1 |
73 | // |
74 | static const char BYTE_MSB[] = { |
75 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, |
76 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
77 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
78 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
79 | |
80 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
81 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
82 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
83 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
84 | |
85 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
86 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
87 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
88 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
89 | |
90 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
91 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
92 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
93 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 |
94 | }; |
95 | |
96 | |
97 | //========================================================== |
98 | // Inlines & macros. |
99 | // |
100 | |
101 | //------------------------------------------------ |
102 | // Returns the position of the most significant |
103 | // bit of n. Positions are 1 ... 64 from low to |
104 | // high, so: |
105 | // |
106 | // n msb(n) |
107 | // -------- ------ |
108 | // 0 0 |
109 | // 1 1 |
110 | // 2 ... 3 2 |
111 | // 4 ... 7 3 |
112 | // 8 ... 15 4 |
113 | // etc. |
114 | // |
115 | static inline int |
116 | msb(uint64_t n) |
117 | { |
118 | int shift = 0; |
119 | |
120 | while (true) { |
121 | uint64_t n_div_256 = n >> 8; |
122 | |
123 | if (n_div_256 == 0) { |
124 | return shift + (int)BYTE_MSB[n]; |
125 | } |
126 | |
127 | n = n_div_256; |
128 | shift += 8; |
129 | } |
130 | |
131 | // Should never get here. |
132 | cf_crash(AS_INFO, "end of msb()" ); |
133 | return -1; |
134 | } |
135 | |
136 | |
137 | //========================================================== |
138 | // Public API. |
139 | // |
140 | |
141 | //------------------------------------------------ |
142 | // Create a histogram. There's no destroy(), but |
143 | // you can just cf_free() the histogram. |
144 | // |
145 | histogram * |
146 | histogram_create(const char *name, histogram_scale scale) |
147 | { |
148 | cf_assert(name, AS_INFO, "null histogram name" ); |
149 | cf_assert(strlen(name) < HISTOGRAM_NAME_SIZE, AS_INFO, |
150 | "bad histogram name %s" , name); |
151 | cf_assert(scale >= 0 && scale < HIST_SCALE_MAX_PLUS_1, AS_INFO, |
152 | "bad histogram scale %d" , scale); |
153 | |
154 | histogram *h = cf_malloc(sizeof(histogram)); |
155 | |
156 | strcpy(h->name, name); |
157 | memset((void *)h->counts, 0, sizeof(h->counts)); |
158 | |
159 | // If histogram_insert_data_point() is called for a size or count histogram, |
160 | // the divide by 0 will crash - consider that a high-performance assert. |
161 | |
162 | switch (scale) { |
163 | case HIST_MILLISECONDS: |
164 | h->scale_tag = HIST_TAG_MILLISECONDS; |
165 | h->time_div = 1000 * 1000; |
166 | break; |
167 | case HIST_MICROSECONDS: |
168 | h->scale_tag = HIST_TAG_MICROSECONDS; |
169 | h->time_div = 1000; |
170 | break; |
171 | case HIST_SIZE: |
172 | h->scale_tag = HIST_TAG_SIZE; |
173 | h->time_div = 0; |
174 | break; |
175 | case HIST_COUNT: |
176 | h->scale_tag = HIST_TAG_COUNT; |
177 | h->time_div = 0; |
178 | break; |
179 | default: |
180 | cf_crash(AS_INFO, "%s: unrecognized histogram scale %d" , name, scale); |
181 | break; |
182 | } |
183 | |
184 | cf_mutex_init(&h->info_lock); |
185 | h->info_snapshot = NULL; |
186 | |
187 | return h; |
188 | } |
189 | |
190 | //------------------------------------------------ |
191 | // Clear a histogram. |
192 | // |
193 | void |
194 | histogram_clear(histogram *h) |
195 | { |
196 | for (int i = 0; i < N_BUCKETS; i++) { |
197 | as_store_uint64(&h->counts[i], 0); |
198 | } |
199 | } |
200 | |
201 | //------------------------------------------------ |
202 | // Dump a histogram to log. |
203 | // |
204 | // Note - DO NOT change the log output format in |
205 | // this method - tools such as as_log_latency |
206 | // assume this format. |
207 | // |
208 | void |
209 | histogram_dump(histogram *h) |
210 | { |
211 | int b; |
212 | uint64_t counts[N_BUCKETS]; |
213 | |
214 | for (b = 0; b < N_BUCKETS; b++) { |
215 | counts[b] = as_load_uint64(&h->counts[b]); |
216 | } |
217 | |
218 | int i = N_BUCKETS; |
219 | int j = 0; |
220 | uint64_t total_count = 0; |
221 | |
222 | for (b = 0; b < N_BUCKETS; b++) { |
223 | if (counts[b] != 0) { |
224 | if (i > b) { |
225 | i = b; |
226 | } |
227 | |
228 | j = b; |
229 | total_count += counts[b]; |
230 | } |
231 | } |
232 | |
233 | char buf[100]; |
234 | int pos = 0; |
235 | int k = 0; |
236 | |
237 | buf[0] = '\0'; |
238 | |
239 | cf_info(AS_INFO, "histogram dump: %s (%lu total) %s" , h->name, total_count, |
240 | h->scale_tag); |
241 | |
242 | for ( ; i <= j; i++) { |
243 | if (counts[i] == 0) { // print only non-zero columns |
244 | continue; |
245 | } |
246 | |
247 | int bytes = sprintf(buf + pos, " (%02d: %010lu)" , i, counts[i]); |
248 | |
249 | if (bytes <= 0) { |
250 | cf_info(AS_INFO, "histogram dump error" ); |
251 | return; |
252 | } |
253 | |
254 | pos += bytes; |
255 | |
256 | if ((k & 3) == 3) { // maximum of 4 printed columns per log line |
257 | cf_info(AS_INFO, "%s" , buf); |
258 | pos = 0; |
259 | buf[0] = '\0'; |
260 | } |
261 | |
262 | k++; |
263 | } |
264 | |
265 | if (pos > 0) { |
266 | cf_info(AS_INFO, "%s" , buf); |
267 | } |
268 | } |
269 | |
270 | //------------------------------------------------ |
271 | // Insert a time interval data point. The interval |
272 | // is time elapsed since start_ns, converted to |
273 | // milliseconds or microseconds as appropriate. |
274 | // Assumes start_ns was obtained via cf_getns() |
275 | // some time ago. Generates a histogram with |
276 | // either: |
277 | // |
278 | // bucket millisecond range |
279 | // ------ ----------------- |
280 | // 0 0 to 1 (more exactly, 0.999999) |
281 | // 1 1 to 2 (more exactly, 1.999999) |
282 | // 2 2 to 4 (more exactly, 3.999999) |
283 | // 3 4 to 8 (more exactly, 7.999999) |
284 | // 4 8 to 16 (more exactly, 15.999999) |
285 | // etc. |
286 | // |
287 | // or: |
288 | // |
289 | // bucket microsecond range |
290 | // ------ ----------------- |
291 | // 0 0 to 1 (more exactly, 0.999) |
292 | // 1 1 to 2 (more exactly, 1.999) |
293 | // 2 2 to 4 (more exactly, 3.999) |
294 | // 3 4 to 8 (more exactly, 7.999) |
295 | // 4 8 to 16 (more exactly, 15.999) |
296 | // etc. |
297 | // |
298 | uint64_t |
299 | histogram_insert_data_point(histogram *h, uint64_t start_ns) |
300 | { |
301 | uint64_t end_ns = cf_getns(); |
302 | uint64_t delta_t = (end_ns - start_ns) / h->time_div; |
303 | |
304 | int bucket = 0; |
305 | |
306 | if (delta_t != 0) { |
307 | bucket = msb(delta_t); |
308 | |
309 | if (start_ns > end_ns) { |
310 | // Either the clock went backwards, or wrapped. (Assume the former, |
311 | // since it takes ~580 years from 0 to wrap.) |
312 | cf_warning(AS_INFO, "%s - clock went backwards: start %lu end %lu" , |
313 | h->name, start_ns, end_ns); |
314 | bucket = 0; |
315 | } |
316 | } |
317 | |
318 | as_incr_uint64(&h->counts[bucket]); |
319 | |
320 | return end_ns; |
321 | } |
322 | |
323 | //------------------------------------------------ |
324 | // Insert a raw data point. Generates a histogram |
325 | // with: |
326 | // |
327 | // bucket value range |
328 | // ------ ----------- |
329 | // 0 0 |
330 | // 1 1 |
331 | // 2 2, 3 |
332 | // 3 4 to 7 |
333 | // 4 8 to 15 |
334 | // etc. |
335 | // |
336 | void |
337 | histogram_insert_raw(histogram *h, uint64_t value) |
338 | { |
339 | as_incr_uint64(&h->counts[msb(value)]); |
340 | } |
341 | |
342 | //------------------------------------------------ |
343 | // Same as above, but not thread safe. |
344 | // |
345 | void |
346 | histogram_insert_raw_unsafe(histogram *h, uint64_t value) |
347 | { |
348 | h->counts[msb(value)]++; |
349 | } |
350 | |
351 | //------------------------------------------------ |
352 | // Save a snapshot of this histogram. This should |
353 | // be done rarely, preferably from one thread. |
354 | // |
355 | void |
356 | histogram_save_info(histogram *h) |
357 | { |
358 | cf_mutex_lock(&h->info_lock); |
359 | |
360 | // Allocate lazily so all histograms don't incur the ~2K penalty. |
361 | if (! h->info_snapshot) { |
362 | h->info_snapshot = cf_malloc(SNAPSHOT_SIZE); |
363 | } |
364 | |
365 | int prefix_len = sprintf(h->info_snapshot, "units=%s" , h->scale_tag); |
366 | char *at = h->info_snapshot + prefix_len; |
367 | |
368 | for (uint32_t b = 0; b < N_BUCKETS; b++) { |
369 | uint64_t count = h->counts[b]; |
370 | |
371 | if (count != 0) { |
372 | at += sprintf(at, ":[%s-%s)=%lu" , SNAPSHOT_TAGS[b], |
373 | SNAPSHOT_TAGS[b + 1], count); |
374 | } |
375 | } |
376 | |
377 | cf_mutex_unlock(&h->info_lock); |
378 | } |
379 | |
380 | //------------------------------------------------ |
381 | // Retrieve a snapshot of this histogram. |
382 | // |
383 | void |
384 | histogram_get_info(histogram *h, cf_dyn_buf *db) |
385 | { |
386 | cf_mutex_lock(&h->info_lock); |
387 | cf_dyn_buf_append_string(db, h->info_snapshot ? h->info_snapshot : "" ); |
388 | cf_mutex_unlock(&h->info_lock); |
389 | } |
390 | |