1 | /* |
2 | * qdist.c - QEMU helpers for handling frequency distributions of data. |
3 | * |
4 | * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> |
5 | * |
6 | * License: GNU GPL, version 2 or later. |
7 | * See the COPYING file in the top-level directory. |
8 | */ |
9 | #include "qemu/osdep.h" |
10 | #include "qemu/qdist.h" |
11 | |
12 | #include <math.h> |
13 | #ifndef NAN |
14 | #define NAN (0.0 / 0.0) |
15 | #endif |
16 | |
17 | #define QDIST_EMPTY_STR "(empty)" |
18 | |
19 | void qdist_init(struct qdist *dist) |
20 | { |
21 | dist->entries = g_new(struct qdist_entry, 1); |
22 | dist->size = 1; |
23 | dist->n = 0; |
24 | } |
25 | |
26 | void qdist_destroy(struct qdist *dist) |
27 | { |
28 | g_free(dist->entries); |
29 | } |
30 | |
31 | static inline int qdist_cmp_double(double a, double b) |
32 | { |
33 | if (a > b) { |
34 | return 1; |
35 | } else if (a < b) { |
36 | return -1; |
37 | } |
38 | return 0; |
39 | } |
40 | |
41 | static int qdist_cmp(const void *ap, const void *bp) |
42 | { |
43 | const struct qdist_entry *a = ap; |
44 | const struct qdist_entry *b = bp; |
45 | |
46 | return qdist_cmp_double(a->x, b->x); |
47 | } |
48 | |
49 | void qdist_add(struct qdist *dist, double x, long count) |
50 | { |
51 | struct qdist_entry *entry = NULL; |
52 | |
53 | if (dist->n) { |
54 | struct qdist_entry e; |
55 | |
56 | e.x = x; |
57 | entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); |
58 | } |
59 | |
60 | if (entry) { |
61 | entry->count += count; |
62 | return; |
63 | } |
64 | |
65 | if (unlikely(dist->n == dist->size)) { |
66 | dist->size *= 2; |
67 | dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size); |
68 | } |
69 | dist->n++; |
70 | entry = &dist->entries[dist->n - 1]; |
71 | entry->x = x; |
72 | entry->count = count; |
73 | qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); |
74 | } |
75 | |
76 | void qdist_inc(struct qdist *dist, double x) |
77 | { |
78 | qdist_add(dist, x, 1); |
79 | } |
80 | |
81 | /* |
82 | * Unicode for block elements. See: |
83 | * https://en.wikipedia.org/wiki/Block_Elements |
84 | */ |
85 | static const gunichar qdist_blocks[] = { |
86 | 0x2581, |
87 | 0x2582, |
88 | 0x2583, |
89 | 0x2584, |
90 | 0x2585, |
91 | 0x2586, |
92 | 0x2587, |
93 | 0x2588 |
94 | }; |
95 | |
96 | #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) |
97 | |
98 | /* |
99 | * Print a distribution into a string. |
100 | * |
101 | * This function assumes that appropriate binning has been done on the input; |
102 | * see qdist_bin__internal() and qdist_pr_plain(). |
103 | * |
104 | * Callers must free the returned string with g_free(). |
105 | */ |
106 | static char *qdist_pr_internal(const struct qdist *dist) |
107 | { |
108 | double min, max; |
109 | GString *s = g_string_new("" ); |
110 | size_t i; |
111 | |
112 | /* if only one entry, its printout will be either full or empty */ |
113 | if (dist->n == 1) { |
114 | if (dist->entries[0].count) { |
115 | g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); |
116 | } else { |
117 | g_string_append_c(s, ' '); |
118 | } |
119 | goto out; |
120 | } |
121 | |
122 | /* get min and max counts */ |
123 | min = dist->entries[0].count; |
124 | max = min; |
125 | for (i = 0; i < dist->n; i++) { |
126 | struct qdist_entry *e = &dist->entries[i]; |
127 | |
128 | if (e->count < min) { |
129 | min = e->count; |
130 | } |
131 | if (e->count > max) { |
132 | max = e->count; |
133 | } |
134 | } |
135 | |
136 | for (i = 0; i < dist->n; i++) { |
137 | struct qdist_entry *e = &dist->entries[i]; |
138 | int index; |
139 | |
140 | /* make an exception with 0; instead of using block[0], print a space */ |
141 | if (e->count) { |
142 | /* divide first to avoid loss of precision when e->count == max */ |
143 | index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); |
144 | g_string_append_unichar(s, qdist_blocks[index]); |
145 | } else { |
146 | g_string_append_c(s, ' '); |
147 | } |
148 | } |
149 | out: |
150 | return g_string_free(s, FALSE); |
151 | } |
152 | |
153 | /* |
154 | * Bin the distribution in @from into @n bins of consecutive, non-overlapping |
155 | * intervals, copying the result to @to. |
156 | * |
157 | * This function is internal to qdist: only this file and test code should |
158 | * ever call it. |
159 | * |
160 | * Note: calling this function on an already-binned qdist is a bug. |
161 | * |
162 | * If @n == 0 or @from->n == 1, use @from->n. |
163 | */ |
164 | void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) |
165 | { |
166 | double xmin, xmax; |
167 | double step; |
168 | size_t i, j; |
169 | |
170 | qdist_init(to); |
171 | |
172 | if (from->n == 0) { |
173 | return; |
174 | } |
175 | if (n == 0 || from->n == 1) { |
176 | n = from->n; |
177 | } |
178 | |
179 | /* set equally-sized bins between @from's left and right */ |
180 | xmin = qdist_xmin(from); |
181 | xmax = qdist_xmax(from); |
182 | step = (xmax - xmin) / n; |
183 | |
184 | if (n == from->n) { |
185 | /* if @from's entries are equally spaced, no need to re-bin */ |
186 | for (i = 0; i < from->n; i++) { |
187 | if (from->entries[i].x != xmin + i * step) { |
188 | goto rebin; |
189 | } |
190 | } |
191 | /* they're equally spaced, so copy the dist and bail out */ |
192 | to->entries = g_renew(struct qdist_entry, to->entries, n); |
193 | to->n = from->n; |
194 | memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); |
195 | return; |
196 | } |
197 | |
198 | rebin: |
199 | j = 0; |
200 | for (i = 0; i < n; i++) { |
201 | double x; |
202 | double left, right; |
203 | |
204 | left = xmin + i * step; |
205 | right = xmin + (i + 1) * step; |
206 | |
207 | /* Add x, even if it might not get any counts later */ |
208 | x = left; |
209 | qdist_add(to, x, 0); |
210 | |
211 | /* |
212 | * To avoid double-counting we capture [left, right) ranges, except for |
213 | * the righmost bin, which captures a [left, right] range. |
214 | */ |
215 | while (j < from->n && (from->entries[j].x < right || i == n - 1)) { |
216 | struct qdist_entry *o = &from->entries[j]; |
217 | |
218 | qdist_add(to, x, o->count); |
219 | j++; |
220 | } |
221 | } |
222 | } |
223 | |
224 | /* |
225 | * Print @dist into a string, after re-binning it into @n bins of consecutive, |
226 | * non-overlapping intervals. |
227 | * |
228 | * If @n == 0, use @orig->n. |
229 | * |
230 | * Callers must free the returned string with g_free(). |
231 | */ |
232 | char *qdist_pr_plain(const struct qdist *dist, size_t n) |
233 | { |
234 | struct qdist binned; |
235 | char *ret; |
236 | |
237 | if (dist->n == 0) { |
238 | return g_strdup(QDIST_EMPTY_STR); |
239 | } |
240 | qdist_bin__internal(&binned, dist, n); |
241 | ret = qdist_pr_internal(&binned); |
242 | qdist_destroy(&binned); |
243 | return ret; |
244 | } |
245 | |
246 | static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, |
247 | uint32_t opt, bool is_left) |
248 | { |
249 | const char *percent; |
250 | const char *lparen; |
251 | const char *rparen; |
252 | GString *s; |
253 | double x1, x2, step; |
254 | double x; |
255 | double n; |
256 | int dec; |
257 | |
258 | s = g_string_new("" ); |
259 | if (!(opt & QDIST_PR_LABELS)) { |
260 | goto out; |
261 | } |
262 | |
263 | dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; |
264 | percent = opt & QDIST_PR_PERCENT ? "%" : "" ; |
265 | |
266 | n = n_bins ? n_bins : dist->n; |
267 | x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); |
268 | step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; |
269 | |
270 | if (opt & QDIST_PR_100X) { |
271 | x *= 100.0; |
272 | step *= 100.0; |
273 | } |
274 | if (opt & QDIST_PR_NOBINRANGE) { |
275 | lparen = rparen = "" ; |
276 | x1 = x; |
277 | x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ |
278 | } else { |
279 | lparen = "[" ; |
280 | rparen = is_left ? ")" : "]" ; |
281 | if (is_left) { |
282 | x1 = x; |
283 | x2 = x + step; |
284 | } else { |
285 | x1 = x - step; |
286 | x2 = x; |
287 | } |
288 | } |
289 | g_string_append_printf(s, "%s%.*f" , lparen, dec, x1); |
290 | if (!(opt & QDIST_PR_NOBINRANGE)) { |
291 | g_string_append_printf(s, ",%.*f%s" , dec, x2, rparen); |
292 | } |
293 | g_string_append(s, percent); |
294 | out: |
295 | return g_string_free(s, FALSE); |
296 | } |
297 | |
298 | /* |
299 | * Print the distribution's histogram into a string. |
300 | * |
301 | * See also: qdist_pr_plain(). |
302 | * |
303 | * Callers must free the returned string with g_free(). |
304 | */ |
305 | char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) |
306 | { |
307 | const char *border = opt & QDIST_PR_BORDER ? "|" : "" ; |
308 | char *llabel, *rlabel; |
309 | char *hgram; |
310 | GString *s; |
311 | |
312 | if (dist->n == 0) { |
313 | return g_strdup(QDIST_EMPTY_STR); |
314 | } |
315 | |
316 | s = g_string_new("" ); |
317 | |
318 | llabel = qdist_pr_label(dist, n_bins, opt, true); |
319 | rlabel = qdist_pr_label(dist, n_bins, opt, false); |
320 | hgram = qdist_pr_plain(dist, n_bins); |
321 | g_string_append_printf(s, "%s%s%s%s%s" , |
322 | llabel, border, hgram, border, rlabel); |
323 | g_free(llabel); |
324 | g_free(rlabel); |
325 | g_free(hgram); |
326 | |
327 | return g_string_free(s, FALSE); |
328 | } |
329 | |
330 | static inline double qdist_x(const struct qdist *dist, int index) |
331 | { |
332 | if (dist->n == 0) { |
333 | return NAN; |
334 | } |
335 | return dist->entries[index].x; |
336 | } |
337 | |
338 | double qdist_xmin(const struct qdist *dist) |
339 | { |
340 | return qdist_x(dist, 0); |
341 | } |
342 | |
343 | double qdist_xmax(const struct qdist *dist) |
344 | { |
345 | return qdist_x(dist, dist->n - 1); |
346 | } |
347 | |
348 | size_t qdist_unique_entries(const struct qdist *dist) |
349 | { |
350 | return dist->n; |
351 | } |
352 | |
353 | unsigned long qdist_sample_count(const struct qdist *dist) |
354 | { |
355 | unsigned long count = 0; |
356 | size_t i; |
357 | |
358 | for (i = 0; i < dist->n; i++) { |
359 | struct qdist_entry *e = &dist->entries[i]; |
360 | |
361 | count += e->count; |
362 | } |
363 | return count; |
364 | } |
365 | |
366 | static double qdist_pairwise_avg(const struct qdist *dist, size_t index, |
367 | size_t n, unsigned long count) |
368 | { |
369 | /* amortize the recursion by using a base case > 2 */ |
370 | if (n <= 8) { |
371 | size_t i; |
372 | double ret = 0; |
373 | |
374 | for (i = 0; i < n; i++) { |
375 | struct qdist_entry *e = &dist->entries[index + i]; |
376 | |
377 | ret += e->x * e->count / count; |
378 | } |
379 | return ret; |
380 | } else { |
381 | size_t n2 = n / 2; |
382 | |
383 | return qdist_pairwise_avg(dist, index, n2, count) + |
384 | qdist_pairwise_avg(dist, index + n2, n - n2, count); |
385 | } |
386 | } |
387 | |
388 | double qdist_avg(const struct qdist *dist) |
389 | { |
390 | unsigned long count; |
391 | |
392 | count = qdist_sample_count(dist); |
393 | if (!count) { |
394 | return NAN; |
395 | } |
396 | return qdist_pairwise_avg(dist, 0, dist->n, count); |
397 | } |
398 | |