1/*
2 * qdist.c - QEMU helpers for handling frequency distributions of data.
3 *
4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5 *
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9#include "qemu/osdep.h"
10#include "qemu/qdist.h"
11
12#include <math.h>
13#ifndef NAN
14#define NAN (0.0 / 0.0)
15#endif
16
17#define QDIST_EMPTY_STR "(empty)"
18
19void qdist_init(struct qdist *dist)
20{
21 dist->entries = g_new(struct qdist_entry, 1);
22 dist->size = 1;
23 dist->n = 0;
24}
25
26void qdist_destroy(struct qdist *dist)
27{
28 g_free(dist->entries);
29}
30
31static inline int qdist_cmp_double(double a, double b)
32{
33 if (a > b) {
34 return 1;
35 } else if (a < b) {
36 return -1;
37 }
38 return 0;
39}
40
41static int qdist_cmp(const void *ap, const void *bp)
42{
43 const struct qdist_entry *a = ap;
44 const struct qdist_entry *b = bp;
45
46 return qdist_cmp_double(a->x, b->x);
47}
48
49void qdist_add(struct qdist *dist, double x, long count)
50{
51 struct qdist_entry *entry = NULL;
52
53 if (dist->n) {
54 struct qdist_entry e;
55
56 e.x = x;
57 entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
58 }
59
60 if (entry) {
61 entry->count += count;
62 return;
63 }
64
65 if (unlikely(dist->n == dist->size)) {
66 dist->size *= 2;
67 dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
68 }
69 dist->n++;
70 entry = &dist->entries[dist->n - 1];
71 entry->x = x;
72 entry->count = count;
73 qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
74}
75
76void qdist_inc(struct qdist *dist, double x)
77{
78 qdist_add(dist, x, 1);
79}
80
81/*
82 * Unicode for block elements. See:
83 * https://en.wikipedia.org/wiki/Block_Elements
84 */
85static const gunichar qdist_blocks[] = {
86 0x2581,
87 0x2582,
88 0x2583,
89 0x2584,
90 0x2585,
91 0x2586,
92 0x2587,
93 0x2588
94};
95
96#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
97
98/*
99 * Print a distribution into a string.
100 *
101 * This function assumes that appropriate binning has been done on the input;
102 * see qdist_bin__internal() and qdist_pr_plain().
103 *
104 * Callers must free the returned string with g_free().
105 */
106static char *qdist_pr_internal(const struct qdist *dist)
107{
108 double min, max;
109 GString *s = g_string_new("");
110 size_t i;
111
112 /* if only one entry, its printout will be either full or empty */
113 if (dist->n == 1) {
114 if (dist->entries[0].count) {
115 g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
116 } else {
117 g_string_append_c(s, ' ');
118 }
119 goto out;
120 }
121
122 /* get min and max counts */
123 min = dist->entries[0].count;
124 max = min;
125 for (i = 0; i < dist->n; i++) {
126 struct qdist_entry *e = &dist->entries[i];
127
128 if (e->count < min) {
129 min = e->count;
130 }
131 if (e->count > max) {
132 max = e->count;
133 }
134 }
135
136 for (i = 0; i < dist->n; i++) {
137 struct qdist_entry *e = &dist->entries[i];
138 int index;
139
140 /* make an exception with 0; instead of using block[0], print a space */
141 if (e->count) {
142 /* divide first to avoid loss of precision when e->count == max */
143 index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
144 g_string_append_unichar(s, qdist_blocks[index]);
145 } else {
146 g_string_append_c(s, ' ');
147 }
148 }
149 out:
150 return g_string_free(s, FALSE);
151}
152
153/*
154 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
155 * intervals, copying the result to @to.
156 *
157 * This function is internal to qdist: only this file and test code should
158 * ever call it.
159 *
160 * Note: calling this function on an already-binned qdist is a bug.
161 *
162 * If @n == 0 or @from->n == 1, use @from->n.
163 */
164void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
165{
166 double xmin, xmax;
167 double step;
168 size_t i, j;
169
170 qdist_init(to);
171
172 if (from->n == 0) {
173 return;
174 }
175 if (n == 0 || from->n == 1) {
176 n = from->n;
177 }
178
179 /* set equally-sized bins between @from's left and right */
180 xmin = qdist_xmin(from);
181 xmax = qdist_xmax(from);
182 step = (xmax - xmin) / n;
183
184 if (n == from->n) {
185 /* if @from's entries are equally spaced, no need to re-bin */
186 for (i = 0; i < from->n; i++) {
187 if (from->entries[i].x != xmin + i * step) {
188 goto rebin;
189 }
190 }
191 /* they're equally spaced, so copy the dist and bail out */
192 to->entries = g_renew(struct qdist_entry, to->entries, n);
193 to->n = from->n;
194 memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
195 return;
196 }
197
198 rebin:
199 j = 0;
200 for (i = 0; i < n; i++) {
201 double x;
202 double left, right;
203
204 left = xmin + i * step;
205 right = xmin + (i + 1) * step;
206
207 /* Add x, even if it might not get any counts later */
208 x = left;
209 qdist_add(to, x, 0);
210
211 /*
212 * To avoid double-counting we capture [left, right) ranges, except for
213 * the righmost bin, which captures a [left, right] range.
214 */
215 while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
216 struct qdist_entry *o = &from->entries[j];
217
218 qdist_add(to, x, o->count);
219 j++;
220 }
221 }
222}
223
224/*
225 * Print @dist into a string, after re-binning it into @n bins of consecutive,
226 * non-overlapping intervals.
227 *
228 * If @n == 0, use @orig->n.
229 *
230 * Callers must free the returned string with g_free().
231 */
232char *qdist_pr_plain(const struct qdist *dist, size_t n)
233{
234 struct qdist binned;
235 char *ret;
236
237 if (dist->n == 0) {
238 return g_strdup(QDIST_EMPTY_STR);
239 }
240 qdist_bin__internal(&binned, dist, n);
241 ret = qdist_pr_internal(&binned);
242 qdist_destroy(&binned);
243 return ret;
244}
245
246static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
247 uint32_t opt, bool is_left)
248{
249 const char *percent;
250 const char *lparen;
251 const char *rparen;
252 GString *s;
253 double x1, x2, step;
254 double x;
255 double n;
256 int dec;
257
258 s = g_string_new("");
259 if (!(opt & QDIST_PR_LABELS)) {
260 goto out;
261 }
262
263 dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
264 percent = opt & QDIST_PR_PERCENT ? "%" : "";
265
266 n = n_bins ? n_bins : dist->n;
267 x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
268 step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
269
270 if (opt & QDIST_PR_100X) {
271 x *= 100.0;
272 step *= 100.0;
273 }
274 if (opt & QDIST_PR_NOBINRANGE) {
275 lparen = rparen = "";
276 x1 = x;
277 x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
278 } else {
279 lparen = "[";
280 rparen = is_left ? ")" : "]";
281 if (is_left) {
282 x1 = x;
283 x2 = x + step;
284 } else {
285 x1 = x - step;
286 x2 = x;
287 }
288 }
289 g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
290 if (!(opt & QDIST_PR_NOBINRANGE)) {
291 g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
292 }
293 g_string_append(s, percent);
294 out:
295 return g_string_free(s, FALSE);
296}
297
298/*
299 * Print the distribution's histogram into a string.
300 *
301 * See also: qdist_pr_plain().
302 *
303 * Callers must free the returned string with g_free().
304 */
305char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
306{
307 const char *border = opt & QDIST_PR_BORDER ? "|" : "";
308 char *llabel, *rlabel;
309 char *hgram;
310 GString *s;
311
312 if (dist->n == 0) {
313 return g_strdup(QDIST_EMPTY_STR);
314 }
315
316 s = g_string_new("");
317
318 llabel = qdist_pr_label(dist, n_bins, opt, true);
319 rlabel = qdist_pr_label(dist, n_bins, opt, false);
320 hgram = qdist_pr_plain(dist, n_bins);
321 g_string_append_printf(s, "%s%s%s%s%s",
322 llabel, border, hgram, border, rlabel);
323 g_free(llabel);
324 g_free(rlabel);
325 g_free(hgram);
326
327 return g_string_free(s, FALSE);
328}
329
330static inline double qdist_x(const struct qdist *dist, int index)
331{
332 if (dist->n == 0) {
333 return NAN;
334 }
335 return dist->entries[index].x;
336}
337
338double qdist_xmin(const struct qdist *dist)
339{
340 return qdist_x(dist, 0);
341}
342
343double qdist_xmax(const struct qdist *dist)
344{
345 return qdist_x(dist, dist->n - 1);
346}
347
348size_t qdist_unique_entries(const struct qdist *dist)
349{
350 return dist->n;
351}
352
353unsigned long qdist_sample_count(const struct qdist *dist)
354{
355 unsigned long count = 0;
356 size_t i;
357
358 for (i = 0; i < dist->n; i++) {
359 struct qdist_entry *e = &dist->entries[i];
360
361 count += e->count;
362 }
363 return count;
364}
365
366static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
367 size_t n, unsigned long count)
368{
369 /* amortize the recursion by using a base case > 2 */
370 if (n <= 8) {
371 size_t i;
372 double ret = 0;
373
374 for (i = 0; i < n; i++) {
375 struct qdist_entry *e = &dist->entries[index + i];
376
377 ret += e->x * e->count / count;
378 }
379 return ret;
380 } else {
381 size_t n2 = n / 2;
382
383 return qdist_pairwise_avg(dist, index, n2, count) +
384 qdist_pairwise_avg(dist, index + n2, n - n2, count);
385 }
386}
387
388double qdist_avg(const struct qdist *dist)
389{
390 unsigned long count;
391
392 count = qdist_sample_count(dist);
393 if (!count) {
394 return NAN;
395 }
396 return qdist_pairwise_avg(dist, 0, dist->n, count);
397}
398