1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* (c) M.L. Kersten |
10 | * Performance tracing |
11 | * The stethoscope/tachograph and tomograph performance monitors have exclusive access |
12 | * to a single event stream, which avoids concurrency conflicts amongst clients. |
13 | * It also avoid cluthered event records on the stream. Since this event stream is owned |
14 | * by a client, we should ensure that the profiler is automatically |
15 | * reset once the owner leaves. |
16 | */ |
17 | #include "monetdb_config.h" |
18 | #include "mutils.h" /* mercurial_revision */ |
19 | #include "msabaoth.h" /* msab_getUUID */ |
20 | #include "mal_authorize.h" |
21 | #include "mal_function.h" |
22 | #include "mal_listing.h" |
23 | #include "mal_profiler.h" |
24 | #include "mal_runtime.h" |
25 | #include "mal_utils.h" |
26 | #include "mal_resource.h" |
27 | |
28 | #ifdef HAVE_SYS_TIME_H |
29 | #include <sys/time.h> |
30 | #endif |
31 | |
32 | #include <string.h> |
33 | |
34 | static str myname = 0; // avoid tracing the profiler module |
35 | |
36 | /* The JSON rendering can be either using '\n' separators between |
37 | * each key:value pair or as a single line. |
38 | * The current stethoscope implementation requires the first option and |
39 | * also the term rendering PRET to be set to '' |
40 | */ |
41 | |
42 | #define PRETTIFY "\n" |
43 | //#define PRETTIFY |
44 | |
45 | /* When the MAL block contains a BARRIER block we may end up with tons |
46 | * of profiler events. To avoid this, we stop emitting the events |
47 | * when we reached the HIGHWATERMARK. Leaving a message in the log. */ |
48 | #define HIGHWATERMARK 5 |
49 | |
50 | |
51 | int malProfileMode = 0; /* global flag to indicate profiling mode */ |
52 | static oid malprofileruser; /* keep track on who has claimed the channel */ |
53 | |
54 | static struct timeval startup_time; |
55 | |
56 | static ATOMIC_TYPE hbdelay = ATOMIC_VAR_INIT(0); |
57 | |
58 | #ifdef HAVE_SYS_RESOURCE_H |
59 | struct rusage infoUsage; |
60 | static struct rusage prevUsage; |
61 | #endif |
62 | |
63 | #define LOGLEN 8192 |
64 | #define lognew() loglen = 0; logbase = logbuffer; *logbase = 0; |
65 | |
66 | #define logadd(...) \ |
67 | do { \ |
68 | char tmp_buff[LOGLEN]; \ |
69 | int tmp_len = 0; \ |
70 | tmp_len = snprintf(tmp_buff, LOGLEN, __VA_ARGS__); \ |
71 | if (loglen + tmp_len < LOGLEN) \ |
72 | loglen += snprintf(logbase+loglen, LOGLEN - loglen, __VA_ARGS__); \ |
73 | else { \ |
74 | logjsonInternal(logbuffer); \ |
75 | lognew(); \ |
76 | loglen += snprintf(logbase+loglen, LOGLEN - loglen, __VA_ARGS__); \ |
77 | } \ |
78 | } while (0) |
79 | |
80 | // The heart beat events should be sent to all outstanding channels. |
81 | static void logjsonInternal(char *logbuffer) |
82 | { |
83 | size_t len; |
84 | len = strlen(logbuffer); |
85 | |
86 | MT_lock_set(&mal_profileLock); |
87 | if (maleventstream) { |
88 | // upon request the log record is sent over the profile stream |
89 | (void) mnstr_write(maleventstream, logbuffer, 1, len); |
90 | (void) mnstr_flush(maleventstream); |
91 | } |
92 | MT_lock_unset(&mal_profileLock); |
93 | } |
94 | |
95 | static char * |
96 | truncate_string(char *inp) |
97 | { |
98 | size_t len; |
99 | char *ret; |
100 | size_t ret_len = LOGLEN/2; |
101 | size_t padding = 64; |
102 | |
103 | len = strlen(inp); |
104 | ret = (char *)GDKmalloc(ret_len + 1); |
105 | if (ret == NULL) { |
106 | return NULL; |
107 | } |
108 | |
109 | snprintf(ret, ret_len + 1, "%.*s...<truncated>...%.*s" , |
110 | (int) (ret_len/2), inp, (int) (ret_len/2 - padding), |
111 | inp + (len - ret_len/2 + padding)); |
112 | |
113 | return ret; |
114 | } |
115 | |
116 | /* JSON rendering method of performance data. |
117 | * The eventparser may assume this layout for ease of parsing |
118 | EXAMPLE: |
119 | { |
120 | "event":6 , |
121 | "time":"15:37:13.799706", |
122 | "thread":3, |
123 | "function":"user.s3_1", |
124 | "pc":1, |
125 | "tag":10397, |
126 | "state":"start", |
127 | "usec":0, |
128 | "rss":215, |
129 | "size":0, |
130 | "oublock":8, |
131 | "stmt":"X_41=0@0:void := querylog.define(\"select count(*) from tables;\":str,\"default_pipe\":str,30:int);", |
132 | "short":"define( \"select count(*) from tables;\",\"default_pipe\",30 )", |
133 | "prereq":[] |
134 | } |
135 | */ |
136 | static void |
137 | renderProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int start) |
138 | { |
139 | char logbuffer[LOGLEN], *logbase; |
140 | size_t loglen; |
141 | str stmt, c; |
142 | str stmtq; |
143 | lng usec; |
144 | uint64_t microseconds; |
145 | |
146 | /* ignore generation of events for instructions that are called too often |
147 | * they may appear when BARRIER blocks are executed |
148 | * The default parameter should be sufficient for most practical cases. |
149 | */ |
150 | if( !start && pci->calls > HIGHWATERMARK){ |
151 | if( pci->calls == 10000 || pci->calls == 100000 || pci->calls == 1000000 || pci->calls == 10000000) |
152 | fprintf(stderr, "#Profiler too many calls %d\n" , pci->calls); |
153 | return; |
154 | } |
155 | |
156 | /* The stream of events can be complete read by the DBA, |
157 | * all other users can only see events assigned to their account |
158 | */ |
159 | if( malprofileruser!= MAL_ADMIN && malprofileruser != cntxt->user) |
160 | return; |
161 | |
162 | usec= GDKusec(); |
163 | microseconds = (uint64_t)usec - ((uint64_t)startup_time.tv_sec*1000000 - (uint64_t)startup_time.tv_usec); |
164 | /* make profile event tuple */ |
165 | lognew(); |
166 | logadd("{" PRETTIFY); // fill in later with the event counter |
167 | /* TODO: This could probably be optimized somehow to avoid the |
168 | * function call to mercurial_revision(). |
169 | */ |
170 | logadd("\"version\":\"" VERSION" (hg id: %s)\"," PRETTIFY, mercurial_revision()); |
171 | logadd("\"source\":\"trace\"," PRETTIFY); |
172 | |
173 | logadd("\"user_id\":" OIDFMT"," PRETTIFY, cntxt->user); |
174 | logadd("\"clk\":" LLFMT"," PRETTIFY, usec); |
175 | logadd("\"ctime\":%" PRIu64"," PRETTIFY, microseconds); |
176 | logadd("\"thread\":%d," PRETTIFY, THRgettid()); |
177 | |
178 | logadd("\"function\":\"%s.%s\"," PRETTIFY, getModuleId(getInstrPtr(mb, 0)), getFunctionId(getInstrPtr(mb, 0))); |
179 | logadd("\"pc\":%d," PRETTIFY, mb?getPC(mb,pci):0); |
180 | logadd("\"tag\":" OIDFMT"," PRETTIFY, stk?stk->tag:0); |
181 | logadd("\"module\":\"%s\"," PRETTIFY, pci->modname ? pci->modname : "" ); |
182 | if (pci->modname && strcmp(pci->modname, "user" ) == 0) { |
183 | oid caller_tag = 0; |
184 | if(stk && stk->up) { |
185 | caller_tag = stk->up->tag; |
186 | } |
187 | logadd("\"caller\":" OIDFMT"," PRETTIFY, caller_tag); |
188 | } |
189 | logadd("\"instruction\":\"%s\"," PRETTIFY, pci->fcnname ? pci->fcnname : "" ); |
190 | if (!GDKinmemory()) { |
191 | char *uuid; |
192 | if ((c = msab_getUUID(&uuid)) == NULL) { |
193 | logadd("\"session\":\"%s\"," PRETTIFY, uuid); |
194 | free(uuid); |
195 | } else |
196 | free(c); |
197 | } |
198 | |
199 | if( start){ |
200 | logadd("\"state\":\"start\"," PRETTIFY); |
201 | // determine the Estimated Time of Completion |
202 | if ( pci->calls){ |
203 | logadd("\"usec\":" LLFMT"," PRETTIFY, pci->totticks/pci->calls); |
204 | } else{ |
205 | logadd("\"usec\":" LLFMT"," PRETTIFY, pci->ticks); |
206 | } |
207 | } else { |
208 | logadd("\"state\":\"done\"," PRETTIFY); |
209 | logadd("\"usec\":" LLFMT"," PRETTIFY, pci->ticks); |
210 | } |
211 | logadd("\"rss\":%zu," PRETTIFY, MT_getrss()/1024/1024); |
212 | logadd("\"size\":" LLFMT "," PRETTIFY, pci? pci->wbytes/1024/1024:0); // result size |
213 | |
214 | #ifdef NUMAprofiling |
215 | logadd("\"numa\":[" ); |
216 | if(mb) |
217 | for( i= pci->retc ; i < pci->argc; i++) |
218 | if( !isVarConstant(mb, getArg(pci,i)) && mb->var[getArg(pci,i)]->worker) |
219 | logadd("%c %d" , (i?',':' '), mb->var[getArg(pci,i)]->worker); |
220 | logadd("]," PRETTIFY); |
221 | #endif |
222 | |
223 | #ifdef HAVE_SYS_RESOURCE_H |
224 | getrusage(RUSAGE_SELF, &infoUsage); |
225 | if(infoUsage.ru_inblock - prevUsage.ru_inblock) |
226 | logadd("\"inblock\":%ld," PRETTIFY, infoUsage.ru_inblock - prevUsage.ru_inblock); |
227 | if(infoUsage.ru_oublock - prevUsage.ru_oublock) |
228 | logadd("\"oublock\":%ld," PRETTIFY, infoUsage.ru_oublock - prevUsage.ru_oublock); |
229 | if(infoUsage.ru_majflt - prevUsage.ru_majflt) |
230 | logadd("\"majflt\":%ld," PRETTIFY, infoUsage.ru_majflt - prevUsage.ru_majflt); |
231 | if(infoUsage.ru_nswap - prevUsage.ru_nswap) |
232 | logadd("\"nswap\":%ld," PRETTIFY, infoUsage.ru_nswap - prevUsage.ru_nswap); |
233 | if(infoUsage.ru_nvcsw - prevUsage.ru_nvcsw) |
234 | logadd("\"nvcsw\":%ld," PRETTIFY, infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +infoUsage.ru_nivcsw - prevUsage.ru_nivcsw); |
235 | prevUsage = infoUsage; |
236 | #endif |
237 | |
238 | if( mb){ |
239 | char prereq[BUFSIZ]; |
240 | size_t len; |
241 | int i,j,k,comma; |
242 | InstrPtr q; |
243 | char *truncated; |
244 | |
245 | /* generate actual call statement */ |
246 | stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL); |
247 | if (stmt) { |
248 | c = stmt; |
249 | |
250 | while (*c && isspace((unsigned char)*c)) |
251 | c++; |
252 | if( *c){ |
253 | stmtq = mal_quote(c, strlen(c)); |
254 | if (stmtq && strlen(stmtq) > LOGLEN/2) { |
255 | truncated = truncate_string(stmtq); |
256 | GDKfree(stmtq); |
257 | stmtq = truncated; |
258 | } |
259 | if (stmtq != NULL) { |
260 | logadd("\"stmt\":\"%s\"," PRETTIFY, stmtq); |
261 | GDKfree(stmtq); |
262 | } |
263 | } |
264 | GDKfree(stmt); |
265 | } |
266 | |
267 | // ship the beautified version as well |
268 | |
269 | stmt = shortStmtRendering(mb, stk, pci); |
270 | stmtq = mal_quote(stmt, strlen(stmt)); |
271 | if (stmtq && strlen(stmtq) > LOGLEN/2) { |
272 | truncated = truncate_string(stmtq); |
273 | GDKfree(stmtq); |
274 | stmtq = truncated; |
275 | } |
276 | if (stmtq != NULL) { |
277 | logadd("\"short\":\"%s\"," PRETTIFY, stmtq); |
278 | GDKfree(stmtq); |
279 | } |
280 | GDKfree(stmt); |
281 | |
282 | |
283 | // collect the prerequisite pre-requisite statements |
284 | prereq[0]='['; |
285 | prereq[1]=0; |
286 | len = 1; |
287 | comma=0; |
288 | for(i= pci->retc; i < pci->argc; i++){ |
289 | for( j = pci->pc-1; j > 0; j--){ |
290 | q= getInstrPtr(mb,j); |
291 | for( k=0; k < q->retc; k++) |
292 | if( getArg(q,k) == getArg(pci,i)) |
293 | break; |
294 | if( k < q->retc){ |
295 | snprintf(prereq + len, BUFSIZ-len,"%s%d" , (comma?"," :"" ), j); |
296 | len = strlen(prereq); |
297 | comma++; |
298 | break; |
299 | } |
300 | } |
301 | } |
302 | #define MALARGUMENTDETAILS |
303 | #ifdef MALARGUMENTDETAILS |
304 | logadd("\"prereq\":%s]," PRETTIFY, prereq); |
305 | #else |
306 | logadd("\"prereq\":%s]" PRETTIFY, prereq); |
307 | #endif |
308 | |
309 | /* EXAMPLE MAL statement argument decomposition |
310 | * The eventparser may assume this layout for ease of parsing |
311 | { |
312 | ... as above ... |
313 | "result":{"clk":"173297139,"pc":1,"index":0,,"name":"X_6","type":"void","value":"0@0","eol":0} |
314 | ... |
315 | "argument":{"clk":173297139,"pc":1,"index":"2","type":"str","value":"\"default_pipe\"","eol":0}, |
316 | } |
317 | This information can be used to determine memory footprint and variable life times. |
318 | */ |
319 | |
320 | #define PRET |
321 | #ifdef MALARGUMENTDETAILS |
322 | // Also show details of the arguments for modelling |
323 | if(mb){ |
324 | logadd("\"ret\":[" ); |
325 | for( j=0; j< pci->argc; j++){ |
326 | int tpe = getVarType(mb, getArg(pci,j)); |
327 | str tname = 0, cv; |
328 | lng total = 0; |
329 | BUN cnt = 0; |
330 | bat bid=0; |
331 | |
332 | if( j == pci->retc ){ |
333 | logadd("]," PRETTIFY"\"arg\":[" ); |
334 | } |
335 | logadd("{" ); |
336 | logadd("\"index\":%d," PRET, j); |
337 | logadd("\"name\":\"%s\"," PRET, getVarName(mb, getArg(pci,j))); |
338 | if( getVarSTC(mb,getArg(pci,j))){ |
339 | InstrPtr stc = getInstrPtr(mb, getVarSTC(mb,getArg(pci,j))); |
340 | if(stc && strcmp(getModuleId(stc),"sql" ) ==0 && strncmp(getFunctionId(stc),"bind" ,4)==0) |
341 | logadd("\"alias\":\"%s.%s.%s\"," PRET, |
342 | getVarConstant(mb, getArg(stc,stc->retc +1)).val.sval, |
343 | getVarConstant(mb, getArg(stc,stc->retc +2)).val.sval, |
344 | getVarConstant(mb, getArg(stc,stc->retc +3)).val.sval); |
345 | } |
346 | if( isaBatType(tpe) ){ |
347 | BAT *d= BATdescriptor( bid = stk->stk[getArg(pci,j)].val.bval); |
348 | tname = getTypeName(getBatType(tpe)); |
349 | logadd("\"type\":\"bat[:%s]\"," PRET, tname); |
350 | if( d) { |
351 | BAT *v; |
352 | cnt = BATcount(d); |
353 | if( isVIEW(d)){ |
354 | logadd("\"view\":\"true\"," PRET); |
355 | logadd("\"parent\":%d," PRET, VIEWtparent(d)); |
356 | logadd("\"seqbase\":" BUNFMT"," PRET, d->hseqbase); |
357 | logadd("\"hghbase\":" BUNFMT"," PRET, d->hseqbase + cnt); |
358 | v= BBPquickdesc(VIEWtparent(d), false); |
359 | logadd("\"kind\":\"%s\"," PRET, (v && !v->batTransient ? "persistent" : "transient" )); |
360 | } else |
361 | logadd("\"kind\":\"%s\"," PRET, ( d->batTransient ? "transient" : "persistent" )); |
362 | total += cnt * d->twidth; |
363 | total += heapinfo(d->tvheap, d->batCacheid); |
364 | total += hashinfo(d->thash, d->batCacheid); |
365 | total += IMPSimprintsize(d); |
366 | BBPunfix(d->batCacheid); |
367 | } |
368 | logadd("\"bid\":%d," PRET, bid); |
369 | logadd("\"count\":" BUNFMT"," PRET, cnt); |
370 | logadd("\"size\":" LLFMT"," PRET, total); |
371 | } else{ |
372 | char *truncated = NULL; |
373 | tname = getTypeName(tpe); |
374 | logadd("\"type\":\"%s\"," PRET, tname); |
375 | cv = VALformat(&stk->stk[getArg(pci,j)]); |
376 | stmtq = cv ? mal_quote(cv, strlen(cv)) : NULL; |
377 | if (stmtq != NULL && strlen(stmtq) > LOGLEN/2) { |
378 | truncated = truncate_string(stmtq); |
379 | GDKfree(stmtq); |
380 | stmtq = truncated; |
381 | } |
382 | if (stmtq == NULL) { |
383 | logadd("\"value\":\"(null)\"," PRET); |
384 | } else { |
385 | logadd("\"value\":\"%s\"," , stmtq); |
386 | } |
387 | GDKfree(cv); |
388 | GDKfree(stmtq); |
389 | } |
390 | logadd("\"eol\":%d" PRET, getVarEolife(mb,getArg(pci,j))); |
391 | logadd("\"used\":%d" PRET, isVarUsed(mb,getArg(pci,j))); |
392 | logadd("\"fixed\":%d" PRET, isVarFixed(mb,getArg(pci,j))); |
393 | logadd("\"udf\":%d" PRET, isVarUDFtype(mb,getArg(pci,j))); |
394 | GDKfree(tname); |
395 | logadd("}%s" , (j< pci->argc-1 && j != pci->retc -1?"," :"" )); |
396 | } |
397 | logadd("]" PRETTIFY); // end marker for arguments |
398 | } |
399 | } |
400 | #endif |
401 | logadd("}\n" ); // end marker |
402 | logjsonInternal(logbuffer); |
403 | } |
404 | |
405 | /* the OS details on cpu load are read from /proc/stat |
406 | * We should use an OS define to react to the maximal cores |
407 | */ |
408 | |
409 | static struct{ |
410 | lng user, nice, system, idle, iowait; |
411 | double load; |
412 | } corestat[256]; |
413 | |
414 | static int |
415 | getCPULoad(char cpuload[BUFSIZ]){ |
416 | int cpu, len = 0, i; |
417 | lng user, nice, system, idle, iowait; |
418 | size_t n; |
419 | char buf[BUFSIZ+1], *s; |
420 | static FILE *proc= NULL; |
421 | lng newload; |
422 | |
423 | if (proc == NULL || ferror(proc)) |
424 | proc = fopen("/proc/stat" ,"r" ); |
425 | else rewind(proc); |
426 | if (proc == NULL) { |
427 | /* unexpected */ |
428 | return -1; |
429 | } |
430 | /* read complete file to avoid concurrent write issues */ |
431 | if ((n = fread(buf, 1, BUFSIZ,proc)) == 0 ) |
432 | return -1; |
433 | buf[n] = 0; |
434 | for (s= buf; *s; s++) { |
435 | if (strncmp(s,"cpu" ,3)== 0){ |
436 | s +=3; |
437 | if (*s == ' ') { |
438 | s++; |
439 | cpu = 255; // the cpu totals stored here |
440 | } else { |
441 | cpu = atoi(s); |
442 | if (cpu < 0 || cpu > 255) |
443 | cpu = 255; |
444 | } |
445 | s= strchr(s,' '); |
446 | if (s == NULL) /* unexpected format of file */ |
447 | break; |
448 | |
449 | while( *s && isspace((unsigned char)*s)) s++; |
450 | i= sscanf(s,LLSCN" " LLSCN" " LLSCN" " LLSCN" " LLSCN, &user, &nice, &system, &idle, &iowait); |
451 | if ( i != 5 ) |
452 | goto skip; |
453 | newload = (user - corestat[cpu].user + nice - corestat[cpu].nice + system - corestat[cpu].system); |
454 | if ( newload) |
455 | corestat[cpu].load = (double) newload / (newload + idle - corestat[cpu].idle + iowait - corestat[cpu].iowait); |
456 | corestat[cpu].user = user; |
457 | corestat[cpu].nice = nice; |
458 | corestat[cpu].system = system; |
459 | corestat[cpu].idle = idle; |
460 | corestat[cpu].iowait = iowait; |
461 | } |
462 | skip: |
463 | while (*s && *s != '\n') |
464 | s++; |
465 | } |
466 | |
467 | if(cpuload == 0) |
468 | return 0; |
469 | // identify core processing |
470 | len += snprintf(cpuload, BUFSIZ, "[" ); |
471 | for (cpu = 0; cpuload && cpu < 255 && corestat[cpu].user; cpu++) { |
472 | len +=snprintf(cpuload + len, BUFSIZ - len, "%s%.2f" , (cpu?"," :"" ), corestat[cpu].load); |
473 | } |
474 | (void) snprintf(cpuload + len, BUFSIZ - len, "]" ); |
475 | return 0; |
476 | } |
477 | |
478 | void |
479 | profilerHeartbeatEvent(char *alter) |
480 | { |
481 | char cpuload[BUFSIZ]; |
482 | char logbuffer[LOGLEN], *logbase; |
483 | int loglen; |
484 | lng usec; |
485 | uint64_t microseconds; |
486 | |
487 | if (ATOMIC_GET(&hbdelay) == 0 || maleventstream == 0) |
488 | return; |
489 | usec = GDKusec(); |
490 | microseconds = (uint64_t)startup_time.tv_sec*1000000 + (uint64_t)startup_time.tv_usec + (uint64_t)usec; |
491 | |
492 | /* get CPU load on beat boundaries only */ |
493 | if ( getCPULoad(cpuload) ) |
494 | return; |
495 | |
496 | lognew(); |
497 | logadd("{" PRETTIFY); // fill in later with the event counter |
498 | logadd("\"source\":\"heartbeat\"," PRETTIFY); |
499 | if (GDKinmemory()) { |
500 | char *uuid, *err; |
501 | if ((err = msab_getUUID(&uuid)) == NULL) { |
502 | logadd("\"session\":\"%s\"," PRETTIFY, uuid); |
503 | free(uuid); |
504 | } else |
505 | free(err); |
506 | } |
507 | logadd("\"clk\":" LLFMT"," PRETTIFY,usec); |
508 | logadd("\"ctime\":%" PRIu64"," PRETTIFY, microseconds); |
509 | logadd("\"rss\":%zu," PRETTIFY, MT_getrss()/1024/1024); |
510 | #ifdef HAVE_SYS_RESOURCE_H |
511 | getrusage(RUSAGE_SELF, &infoUsage); |
512 | if(infoUsage.ru_inblock - prevUsage.ru_inblock) |
513 | logadd("\"inblock\":%ld," PRETTIFY, infoUsage.ru_inblock - prevUsage.ru_inblock); |
514 | if(infoUsage.ru_oublock - prevUsage.ru_oublock) |
515 | logadd("\"oublock\":%ld," PRETTIFY, infoUsage.ru_oublock - prevUsage.ru_oublock); |
516 | if(infoUsage.ru_majflt - prevUsage.ru_majflt) |
517 | logadd("\"majflt\":%ld," PRETTIFY, infoUsage.ru_majflt - prevUsage.ru_majflt); |
518 | if(infoUsage.ru_nswap - prevUsage.ru_nswap) |
519 | logadd("\"nswap\":%ld," PRETTIFY, infoUsage.ru_nswap - prevUsage.ru_nswap); |
520 | if(infoUsage.ru_nvcsw - prevUsage.ru_nvcsw) |
521 | logadd("\"nvcsw\":%ld," PRETTIFY, infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +infoUsage.ru_nivcsw - prevUsage.ru_nivcsw); |
522 | prevUsage = infoUsage; |
523 | #endif |
524 | logadd("\"state\":\"%s\"," PRETTIFY,alter); |
525 | logadd("\"cpuload\":%s" PRETTIFY,cpuload); |
526 | logadd("}\n" ); // end marker |
527 | logjsonInternal(logbuffer); |
528 | } |
529 | |
530 | void |
531 | profilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int start) |
532 | { |
533 | (void) cntxt; |
534 | if (stk == NULL) return; |
535 | if (pci == NULL) return; |
536 | if (getModuleId(pci) == myname) // ignore profiler commands from monitoring |
537 | return; |
538 | |
539 | if( maleventstream) { |
540 | renderProfilerEvent(cntxt, mb, stk, pci, start); |
541 | if ( !start && pci->pc ==0) |
542 | profilerHeartbeatEvent("ping" ); |
543 | if ( start && pci->token == ENDsymbol) |
544 | profilerHeartbeatEvent("ping" ); |
545 | } |
546 | } |
547 | |
548 | /* The first scheme dumps the events on a stream (and in the pool) |
549 | */ |
550 | str |
551 | openProfilerStream(Client cntxt) |
552 | { |
553 | int j; |
554 | |
555 | #ifdef HAVE_SYS_RESOURCE_H |
556 | getrusage(RUSAGE_SELF, &infoUsage); |
557 | prevUsage = infoUsage; |
558 | #endif |
559 | if (myname == 0){ |
560 | myname = putName("profiler" ); |
561 | logjsonInternal(monet_characteristics); |
562 | } |
563 | if( maleventstream){ |
564 | /* The DBA can always grab the stream, others have to wait */ |
565 | if ( cntxt->user == MAL_ADMIN) |
566 | closeProfilerStream(cntxt); |
567 | else |
568 | throw(MAL,"profiler.start" ,"Profiler already running, stream not available" ); |
569 | } |
570 | malProfileMode = -1; |
571 | maleventstream = cntxt->fdout; |
572 | malprofileruser = cntxt->user; |
573 | |
574 | // Ignore the JSON rendering mode, use compiled time version |
575 | |
576 | /* show all in progress instructions for stethoscope startup */ |
577 | /* this code is not thread safe, because the inprogress administration may change concurrently */ |
578 | MT_lock_set(&mal_delayLock); |
579 | for(j = 0; j <THREADS; j++) |
580 | if( workingset[j].mb) |
581 | /* show the event */ |
582 | profilerEvent(workingset[j].cntxt, workingset[j].mb, workingset[j].stk, workingset[j].pci, 1); |
583 | MT_lock_unset(&mal_delayLock); |
584 | return MAL_SUCCEED; |
585 | } |
586 | |
587 | str |
588 | closeProfilerStream(Client cntxt) |
589 | { |
590 | (void) cntxt; |
591 | maleventstream = NULL; |
592 | malProfileMode = 0; |
593 | malprofileruser = 0; |
594 | return MAL_SUCCEED; |
595 | } |
596 | |
597 | /* the second scheme is to collect the profile |
598 | * events in a local table for direct SQL inspection |
599 | */ |
600 | str |
601 | startProfiler(Client cntxt) |
602 | { |
603 | #ifdef HAVE_SYS_RESOURCE_H |
604 | getrusage(RUSAGE_SELF, &infoUsage); |
605 | prevUsage = infoUsage; |
606 | #endif |
607 | (void) cntxt; |
608 | |
609 | if( maleventstream){ |
610 | throw(MAL,"profiler.start" ,"Profiler already running, stream not available" ); |
611 | } |
612 | MT_lock_set(&mal_profileLock ); |
613 | if (myname == 0){ |
614 | myname = putName("profiler" ); |
615 | } |
616 | malProfileMode = 1; |
617 | MT_lock_unset(&mal_profileLock); |
618 | logjsonInternal(monet_characteristics); |
619 | // reset the trace table |
620 | clearTrace(cntxt); |
621 | |
622 | return MAL_SUCCEED; |
623 | } |
624 | |
625 | /* SQL tracing is simplified, because it only collects the events in the temporary table. |
626 | */ |
627 | str |
628 | startTrace(Client cntxt) |
629 | { |
630 | cntxt->sqlprofiler = TRUE; |
631 | clearTrace(cntxt); |
632 | return MAL_SUCCEED; |
633 | } |
634 | |
635 | str |
636 | stopTrace(Client cntxt) |
637 | { |
638 | cntxt->sqlprofiler = FALSE; |
639 | return MAL_SUCCEED; |
640 | } |
641 | |
642 | str |
643 | stopProfiler(Client cntxt) |
644 | { |
645 | MT_lock_set(&mal_profileLock); |
646 | malProfileMode = 0; |
647 | setHeartbeat(0); // stop heartbeat |
648 | if( cntxt) |
649 | closeProfilerStream(cntxt); |
650 | MT_lock_unset(&mal_profileLock); |
651 | return MAL_SUCCEED; |
652 | } |
653 | |
654 | /* |
655 | * SQL profile traces |
656 | * The events being captured are stored in client specific BATs. |
657 | * They are made persistent to accumate information over |
658 | * multiple sessions. This means it has to be explicitly reset |
659 | * to avoid disc overflow using profiler.reset(). |
660 | * |
661 | * The information returned for the trace is purposely limited |
662 | * to the ticks and the final MAL instruction. |
663 | * For more detailed analysis, the stethoscope should be used. |
664 | */ |
665 | |
666 | static void |
667 | _cleanupProfiler(Client cntxt) |
668 | { |
669 | BBPunfix(cntxt->profticks->batCacheid); |
670 | BBPunfix(cntxt->profstmt->batCacheid); |
671 | cntxt->profticks = cntxt->profstmt = NULL; |
672 | } |
673 | |
674 | static BAT * |
675 | TRACEcreate(int tt) |
676 | { |
677 | BAT *b; |
678 | |
679 | b = COLnew(0, tt, 1 << 10, TRANSIENT); |
680 | if (b == NULL) |
681 | return NULL; |
682 | return b; |
683 | } |
684 | |
685 | static void |
686 | initTrace(Client cntxt) |
687 | { |
688 | MT_lock_set(&mal_profileLock); |
689 | if (cntxt->profticks) { |
690 | MT_lock_unset(&mal_profileLock); |
691 | return; /* already initialized */ |
692 | } |
693 | cntxt->profticks = TRACEcreate( TYPE_lng); |
694 | cntxt->profstmt = TRACEcreate( TYPE_str); |
695 | if ( cntxt->profticks == NULL || cntxt->profstmt == NULL ) |
696 | _cleanupProfiler(cntxt); |
697 | MT_lock_unset(&mal_profileLock); |
698 | } |
699 | |
700 | int |
701 | TRACEtable(Client cntxt, BAT **r) |
702 | { |
703 | initTrace(cntxt); |
704 | MT_lock_set(&mal_profileLock); |
705 | if (cntxt->profticks == NULL) { |
706 | MT_lock_unset(&mal_profileLock); |
707 | return -1; /* not initialized */ |
708 | } |
709 | r[0] = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT); |
710 | r[1] = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT); |
711 | MT_lock_unset(&mal_profileLock); |
712 | return 2; |
713 | } |
714 | |
715 | BAT * |
716 | getTrace(Client cntxt, const char *nme) |
717 | { |
718 | BAT *bn = NULL; |
719 | |
720 | MT_lock_set(&mal_profileLock); |
721 | if (cntxt->profticks) { |
722 | if (strcmp(nme, "usec" ) == 0){ |
723 | bn = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT); |
724 | } else if (strcmp(nme, "stmt" ) == 0){ |
725 | bn = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT); |
726 | } |
727 | } |
728 | MT_lock_unset(&mal_profileLock); |
729 | return bn; |
730 | } |
731 | |
732 | void |
733 | clearTrace(Client cntxt) |
734 | { |
735 | (void) cntxt; |
736 | MT_lock_set(&mal_profileLock); |
737 | if (cntxt->profticks == NULL) { |
738 | MT_lock_unset(&mal_profileLock); |
739 | initTrace(cntxt); |
740 | return; /* not initialized */ |
741 | } |
742 | /* drop all trace tables */ |
743 | _cleanupProfiler(cntxt); |
744 | MT_lock_unset(&mal_profileLock); |
745 | initTrace(cntxt); |
746 | } |
747 | |
748 | str |
749 | cleanupTraces(Client cntxt) |
750 | { |
751 | clearTrace(cntxt); |
752 | return MAL_SUCCEED; |
753 | } |
754 | |
755 | void |
756 | sqlProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
757 | { |
758 | str stmt, c; |
759 | int errors = 0; |
760 | |
761 | if (cntxt->profticks == NULL) |
762 | return; |
763 | |
764 | /* generate actual call statement */ |
765 | stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL); |
766 | c = stmt; |
767 | |
768 | while (c && *c && (isspace((unsigned char)*c) || *c == '!')) |
769 | c++; |
770 | |
771 | // keep it a short transaction |
772 | MT_lock_set(&mal_profileLock); |
773 | if (cntxt->profticks == NULL) { |
774 | MT_lock_unset(&mal_profileLock); |
775 | return; |
776 | } |
777 | errors += BUNappend(cntxt->profticks, &pci->ticks, false) != GDK_SUCCEED; |
778 | errors += BUNappend(cntxt->profstmt, c, false) != GDK_SUCCEED; |
779 | if (errors > 0) { |
780 | /* stop profiling if an error occurred */ |
781 | cntxt->sqlprofiler = FALSE; |
782 | } |
783 | |
784 | MT_lock_unset(&mal_profileLock); |
785 | GDKfree(stmt); |
786 | } |
787 | |
788 | lng |
789 | getDiskWrites(void) |
790 | { |
791 | #ifdef HAVE_SYS_RESOURCE_H |
792 | struct rusage infoUsage; |
793 | getrusage(RUSAGE_SELF, &infoUsage); |
794 | return infoUsage.ru_oublock; |
795 | #else |
796 | return 0; |
797 | #endif |
798 | } |
799 | |
800 | lng |
801 | getDiskReads(void) |
802 | { |
803 | #ifdef HAVE_SYS_RESOURCE_H |
804 | struct rusage infoUsage; |
805 | getrusage(RUSAGE_SELF, &infoUsage); |
806 | return infoUsage.ru_inblock; |
807 | #else |
808 | return 0; |
809 | #endif |
810 | } |
811 | |
812 | lng |
813 | getUserTime(void) |
814 | { |
815 | #ifdef HAVE_TIMES |
816 | struct tms newTms; |
817 | times(&newTms); |
818 | return newTms.tms_utime; |
819 | #else |
820 | return 0; |
821 | #endif |
822 | } |
823 | |
824 | lng |
825 | getSystemTime(void) |
826 | { |
827 | #ifdef HAVE_TIMES |
828 | struct tms newTms; |
829 | times(&newTms); |
830 | return newTms.tms_stime; |
831 | #else |
832 | return 0; |
833 | #endif |
834 | } |
835 | |
836 | /* Calculate a pessimistic size of the disk storage */ |
837 | lng |
838 | getDiskSpace(void) |
839 | { |
840 | BAT *b; |
841 | bat i; |
842 | lng size = 0; |
843 | |
844 | for (i = 1; i < getBBPsize(); i++) |
845 | if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))) { |
846 | b = BATdescriptor(i); |
847 | if (b) { |
848 | size += sizeof(BAT); |
849 | if (!isVIEW(b)) { |
850 | BUN cnt = BATcount(b); |
851 | |
852 | size += tailsize(b, cnt); |
853 | /* the upperbound is used for the heaps */ |
854 | if (b->tvheap) |
855 | size += HEAPvmsize(b->tvheap); |
856 | if (b->thash) |
857 | size += sizeof(BUN) * cnt; |
858 | /* also add the size of an imprint, ordered index or mosaic */ |
859 | if( b->timprints) |
860 | size += IMPSimprintsize(b); |
861 | if( b->torderidx) |
862 | size += HEAPvmsize(b->torderidx); |
863 | } |
864 | BBPunfix(i); |
865 | } |
866 | } |
867 | return size; |
868 | } |
869 | |
870 | |
871 | void profilerGetCPUStat(lng *user, lng *nice, lng *sys, lng *idle, lng *iowait) |
872 | { |
873 | (void) getCPULoad(0); |
874 | *user = corestat[255].user; |
875 | *nice = corestat[255].nice; |
876 | *sys = corestat[255].system; |
877 | *idle = corestat[255].idle; |
878 | *iowait = corestat[255].iowait; |
879 | } |
880 | |
881 | /* the heartbeat process produces a ping event once every X milliseconds */ |
882 | static MT_Id hbthread; |
883 | static ATOMIC_TYPE hbrunning = ATOMIC_VAR_INIT(0); |
884 | |
885 | static void profilerHeartbeat(void *dummy) |
886 | { |
887 | int t; |
888 | const int timeout = GDKdebug & FORCEMITOMASK ? 10 : 25; |
889 | |
890 | (void) dummy; |
891 | for (;;) { |
892 | /* wait until you need this info */ |
893 | MT_thread_setworking("sleeping" ); |
894 | while (ATOMIC_GET(&hbdelay) == 0 || maleventstream == NULL) { |
895 | if (GDKexiting() || !ATOMIC_GET(&hbrunning)) |
896 | return; |
897 | MT_sleep_ms(timeout); |
898 | } |
899 | for (t = (int) ATOMIC_GET(&hbdelay); t > 0; t -= timeout) { |
900 | if (GDKexiting() || !ATOMIC_GET(&hbrunning)) |
901 | return; |
902 | MT_sleep_ms(t > timeout ? timeout : t); |
903 | } |
904 | if (GDKexiting() || !ATOMIC_GET(&hbrunning)) |
905 | return; |
906 | MT_thread_setworking("pinging" ); |
907 | profilerHeartbeatEvent( "ping" ); |
908 | } |
909 | } |
910 | |
911 | void setHeartbeat(int delay) |
912 | { |
913 | if (delay < 0 ){ |
914 | ATOMIC_SET(&hbrunning, 0); |
915 | if (hbthread) |
916 | MT_join_thread(hbthread); |
917 | return; |
918 | } |
919 | if ( delay > 0 && delay <= 10) |
920 | delay = 10; |
921 | ATOMIC_SET(&hbdelay, delay); |
922 | } |
923 | |
924 | /* TODO getprofilerlimit and setprofilerlimit functions */ |
925 | |
926 | int getprofilerlimit(void) |
927 | { |
928 | return 0; |
929 | } |
930 | |
931 | void setprofilerlimit(int limit) |
932 | { |
933 | (void) limit; |
934 | } |
935 | |
936 | void initProfiler(void) |
937 | { |
938 | gettimeofday(&startup_time, NULL); |
939 | } |
940 | |
941 | void initHeartbeat(void) |
942 | { |
943 | ATOMIC_SET(&hbrunning, 1); |
944 | if (MT_create_thread(&hbthread, profilerHeartbeat, NULL, MT_THR_JOINABLE, |
945 | "heartbeat" ) < 0) { |
946 | /* it didn't happen */ |
947 | hbthread = 0; |
948 | ATOMIC_SET(&hbrunning, 0); |
949 | } |
950 | } |
951 | |