1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/* (c) M.L. Kersten
10 * Performance tracing
11 * The stethoscope/tachograph and tomograph performance monitors have exclusive access
12 * to a single event stream, which avoids concurrency conflicts amongst clients.
13 * It also avoid cluthered event records on the stream. Since this event stream is owned
14 * by a client, we should ensure that the profiler is automatically
15 * reset once the owner leaves.
16 */
17#include "monetdb_config.h"
18#include "mutils.h" /* mercurial_revision */
19#include "msabaoth.h" /* msab_getUUID */
20#include "mal_authorize.h"
21#include "mal_function.h"
22#include "mal_listing.h"
23#include "mal_profiler.h"
24#include "mal_runtime.h"
25#include "mal_utils.h"
26#include "mal_resource.h"
27
28#ifdef HAVE_SYS_TIME_H
29#include <sys/time.h>
30#endif
31
32#include <string.h>
33
34static str myname = 0; // avoid tracing the profiler module
35
36/* The JSON rendering can be either using '\n' separators between
37 * each key:value pair or as a single line.
38 * The current stethoscope implementation requires the first option and
39 * also the term rendering PRET to be set to ''
40 */
41
42#define PRETTIFY "\n"
43//#define PRETTIFY
44
45/* When the MAL block contains a BARRIER block we may end up with tons
46 * of profiler events. To avoid this, we stop emitting the events
47 * when we reached the HIGHWATERMARK. Leaving a message in the log. */
48#define HIGHWATERMARK 5
49
50
51int malProfileMode = 0; /* global flag to indicate profiling mode */
52static oid malprofileruser; /* keep track on who has claimed the channel */
53
54static struct timeval startup_time;
55
56static ATOMIC_TYPE hbdelay = ATOMIC_VAR_INIT(0);
57
58#ifdef HAVE_SYS_RESOURCE_H
59struct rusage infoUsage;
60static struct rusage prevUsage;
61#endif
62
63#define LOGLEN 8192
64#define lognew() loglen = 0; logbase = logbuffer; *logbase = 0;
65
66#define logadd(...) \
67 do { \
68 char tmp_buff[LOGLEN]; \
69 int tmp_len = 0; \
70 tmp_len = snprintf(tmp_buff, LOGLEN, __VA_ARGS__); \
71 if (loglen + tmp_len < LOGLEN) \
72 loglen += snprintf(logbase+loglen, LOGLEN - loglen, __VA_ARGS__); \
73 else { \
74 logjsonInternal(logbuffer); \
75 lognew(); \
76 loglen += snprintf(logbase+loglen, LOGLEN - loglen, __VA_ARGS__); \
77 } \
78 } while (0)
79
80// The heart beat events should be sent to all outstanding channels.
81static void logjsonInternal(char *logbuffer)
82{
83 size_t len;
84 len = strlen(logbuffer);
85
86 MT_lock_set(&mal_profileLock);
87 if (maleventstream) {
88 // upon request the log record is sent over the profile stream
89 (void) mnstr_write(maleventstream, logbuffer, 1, len);
90 (void) mnstr_flush(maleventstream);
91 }
92 MT_lock_unset(&mal_profileLock);
93}
94
95static char *
96truncate_string(char *inp)
97{
98 size_t len;
99 char *ret;
100 size_t ret_len = LOGLEN/2;
101 size_t padding = 64;
102
103 len = strlen(inp);
104 ret = (char *)GDKmalloc(ret_len + 1);
105 if (ret == NULL) {
106 return NULL;
107 }
108
109 snprintf(ret, ret_len + 1, "%.*s...<truncated>...%.*s",
110 (int) (ret_len/2), inp, (int) (ret_len/2 - padding),
111 inp + (len - ret_len/2 + padding));
112
113 return ret;
114}
115
116/* JSON rendering method of performance data.
117 * The eventparser may assume this layout for ease of parsing
118EXAMPLE:
119{
120"event":6 ,
121"time":"15:37:13.799706",
122"thread":3,
123"function":"user.s3_1",
124"pc":1,
125"tag":10397,
126"state":"start",
127"usec":0,
128"rss":215,
129"size":0,
130"oublock":8,
131"stmt":"X_41=0@0:void := querylog.define(\"select count(*) from tables;\":str,\"default_pipe\":str,30:int);",
132"short":"define( \"select count(*) from tables;\",\"default_pipe\",30 )",
133"prereq":[]
134}
135*/
136static void
137renderProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int start)
138{
139 char logbuffer[LOGLEN], *logbase;
140 size_t loglen;
141 str stmt, c;
142 str stmtq;
143 lng usec;
144 uint64_t microseconds;
145
146 /* ignore generation of events for instructions that are called too often
147 * they may appear when BARRIER blocks are executed
148 * The default parameter should be sufficient for most practical cases.
149 */
150 if( !start && pci->calls > HIGHWATERMARK){
151 if( pci->calls == 10000 || pci->calls == 100000 || pci->calls == 1000000 || pci->calls == 10000000)
152 fprintf(stderr, "#Profiler too many calls %d\n", pci->calls);
153 return;
154 }
155
156/* The stream of events can be complete read by the DBA,
157 * all other users can only see events assigned to their account
158 */
159 if( malprofileruser!= MAL_ADMIN && malprofileruser != cntxt->user)
160 return;
161
162 usec= GDKusec();
163 microseconds = (uint64_t)usec - ((uint64_t)startup_time.tv_sec*1000000 - (uint64_t)startup_time.tv_usec);
164 /* make profile event tuple */
165 lognew();
166 logadd("{"PRETTIFY); // fill in later with the event counter
167 /* TODO: This could probably be optimized somehow to avoid the
168 * function call to mercurial_revision().
169 */
170 logadd("\"version\":\""VERSION" (hg id: %s)\","PRETTIFY, mercurial_revision());
171 logadd("\"source\":\"trace\","PRETTIFY);
172
173 logadd("\"user_id\":"OIDFMT","PRETTIFY, cntxt->user);
174 logadd("\"clk\":"LLFMT","PRETTIFY, usec);
175 logadd("\"ctime\":%"PRIu64","PRETTIFY, microseconds);
176 logadd("\"thread\":%d,"PRETTIFY, THRgettid());
177
178 logadd("\"function\":\"%s.%s\","PRETTIFY, getModuleId(getInstrPtr(mb, 0)), getFunctionId(getInstrPtr(mb, 0)));
179 logadd("\"pc\":%d,"PRETTIFY, mb?getPC(mb,pci):0);
180 logadd("\"tag\":"OIDFMT","PRETTIFY, stk?stk->tag:0);
181 logadd("\"module\":\"%s\","PRETTIFY, pci->modname ? pci->modname : "" );
182 if (pci->modname && strcmp(pci->modname, "user") == 0) {
183 oid caller_tag = 0;
184 if(stk && stk->up) {
185 caller_tag = stk->up->tag;
186 }
187 logadd("\"caller\":"OIDFMT","PRETTIFY, caller_tag);
188 }
189 logadd("\"instruction\":\"%s\","PRETTIFY, pci->fcnname ? pci->fcnname : "");
190 if (!GDKinmemory()) {
191 char *uuid;
192 if ((c = msab_getUUID(&uuid)) == NULL) {
193 logadd("\"session\":\"%s\","PRETTIFY, uuid);
194 free(uuid);
195 } else
196 free(c);
197 }
198
199 if( start){
200 logadd("\"state\":\"start\","PRETTIFY);
201 // determine the Estimated Time of Completion
202 if ( pci->calls){
203 logadd("\"usec\":"LLFMT","PRETTIFY, pci->totticks/pci->calls);
204 } else{
205 logadd("\"usec\":"LLFMT","PRETTIFY, pci->ticks);
206 }
207 } else {
208 logadd("\"state\":\"done\","PRETTIFY);
209 logadd("\"usec\":"LLFMT","PRETTIFY, pci->ticks);
210 }
211 logadd("\"rss\":%zu,"PRETTIFY, MT_getrss()/1024/1024);
212 logadd("\"size\":"LLFMT ","PRETTIFY, pci? pci->wbytes/1024/1024:0); // result size
213
214#ifdef NUMAprofiling
215 logadd("\"numa\":[");
216 if(mb)
217 for( i= pci->retc ; i < pci->argc; i++)
218 if( !isVarConstant(mb, getArg(pci,i)) && mb->var[getArg(pci,i)]->worker)
219 logadd("%c %d", (i?',':' '), mb->var[getArg(pci,i)]->worker);
220 logadd("],"PRETTIFY);
221#endif
222
223#ifdef HAVE_SYS_RESOURCE_H
224 getrusage(RUSAGE_SELF, &infoUsage);
225 if(infoUsage.ru_inblock - prevUsage.ru_inblock)
226 logadd("\"inblock\":%ld,"PRETTIFY, infoUsage.ru_inblock - prevUsage.ru_inblock);
227 if(infoUsage.ru_oublock - prevUsage.ru_oublock)
228 logadd("\"oublock\":%ld,"PRETTIFY, infoUsage.ru_oublock - prevUsage.ru_oublock);
229 if(infoUsage.ru_majflt - prevUsage.ru_majflt)
230 logadd("\"majflt\":%ld,"PRETTIFY, infoUsage.ru_majflt - prevUsage.ru_majflt);
231 if(infoUsage.ru_nswap - prevUsage.ru_nswap)
232 logadd("\"nswap\":%ld,"PRETTIFY, infoUsage.ru_nswap - prevUsage.ru_nswap);
233 if(infoUsage.ru_nvcsw - prevUsage.ru_nvcsw)
234 logadd("\"nvcsw\":%ld,"PRETTIFY, infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +infoUsage.ru_nivcsw - prevUsage.ru_nivcsw);
235 prevUsage = infoUsage;
236#endif
237
238 if( mb){
239 char prereq[BUFSIZ];
240 size_t len;
241 int i,j,k,comma;
242 InstrPtr q;
243 char *truncated;
244
245 /* generate actual call statement */
246 stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL);
247 if (stmt) {
248 c = stmt;
249
250 while (*c && isspace((unsigned char)*c))
251 c++;
252 if( *c){
253 stmtq = mal_quote(c, strlen(c));
254 if (stmtq && strlen(stmtq) > LOGLEN/2) {
255 truncated = truncate_string(stmtq);
256 GDKfree(stmtq);
257 stmtq = truncated;
258 }
259 if (stmtq != NULL) {
260 logadd("\"stmt\":\"%s\","PRETTIFY, stmtq);
261 GDKfree(stmtq);
262 }
263 }
264 GDKfree(stmt);
265 }
266
267 // ship the beautified version as well
268
269 stmt = shortStmtRendering(mb, stk, pci);
270 stmtq = mal_quote(stmt, strlen(stmt));
271 if (stmtq && strlen(stmtq) > LOGLEN/2) {
272 truncated = truncate_string(stmtq);
273 GDKfree(stmtq);
274 stmtq = truncated;
275 }
276 if (stmtq != NULL) {
277 logadd("\"short\":\"%s\","PRETTIFY, stmtq);
278 GDKfree(stmtq);
279 }
280 GDKfree(stmt);
281
282
283 // collect the prerequisite pre-requisite statements
284 prereq[0]='[';
285 prereq[1]=0;
286 len = 1;
287 comma=0;
288 for(i= pci->retc; i < pci->argc; i++){
289 for( j = pci->pc-1; j > 0; j--){
290 q= getInstrPtr(mb,j);
291 for( k=0; k < q->retc; k++)
292 if( getArg(q,k) == getArg(pci,i))
293 break;
294 if( k < q->retc){
295 snprintf(prereq + len, BUFSIZ-len,"%s%d", (comma?",":""), j);
296 len = strlen(prereq);
297 comma++;
298 break;
299 }
300 }
301 }
302#define MALARGUMENTDETAILS
303#ifdef MALARGUMENTDETAILS
304 logadd("\"prereq\":%s],"PRETTIFY, prereq);
305#else
306 logadd("\"prereq\":%s]"PRETTIFY, prereq);
307#endif
308
309/* EXAMPLE MAL statement argument decomposition
310 * The eventparser may assume this layout for ease of parsing
311{
312... as above ...
313"result":{"clk":"173297139,"pc":1,"index":0,,"name":"X_6","type":"void","value":"0@0","eol":0}
314...
315"argument":{"clk":173297139,"pc":1,"index":"2","type":"str","value":"\"default_pipe\"","eol":0},
316}
317This information can be used to determine memory footprint and variable life times.
318 */
319
320#define PRET
321#ifdef MALARGUMENTDETAILS
322 // Also show details of the arguments for modelling
323 if(mb){
324 logadd("\"ret\":[");
325 for( j=0; j< pci->argc; j++){
326 int tpe = getVarType(mb, getArg(pci,j));
327 str tname = 0, cv;
328 lng total = 0;
329 BUN cnt = 0;
330 bat bid=0;
331
332 if( j == pci->retc ){
333 logadd("],"PRETTIFY"\"arg\":[");
334 }
335 logadd("{");
336 logadd("\"index\":%d,"PRET, j);
337 logadd("\"name\":\"%s\","PRET, getVarName(mb, getArg(pci,j)));
338 if( getVarSTC(mb,getArg(pci,j))){
339 InstrPtr stc = getInstrPtr(mb, getVarSTC(mb,getArg(pci,j)));
340 if(stc && strcmp(getModuleId(stc),"sql") ==0 && strncmp(getFunctionId(stc),"bind",4)==0)
341 logadd("\"alias\":\"%s.%s.%s\","PRET,
342 getVarConstant(mb, getArg(stc,stc->retc +1)).val.sval,
343 getVarConstant(mb, getArg(stc,stc->retc +2)).val.sval,
344 getVarConstant(mb, getArg(stc,stc->retc +3)).val.sval);
345 }
346 if( isaBatType(tpe) ){
347 BAT *d= BATdescriptor( bid = stk->stk[getArg(pci,j)].val.bval);
348 tname = getTypeName(getBatType(tpe));
349 logadd("\"type\":\"bat[:%s]\","PRET, tname);
350 if( d) {
351 BAT *v;
352 cnt = BATcount(d);
353 if( isVIEW(d)){
354 logadd("\"view\":\"true\","PRET);
355 logadd("\"parent\":%d,"PRET, VIEWtparent(d));
356 logadd("\"seqbase\":"BUNFMT","PRET, d->hseqbase);
357 logadd("\"hghbase\":"BUNFMT","PRET, d->hseqbase + cnt);
358 v= BBPquickdesc(VIEWtparent(d), false);
359 logadd("\"kind\":\"%s\","PRET, (v && !v->batTransient ? "persistent" : "transient"));
360 } else
361 logadd("\"kind\":\"%s\","PRET, ( d->batTransient ? "transient" : "persistent"));
362 total += cnt * d->twidth;
363 total += heapinfo(d->tvheap, d->batCacheid);
364 total += hashinfo(d->thash, d->batCacheid);
365 total += IMPSimprintsize(d);
366 BBPunfix(d->batCacheid);
367 }
368 logadd("\"bid\":%d,"PRET, bid);
369 logadd("\"count\":"BUNFMT","PRET, cnt);
370 logadd("\"size\":" LLFMT","PRET, total);
371 } else{
372 char *truncated = NULL;
373 tname = getTypeName(tpe);
374 logadd("\"type\":\"%s\","PRET, tname);
375 cv = VALformat(&stk->stk[getArg(pci,j)]);
376 stmtq = cv ? mal_quote(cv, strlen(cv)) : NULL;
377 if (stmtq != NULL && strlen(stmtq) > LOGLEN/2) {
378 truncated = truncate_string(stmtq);
379 GDKfree(stmtq);
380 stmtq = truncated;
381 }
382 if (stmtq == NULL) {
383 logadd("\"value\":\"(null)\","PRET);
384 } else {
385 logadd("\"value\":\"%s\",", stmtq);
386 }
387 GDKfree(cv);
388 GDKfree(stmtq);
389 }
390 logadd("\"eol\":%d"PRET, getVarEolife(mb,getArg(pci,j)));
391 logadd("\"used\":%d"PRET, isVarUsed(mb,getArg(pci,j)));
392 logadd("\"fixed\":%d"PRET, isVarFixed(mb,getArg(pci,j)));
393 logadd("\"udf\":%d"PRET, isVarUDFtype(mb,getArg(pci,j)));
394 GDKfree(tname);
395 logadd("}%s", (j< pci->argc-1 && j != pci->retc -1?",":""));
396 }
397 logadd("]"PRETTIFY); // end marker for arguments
398 }
399 }
400#endif
401 logadd("}\n"); // end marker
402 logjsonInternal(logbuffer);
403}
404
405/* the OS details on cpu load are read from /proc/stat
406 * We should use an OS define to react to the maximal cores
407 */
408
409static struct{
410 lng user, nice, system, idle, iowait;
411 double load;
412} corestat[256];
413
414static int
415getCPULoad(char cpuload[BUFSIZ]){
416 int cpu, len = 0, i;
417 lng user, nice, system, idle, iowait;
418 size_t n;
419 char buf[BUFSIZ+1], *s;
420 static FILE *proc= NULL;
421 lng newload;
422
423 if (proc == NULL || ferror(proc))
424 proc = fopen("/proc/stat","r");
425 else rewind(proc);
426 if (proc == NULL) {
427 /* unexpected */
428 return -1;
429 }
430 /* read complete file to avoid concurrent write issues */
431 if ((n = fread(buf, 1, BUFSIZ,proc)) == 0 )
432 return -1;
433 buf[n] = 0;
434 for (s= buf; *s; s++) {
435 if (strncmp(s,"cpu",3)== 0){
436 s +=3;
437 if (*s == ' ') {
438 s++;
439 cpu = 255; // the cpu totals stored here
440 } else {
441 cpu = atoi(s);
442 if (cpu < 0 || cpu > 255)
443 cpu = 255;
444 }
445 s= strchr(s,' ');
446 if (s == NULL) /* unexpected format of file */
447 break;
448
449 while( *s && isspace((unsigned char)*s)) s++;
450 i= sscanf(s,LLSCN" "LLSCN" "LLSCN" "LLSCN" "LLSCN, &user, &nice, &system, &idle, &iowait);
451 if ( i != 5 )
452 goto skip;
453 newload = (user - corestat[cpu].user + nice - corestat[cpu].nice + system - corestat[cpu].system);
454 if ( newload)
455 corestat[cpu].load = (double) newload / (newload + idle - corestat[cpu].idle + iowait - corestat[cpu].iowait);
456 corestat[cpu].user = user;
457 corestat[cpu].nice = nice;
458 corestat[cpu].system = system;
459 corestat[cpu].idle = idle;
460 corestat[cpu].iowait = iowait;
461 }
462 skip:
463 while (*s && *s != '\n')
464 s++;
465 }
466
467 if(cpuload == 0)
468 return 0;
469 // identify core processing
470 len += snprintf(cpuload, BUFSIZ, "[");
471 for (cpu = 0; cpuload && cpu < 255 && corestat[cpu].user; cpu++) {
472 len +=snprintf(cpuload + len, BUFSIZ - len, "%s%.2f", (cpu?",":""), corestat[cpu].load);
473 }
474 (void) snprintf(cpuload + len, BUFSIZ - len, "]");
475 return 0;
476}
477
478void
479profilerHeartbeatEvent(char *alter)
480{
481 char cpuload[BUFSIZ];
482 char logbuffer[LOGLEN], *logbase;
483 int loglen;
484 lng usec;
485 uint64_t microseconds;
486
487 if (ATOMIC_GET(&hbdelay) == 0 || maleventstream == 0)
488 return;
489 usec = GDKusec();
490 microseconds = (uint64_t)startup_time.tv_sec*1000000 + (uint64_t)startup_time.tv_usec + (uint64_t)usec;
491
492 /* get CPU load on beat boundaries only */
493 if ( getCPULoad(cpuload) )
494 return;
495
496 lognew();
497 logadd("{"PRETTIFY); // fill in later with the event counter
498 logadd("\"source\":\"heartbeat\","PRETTIFY);
499 if (GDKinmemory()) {
500 char *uuid, *err;
501 if ((err = msab_getUUID(&uuid)) == NULL) {
502 logadd("\"session\":\"%s\","PRETTIFY, uuid);
503 free(uuid);
504 } else
505 free(err);
506 }
507 logadd("\"clk\":"LLFMT","PRETTIFY,usec);
508 logadd("\"ctime\":%"PRIu64","PRETTIFY, microseconds);
509 logadd("\"rss\":%zu,"PRETTIFY, MT_getrss()/1024/1024);
510#ifdef HAVE_SYS_RESOURCE_H
511 getrusage(RUSAGE_SELF, &infoUsage);
512 if(infoUsage.ru_inblock - prevUsage.ru_inblock)
513 logadd("\"inblock\":%ld,"PRETTIFY, infoUsage.ru_inblock - prevUsage.ru_inblock);
514 if(infoUsage.ru_oublock - prevUsage.ru_oublock)
515 logadd("\"oublock\":%ld,"PRETTIFY, infoUsage.ru_oublock - prevUsage.ru_oublock);
516 if(infoUsage.ru_majflt - prevUsage.ru_majflt)
517 logadd("\"majflt\":%ld,"PRETTIFY, infoUsage.ru_majflt - prevUsage.ru_majflt);
518 if(infoUsage.ru_nswap - prevUsage.ru_nswap)
519 logadd("\"nswap\":%ld,"PRETTIFY, infoUsage.ru_nswap - prevUsage.ru_nswap);
520 if(infoUsage.ru_nvcsw - prevUsage.ru_nvcsw)
521 logadd("\"nvcsw\":%ld,"PRETTIFY, infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +infoUsage.ru_nivcsw - prevUsage.ru_nivcsw);
522 prevUsage = infoUsage;
523#endif
524 logadd("\"state\":\"%s\","PRETTIFY,alter);
525 logadd("\"cpuload\":%s"PRETTIFY,cpuload);
526 logadd("}\n"); // end marker
527 logjsonInternal(logbuffer);
528}
529
530void
531profilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int start)
532{
533 (void) cntxt;
534 if (stk == NULL) return;
535 if (pci == NULL) return;
536 if (getModuleId(pci) == myname) // ignore profiler commands from monitoring
537 return;
538
539 if( maleventstream) {
540 renderProfilerEvent(cntxt, mb, stk, pci, start);
541 if ( !start && pci->pc ==0)
542 profilerHeartbeatEvent("ping");
543 if ( start && pci->token == ENDsymbol)
544 profilerHeartbeatEvent("ping");
545 }
546}
547
548/* The first scheme dumps the events on a stream (and in the pool)
549 */
550str
551openProfilerStream(Client cntxt)
552{
553 int j;
554
555#ifdef HAVE_SYS_RESOURCE_H
556 getrusage(RUSAGE_SELF, &infoUsage);
557 prevUsage = infoUsage;
558#endif
559 if (myname == 0){
560 myname = putName("profiler");
561 logjsonInternal(monet_characteristics);
562 }
563 if( maleventstream){
564 /* The DBA can always grab the stream, others have to wait */
565 if ( cntxt->user == MAL_ADMIN)
566 closeProfilerStream(cntxt);
567 else
568 throw(MAL,"profiler.start","Profiler already running, stream not available");
569 }
570 malProfileMode = -1;
571 maleventstream = cntxt->fdout;
572 malprofileruser = cntxt->user;
573
574 // Ignore the JSON rendering mode, use compiled time version
575
576 /* show all in progress instructions for stethoscope startup */
577 /* this code is not thread safe, because the inprogress administration may change concurrently */
578 MT_lock_set(&mal_delayLock);
579 for(j = 0; j <THREADS; j++)
580 if( workingset[j].mb)
581 /* show the event */
582 profilerEvent(workingset[j].cntxt, workingset[j].mb, workingset[j].stk, workingset[j].pci, 1);
583 MT_lock_unset(&mal_delayLock);
584 return MAL_SUCCEED;
585}
586
587str
588closeProfilerStream(Client cntxt)
589{
590 (void) cntxt;
591 maleventstream = NULL;
592 malProfileMode = 0;
593 malprofileruser = 0;
594 return MAL_SUCCEED;
595}
596
597/* the second scheme is to collect the profile
598 * events in a local table for direct SQL inspection
599 */
600str
601startProfiler(Client cntxt)
602{
603#ifdef HAVE_SYS_RESOURCE_H
604 getrusage(RUSAGE_SELF, &infoUsage);
605 prevUsage = infoUsage;
606#endif
607 (void) cntxt;
608
609 if( maleventstream){
610 throw(MAL,"profiler.start","Profiler already running, stream not available");
611 }
612 MT_lock_set(&mal_profileLock );
613 if (myname == 0){
614 myname = putName("profiler");
615 }
616 malProfileMode = 1;
617 MT_lock_unset(&mal_profileLock);
618 logjsonInternal(monet_characteristics);
619 // reset the trace table
620 clearTrace(cntxt);
621
622 return MAL_SUCCEED;
623}
624
625/* SQL tracing is simplified, because it only collects the events in the temporary table.
626 */
627str
628startTrace(Client cntxt)
629{
630 cntxt->sqlprofiler = TRUE;
631 clearTrace(cntxt);
632 return MAL_SUCCEED;
633}
634
635str
636stopTrace(Client cntxt)
637{
638 cntxt->sqlprofiler = FALSE;
639 return MAL_SUCCEED;
640}
641
642str
643stopProfiler(Client cntxt)
644{
645 MT_lock_set(&mal_profileLock);
646 malProfileMode = 0;
647 setHeartbeat(0); // stop heartbeat
648 if( cntxt)
649 closeProfilerStream(cntxt);
650 MT_lock_unset(&mal_profileLock);
651 return MAL_SUCCEED;
652}
653
654/*
655 * SQL profile traces
656 * The events being captured are stored in client specific BATs.
657 * They are made persistent to accumate information over
658 * multiple sessions. This means it has to be explicitly reset
659 * to avoid disc overflow using profiler.reset().
660 *
661 * The information returned for the trace is purposely limited
662 * to the ticks and the final MAL instruction.
663 * For more detailed analysis, the stethoscope should be used.
664 */
665
666static void
667_cleanupProfiler(Client cntxt)
668{
669 BBPunfix(cntxt->profticks->batCacheid);
670 BBPunfix(cntxt->profstmt->batCacheid);
671 cntxt->profticks = cntxt->profstmt = NULL;
672}
673
674static BAT *
675TRACEcreate(int tt)
676{
677 BAT *b;
678
679 b = COLnew(0, tt, 1 << 10, TRANSIENT);
680 if (b == NULL)
681 return NULL;
682 return b;
683}
684
685static void
686initTrace(Client cntxt)
687{
688 MT_lock_set(&mal_profileLock);
689 if (cntxt->profticks) {
690 MT_lock_unset(&mal_profileLock);
691 return; /* already initialized */
692 }
693 cntxt->profticks = TRACEcreate( TYPE_lng);
694 cntxt->profstmt = TRACEcreate( TYPE_str);
695 if ( cntxt->profticks == NULL || cntxt->profstmt == NULL )
696 _cleanupProfiler(cntxt);
697 MT_lock_unset(&mal_profileLock);
698}
699
700int
701TRACEtable(Client cntxt, BAT **r)
702{
703 initTrace(cntxt);
704 MT_lock_set(&mal_profileLock);
705 if (cntxt->profticks == NULL) {
706 MT_lock_unset(&mal_profileLock);
707 return -1; /* not initialized */
708 }
709 r[0] = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT);
710 r[1] = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT);
711 MT_lock_unset(&mal_profileLock);
712 return 2;
713}
714
715BAT *
716getTrace(Client cntxt, const char *nme)
717{
718 BAT *bn = NULL;
719
720 MT_lock_set(&mal_profileLock);
721 if (cntxt->profticks) {
722 if (strcmp(nme, "usec") == 0){
723 bn = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT);
724 } else if (strcmp(nme, "stmt") == 0){
725 bn = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT);
726 }
727 }
728 MT_lock_unset(&mal_profileLock);
729 return bn;
730}
731
732void
733clearTrace(Client cntxt)
734{
735 (void) cntxt;
736 MT_lock_set(&mal_profileLock);
737 if (cntxt->profticks == NULL) {
738 MT_lock_unset(&mal_profileLock);
739 initTrace(cntxt);
740 return; /* not initialized */
741 }
742 /* drop all trace tables */
743 _cleanupProfiler(cntxt);
744 MT_lock_unset(&mal_profileLock);
745 initTrace(cntxt);
746}
747
748str
749cleanupTraces(Client cntxt)
750{
751 clearTrace(cntxt);
752 return MAL_SUCCEED;
753}
754
755void
756sqlProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
757{
758 str stmt, c;
759 int errors = 0;
760
761 if (cntxt->profticks == NULL)
762 return;
763
764 /* generate actual call statement */
765 stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL);
766 c = stmt;
767
768 while (c && *c && (isspace((unsigned char)*c) || *c == '!'))
769 c++;
770
771 // keep it a short transaction
772 MT_lock_set(&mal_profileLock);
773 if (cntxt->profticks == NULL) {
774 MT_lock_unset(&mal_profileLock);
775 return;
776 }
777 errors += BUNappend(cntxt->profticks, &pci->ticks, false) != GDK_SUCCEED;
778 errors += BUNappend(cntxt->profstmt, c, false) != GDK_SUCCEED;
779 if (errors > 0) {
780 /* stop profiling if an error occurred */
781 cntxt->sqlprofiler = FALSE;
782 }
783
784 MT_lock_unset(&mal_profileLock);
785 GDKfree(stmt);
786}
787
788lng
789getDiskWrites(void)
790{
791#ifdef HAVE_SYS_RESOURCE_H
792 struct rusage infoUsage;
793 getrusage(RUSAGE_SELF, &infoUsage);
794 return infoUsage.ru_oublock;
795#else
796 return 0;
797#endif
798}
799
800lng
801getDiskReads(void)
802{
803#ifdef HAVE_SYS_RESOURCE_H
804 struct rusage infoUsage;
805 getrusage(RUSAGE_SELF, &infoUsage);
806 return infoUsage.ru_inblock;
807#else
808 return 0;
809#endif
810}
811
812lng
813getUserTime(void)
814{
815#ifdef HAVE_TIMES
816 struct tms newTms;
817 times(&newTms);
818 return newTms.tms_utime;
819#else
820 return 0;
821#endif
822}
823
824lng
825getSystemTime(void)
826{
827#ifdef HAVE_TIMES
828 struct tms newTms;
829 times(&newTms);
830 return newTms.tms_stime;
831#else
832 return 0;
833#endif
834}
835
836/* Calculate a pessimistic size of the disk storage */
837lng
838getDiskSpace(void)
839{
840 BAT *b;
841 bat i;
842 lng size = 0;
843
844 for (i = 1; i < getBBPsize(); i++)
845 if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))) {
846 b = BATdescriptor(i);
847 if (b) {
848 size += sizeof(BAT);
849 if (!isVIEW(b)) {
850 BUN cnt = BATcount(b);
851
852 size += tailsize(b, cnt);
853 /* the upperbound is used for the heaps */
854 if (b->tvheap)
855 size += HEAPvmsize(b->tvheap);
856 if (b->thash)
857 size += sizeof(BUN) * cnt;
858 /* also add the size of an imprint, ordered index or mosaic */
859 if( b->timprints)
860 size += IMPSimprintsize(b);
861 if( b->torderidx)
862 size += HEAPvmsize(b->torderidx);
863 }
864 BBPunfix(i);
865 }
866 }
867 return size;
868}
869
870
871void profilerGetCPUStat(lng *user, lng *nice, lng *sys, lng *idle, lng *iowait)
872{
873 (void) getCPULoad(0);
874 *user = corestat[255].user;
875 *nice = corestat[255].nice;
876 *sys = corestat[255].system;
877 *idle = corestat[255].idle;
878 *iowait = corestat[255].iowait;
879}
880
881/* the heartbeat process produces a ping event once every X milliseconds */
882static MT_Id hbthread;
883static ATOMIC_TYPE hbrunning = ATOMIC_VAR_INIT(0);
884
885static void profilerHeartbeat(void *dummy)
886{
887 int t;
888 const int timeout = GDKdebug & FORCEMITOMASK ? 10 : 25;
889
890 (void) dummy;
891 for (;;) {
892 /* wait until you need this info */
893 MT_thread_setworking("sleeping");
894 while (ATOMIC_GET(&hbdelay) == 0 || maleventstream == NULL) {
895 if (GDKexiting() || !ATOMIC_GET(&hbrunning))
896 return;
897 MT_sleep_ms(timeout);
898 }
899 for (t = (int) ATOMIC_GET(&hbdelay); t > 0; t -= timeout) {
900 if (GDKexiting() || !ATOMIC_GET(&hbrunning))
901 return;
902 MT_sleep_ms(t > timeout ? timeout : t);
903 }
904 if (GDKexiting() || !ATOMIC_GET(&hbrunning))
905 return;
906 MT_thread_setworking("pinging");
907 profilerHeartbeatEvent( "ping");
908 }
909}
910
911void setHeartbeat(int delay)
912{
913 if (delay < 0 ){
914 ATOMIC_SET(&hbrunning, 0);
915 if (hbthread)
916 MT_join_thread(hbthread);
917 return;
918 }
919 if ( delay > 0 && delay <= 10)
920 delay = 10;
921 ATOMIC_SET(&hbdelay, delay);
922}
923
924/* TODO getprofilerlimit and setprofilerlimit functions */
925
926int getprofilerlimit(void)
927{
928 return 0;
929}
930
931void setprofilerlimit(int limit)
932{
933 (void) limit;
934}
935
936void initProfiler(void)
937{
938 gettimeofday(&startup_time, NULL);
939}
940
941void initHeartbeat(void)
942{
943 ATOMIC_SET(&hbrunning, 1);
944 if (MT_create_thread(&hbthread, profilerHeartbeat, NULL, MT_THR_JOINABLE,
945 "heartbeat") < 0) {
946 /* it didn't happen */
947 hbthread = 0;
948 ATOMIC_SET(&hbrunning, 0);
949 }
950}
951