| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* (c) M.L. Kersten |
| 10 | * Performance tracing |
| 11 | * The stethoscope/tachograph and tomograph performance monitors have exclusive access |
| 12 | * to a single event stream, which avoids concurrency conflicts amongst clients. |
| 13 | * It also avoid cluthered event records on the stream. Since this event stream is owned |
| 14 | * by a client, we should ensure that the profiler is automatically |
| 15 | * reset once the owner leaves. |
| 16 | */ |
| 17 | #include "monetdb_config.h" |
| 18 | #include "mutils.h" /* mercurial_revision */ |
| 19 | #include "msabaoth.h" /* msab_getUUID */ |
| 20 | #include "mal_authorize.h" |
| 21 | #include "mal_function.h" |
| 22 | #include "mal_listing.h" |
| 23 | #include "mal_profiler.h" |
| 24 | #include "mal_runtime.h" |
| 25 | #include "mal_utils.h" |
| 26 | #include "mal_resource.h" |
| 27 | |
| 28 | #ifdef HAVE_SYS_TIME_H |
| 29 | #include <sys/time.h> |
| 30 | #endif |
| 31 | |
| 32 | #include <string.h> |
| 33 | |
| 34 | static str myname = 0; // avoid tracing the profiler module |
| 35 | |
| 36 | /* The JSON rendering can be either using '\n' separators between |
| 37 | * each key:value pair or as a single line. |
| 38 | * The current stethoscope implementation requires the first option and |
| 39 | * also the term rendering PRET to be set to '' |
| 40 | */ |
| 41 | |
| 42 | #define PRETTIFY "\n" |
| 43 | //#define PRETTIFY |
| 44 | |
| 45 | /* When the MAL block contains a BARRIER block we may end up with tons |
| 46 | * of profiler events. To avoid this, we stop emitting the events |
| 47 | * when we reached the HIGHWATERMARK. Leaving a message in the log. */ |
| 48 | #define HIGHWATERMARK 5 |
| 49 | |
| 50 | |
| 51 | int malProfileMode = 0; /* global flag to indicate profiling mode */ |
| 52 | static oid malprofileruser; /* keep track on who has claimed the channel */ |
| 53 | |
| 54 | static struct timeval startup_time; |
| 55 | |
| 56 | static ATOMIC_TYPE hbdelay = ATOMIC_VAR_INIT(0); |
| 57 | |
| 58 | #ifdef HAVE_SYS_RESOURCE_H |
| 59 | struct rusage infoUsage; |
| 60 | static struct rusage prevUsage; |
| 61 | #endif |
| 62 | |
| 63 | #define LOGLEN 8192 |
| 64 | #define lognew() loglen = 0; logbase = logbuffer; *logbase = 0; |
| 65 | |
| 66 | #define logadd(...) \ |
| 67 | do { \ |
| 68 | char tmp_buff[LOGLEN]; \ |
| 69 | int tmp_len = 0; \ |
| 70 | tmp_len = snprintf(tmp_buff, LOGLEN, __VA_ARGS__); \ |
| 71 | if (loglen + tmp_len < LOGLEN) \ |
| 72 | loglen += snprintf(logbase+loglen, LOGLEN - loglen, __VA_ARGS__); \ |
| 73 | else { \ |
| 74 | logjsonInternal(logbuffer); \ |
| 75 | lognew(); \ |
| 76 | loglen += snprintf(logbase+loglen, LOGLEN - loglen, __VA_ARGS__); \ |
| 77 | } \ |
| 78 | } while (0) |
| 79 | |
| 80 | // The heart beat events should be sent to all outstanding channels. |
| 81 | static void logjsonInternal(char *logbuffer) |
| 82 | { |
| 83 | size_t len; |
| 84 | len = strlen(logbuffer); |
| 85 | |
| 86 | MT_lock_set(&mal_profileLock); |
| 87 | if (maleventstream) { |
| 88 | // upon request the log record is sent over the profile stream |
| 89 | (void) mnstr_write(maleventstream, logbuffer, 1, len); |
| 90 | (void) mnstr_flush(maleventstream); |
| 91 | } |
| 92 | MT_lock_unset(&mal_profileLock); |
| 93 | } |
| 94 | |
| 95 | static char * |
| 96 | truncate_string(char *inp) |
| 97 | { |
| 98 | size_t len; |
| 99 | char *ret; |
| 100 | size_t ret_len = LOGLEN/2; |
| 101 | size_t padding = 64; |
| 102 | |
| 103 | len = strlen(inp); |
| 104 | ret = (char *)GDKmalloc(ret_len + 1); |
| 105 | if (ret == NULL) { |
| 106 | return NULL; |
| 107 | } |
| 108 | |
| 109 | snprintf(ret, ret_len + 1, "%.*s...<truncated>...%.*s" , |
| 110 | (int) (ret_len/2), inp, (int) (ret_len/2 - padding), |
| 111 | inp + (len - ret_len/2 + padding)); |
| 112 | |
| 113 | return ret; |
| 114 | } |
| 115 | |
| 116 | /* JSON rendering method of performance data. |
| 117 | * The eventparser may assume this layout for ease of parsing |
| 118 | EXAMPLE: |
| 119 | { |
| 120 | "event":6 , |
| 121 | "time":"15:37:13.799706", |
| 122 | "thread":3, |
| 123 | "function":"user.s3_1", |
| 124 | "pc":1, |
| 125 | "tag":10397, |
| 126 | "state":"start", |
| 127 | "usec":0, |
| 128 | "rss":215, |
| 129 | "size":0, |
| 130 | "oublock":8, |
| 131 | "stmt":"X_41=0@0:void := querylog.define(\"select count(*) from tables;\":str,\"default_pipe\":str,30:int);", |
| 132 | "short":"define( \"select count(*) from tables;\",\"default_pipe\",30 )", |
| 133 | "prereq":[] |
| 134 | } |
| 135 | */ |
| 136 | static void |
| 137 | renderProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int start) |
| 138 | { |
| 139 | char logbuffer[LOGLEN], *logbase; |
| 140 | size_t loglen; |
| 141 | str stmt, c; |
| 142 | str stmtq; |
| 143 | lng usec; |
| 144 | uint64_t microseconds; |
| 145 | |
| 146 | /* ignore generation of events for instructions that are called too often |
| 147 | * they may appear when BARRIER blocks are executed |
| 148 | * The default parameter should be sufficient for most practical cases. |
| 149 | */ |
| 150 | if( !start && pci->calls > HIGHWATERMARK){ |
| 151 | if( pci->calls == 10000 || pci->calls == 100000 || pci->calls == 1000000 || pci->calls == 10000000) |
| 152 | fprintf(stderr, "#Profiler too many calls %d\n" , pci->calls); |
| 153 | return; |
| 154 | } |
| 155 | |
| 156 | /* The stream of events can be complete read by the DBA, |
| 157 | * all other users can only see events assigned to their account |
| 158 | */ |
| 159 | if( malprofileruser!= MAL_ADMIN && malprofileruser != cntxt->user) |
| 160 | return; |
| 161 | |
| 162 | usec= GDKusec(); |
| 163 | microseconds = (uint64_t)usec - ((uint64_t)startup_time.tv_sec*1000000 - (uint64_t)startup_time.tv_usec); |
| 164 | /* make profile event tuple */ |
| 165 | lognew(); |
| 166 | logadd("{" PRETTIFY); // fill in later with the event counter |
| 167 | /* TODO: This could probably be optimized somehow to avoid the |
| 168 | * function call to mercurial_revision(). |
| 169 | */ |
| 170 | logadd("\"version\":\"" VERSION" (hg id: %s)\"," PRETTIFY, mercurial_revision()); |
| 171 | logadd("\"source\":\"trace\"," PRETTIFY); |
| 172 | |
| 173 | logadd("\"user_id\":" OIDFMT"," PRETTIFY, cntxt->user); |
| 174 | logadd("\"clk\":" LLFMT"," PRETTIFY, usec); |
| 175 | logadd("\"ctime\":%" PRIu64"," PRETTIFY, microseconds); |
| 176 | logadd("\"thread\":%d," PRETTIFY, THRgettid()); |
| 177 | |
| 178 | logadd("\"function\":\"%s.%s\"," PRETTIFY, getModuleId(getInstrPtr(mb, 0)), getFunctionId(getInstrPtr(mb, 0))); |
| 179 | logadd("\"pc\":%d," PRETTIFY, mb?getPC(mb,pci):0); |
| 180 | logadd("\"tag\":" OIDFMT"," PRETTIFY, stk?stk->tag:0); |
| 181 | logadd("\"module\":\"%s\"," PRETTIFY, pci->modname ? pci->modname : "" ); |
| 182 | if (pci->modname && strcmp(pci->modname, "user" ) == 0) { |
| 183 | oid caller_tag = 0; |
| 184 | if(stk && stk->up) { |
| 185 | caller_tag = stk->up->tag; |
| 186 | } |
| 187 | logadd("\"caller\":" OIDFMT"," PRETTIFY, caller_tag); |
| 188 | } |
| 189 | logadd("\"instruction\":\"%s\"," PRETTIFY, pci->fcnname ? pci->fcnname : "" ); |
| 190 | if (!GDKinmemory()) { |
| 191 | char *uuid; |
| 192 | if ((c = msab_getUUID(&uuid)) == NULL) { |
| 193 | logadd("\"session\":\"%s\"," PRETTIFY, uuid); |
| 194 | free(uuid); |
| 195 | } else |
| 196 | free(c); |
| 197 | } |
| 198 | |
| 199 | if( start){ |
| 200 | logadd("\"state\":\"start\"," PRETTIFY); |
| 201 | // determine the Estimated Time of Completion |
| 202 | if ( pci->calls){ |
| 203 | logadd("\"usec\":" LLFMT"," PRETTIFY, pci->totticks/pci->calls); |
| 204 | } else{ |
| 205 | logadd("\"usec\":" LLFMT"," PRETTIFY, pci->ticks); |
| 206 | } |
| 207 | } else { |
| 208 | logadd("\"state\":\"done\"," PRETTIFY); |
| 209 | logadd("\"usec\":" LLFMT"," PRETTIFY, pci->ticks); |
| 210 | } |
| 211 | logadd("\"rss\":%zu," PRETTIFY, MT_getrss()/1024/1024); |
| 212 | logadd("\"size\":" LLFMT "," PRETTIFY, pci? pci->wbytes/1024/1024:0); // result size |
| 213 | |
| 214 | #ifdef NUMAprofiling |
| 215 | logadd("\"numa\":[" ); |
| 216 | if(mb) |
| 217 | for( i= pci->retc ; i < pci->argc; i++) |
| 218 | if( !isVarConstant(mb, getArg(pci,i)) && mb->var[getArg(pci,i)]->worker) |
| 219 | logadd("%c %d" , (i?',':' '), mb->var[getArg(pci,i)]->worker); |
| 220 | logadd("]," PRETTIFY); |
| 221 | #endif |
| 222 | |
| 223 | #ifdef HAVE_SYS_RESOURCE_H |
| 224 | getrusage(RUSAGE_SELF, &infoUsage); |
| 225 | if(infoUsage.ru_inblock - prevUsage.ru_inblock) |
| 226 | logadd("\"inblock\":%ld," PRETTIFY, infoUsage.ru_inblock - prevUsage.ru_inblock); |
| 227 | if(infoUsage.ru_oublock - prevUsage.ru_oublock) |
| 228 | logadd("\"oublock\":%ld," PRETTIFY, infoUsage.ru_oublock - prevUsage.ru_oublock); |
| 229 | if(infoUsage.ru_majflt - prevUsage.ru_majflt) |
| 230 | logadd("\"majflt\":%ld," PRETTIFY, infoUsage.ru_majflt - prevUsage.ru_majflt); |
| 231 | if(infoUsage.ru_nswap - prevUsage.ru_nswap) |
| 232 | logadd("\"nswap\":%ld," PRETTIFY, infoUsage.ru_nswap - prevUsage.ru_nswap); |
| 233 | if(infoUsage.ru_nvcsw - prevUsage.ru_nvcsw) |
| 234 | logadd("\"nvcsw\":%ld," PRETTIFY, infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +infoUsage.ru_nivcsw - prevUsage.ru_nivcsw); |
| 235 | prevUsage = infoUsage; |
| 236 | #endif |
| 237 | |
| 238 | if( mb){ |
| 239 | char prereq[BUFSIZ]; |
| 240 | size_t len; |
| 241 | int i,j,k,comma; |
| 242 | InstrPtr q; |
| 243 | char *truncated; |
| 244 | |
| 245 | /* generate actual call statement */ |
| 246 | stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL); |
| 247 | if (stmt) { |
| 248 | c = stmt; |
| 249 | |
| 250 | while (*c && isspace((unsigned char)*c)) |
| 251 | c++; |
| 252 | if( *c){ |
| 253 | stmtq = mal_quote(c, strlen(c)); |
| 254 | if (stmtq && strlen(stmtq) > LOGLEN/2) { |
| 255 | truncated = truncate_string(stmtq); |
| 256 | GDKfree(stmtq); |
| 257 | stmtq = truncated; |
| 258 | } |
| 259 | if (stmtq != NULL) { |
| 260 | logadd("\"stmt\":\"%s\"," PRETTIFY, stmtq); |
| 261 | GDKfree(stmtq); |
| 262 | } |
| 263 | } |
| 264 | GDKfree(stmt); |
| 265 | } |
| 266 | |
| 267 | // ship the beautified version as well |
| 268 | |
| 269 | stmt = shortStmtRendering(mb, stk, pci); |
| 270 | stmtq = mal_quote(stmt, strlen(stmt)); |
| 271 | if (stmtq && strlen(stmtq) > LOGLEN/2) { |
| 272 | truncated = truncate_string(stmtq); |
| 273 | GDKfree(stmtq); |
| 274 | stmtq = truncated; |
| 275 | } |
| 276 | if (stmtq != NULL) { |
| 277 | logadd("\"short\":\"%s\"," PRETTIFY, stmtq); |
| 278 | GDKfree(stmtq); |
| 279 | } |
| 280 | GDKfree(stmt); |
| 281 | |
| 282 | |
| 283 | // collect the prerequisite pre-requisite statements |
| 284 | prereq[0]='['; |
| 285 | prereq[1]=0; |
| 286 | len = 1; |
| 287 | comma=0; |
| 288 | for(i= pci->retc; i < pci->argc; i++){ |
| 289 | for( j = pci->pc-1; j > 0; j--){ |
| 290 | q= getInstrPtr(mb,j); |
| 291 | for( k=0; k < q->retc; k++) |
| 292 | if( getArg(q,k) == getArg(pci,i)) |
| 293 | break; |
| 294 | if( k < q->retc){ |
| 295 | snprintf(prereq + len, BUFSIZ-len,"%s%d" , (comma?"," :"" ), j); |
| 296 | len = strlen(prereq); |
| 297 | comma++; |
| 298 | break; |
| 299 | } |
| 300 | } |
| 301 | } |
| 302 | #define MALARGUMENTDETAILS |
| 303 | #ifdef MALARGUMENTDETAILS |
| 304 | logadd("\"prereq\":%s]," PRETTIFY, prereq); |
| 305 | #else |
| 306 | logadd("\"prereq\":%s]" PRETTIFY, prereq); |
| 307 | #endif |
| 308 | |
| 309 | /* EXAMPLE MAL statement argument decomposition |
| 310 | * The eventparser may assume this layout for ease of parsing |
| 311 | { |
| 312 | ... as above ... |
| 313 | "result":{"clk":"173297139,"pc":1,"index":0,,"name":"X_6","type":"void","value":"0@0","eol":0} |
| 314 | ... |
| 315 | "argument":{"clk":173297139,"pc":1,"index":"2","type":"str","value":"\"default_pipe\"","eol":0}, |
| 316 | } |
| 317 | This information can be used to determine memory footprint and variable life times. |
| 318 | */ |
| 319 | |
| 320 | #define PRET |
| 321 | #ifdef MALARGUMENTDETAILS |
| 322 | // Also show details of the arguments for modelling |
| 323 | if(mb){ |
| 324 | logadd("\"ret\":[" ); |
| 325 | for( j=0; j< pci->argc; j++){ |
| 326 | int tpe = getVarType(mb, getArg(pci,j)); |
| 327 | str tname = 0, cv; |
| 328 | lng total = 0; |
| 329 | BUN cnt = 0; |
| 330 | bat bid=0; |
| 331 | |
| 332 | if( j == pci->retc ){ |
| 333 | logadd("]," PRETTIFY"\"arg\":[" ); |
| 334 | } |
| 335 | logadd("{" ); |
| 336 | logadd("\"index\":%d," PRET, j); |
| 337 | logadd("\"name\":\"%s\"," PRET, getVarName(mb, getArg(pci,j))); |
| 338 | if( getVarSTC(mb,getArg(pci,j))){ |
| 339 | InstrPtr stc = getInstrPtr(mb, getVarSTC(mb,getArg(pci,j))); |
| 340 | if(stc && strcmp(getModuleId(stc),"sql" ) ==0 && strncmp(getFunctionId(stc),"bind" ,4)==0) |
| 341 | logadd("\"alias\":\"%s.%s.%s\"," PRET, |
| 342 | getVarConstant(mb, getArg(stc,stc->retc +1)).val.sval, |
| 343 | getVarConstant(mb, getArg(stc,stc->retc +2)).val.sval, |
| 344 | getVarConstant(mb, getArg(stc,stc->retc +3)).val.sval); |
| 345 | } |
| 346 | if( isaBatType(tpe) ){ |
| 347 | BAT *d= BATdescriptor( bid = stk->stk[getArg(pci,j)].val.bval); |
| 348 | tname = getTypeName(getBatType(tpe)); |
| 349 | logadd("\"type\":\"bat[:%s]\"," PRET, tname); |
| 350 | if( d) { |
| 351 | BAT *v; |
| 352 | cnt = BATcount(d); |
| 353 | if( isVIEW(d)){ |
| 354 | logadd("\"view\":\"true\"," PRET); |
| 355 | logadd("\"parent\":%d," PRET, VIEWtparent(d)); |
| 356 | logadd("\"seqbase\":" BUNFMT"," PRET, d->hseqbase); |
| 357 | logadd("\"hghbase\":" BUNFMT"," PRET, d->hseqbase + cnt); |
| 358 | v= BBPquickdesc(VIEWtparent(d), false); |
| 359 | logadd("\"kind\":\"%s\"," PRET, (v && !v->batTransient ? "persistent" : "transient" )); |
| 360 | } else |
| 361 | logadd("\"kind\":\"%s\"," PRET, ( d->batTransient ? "transient" : "persistent" )); |
| 362 | total += cnt * d->twidth; |
| 363 | total += heapinfo(d->tvheap, d->batCacheid); |
| 364 | total += hashinfo(d->thash, d->batCacheid); |
| 365 | total += IMPSimprintsize(d); |
| 366 | BBPunfix(d->batCacheid); |
| 367 | } |
| 368 | logadd("\"bid\":%d," PRET, bid); |
| 369 | logadd("\"count\":" BUNFMT"," PRET, cnt); |
| 370 | logadd("\"size\":" LLFMT"," PRET, total); |
| 371 | } else{ |
| 372 | char *truncated = NULL; |
| 373 | tname = getTypeName(tpe); |
| 374 | logadd("\"type\":\"%s\"," PRET, tname); |
| 375 | cv = VALformat(&stk->stk[getArg(pci,j)]); |
| 376 | stmtq = cv ? mal_quote(cv, strlen(cv)) : NULL; |
| 377 | if (stmtq != NULL && strlen(stmtq) > LOGLEN/2) { |
| 378 | truncated = truncate_string(stmtq); |
| 379 | GDKfree(stmtq); |
| 380 | stmtq = truncated; |
| 381 | } |
| 382 | if (stmtq == NULL) { |
| 383 | logadd("\"value\":\"(null)\"," PRET); |
| 384 | } else { |
| 385 | logadd("\"value\":\"%s\"," , stmtq); |
| 386 | } |
| 387 | GDKfree(cv); |
| 388 | GDKfree(stmtq); |
| 389 | } |
| 390 | logadd("\"eol\":%d" PRET, getVarEolife(mb,getArg(pci,j))); |
| 391 | logadd("\"used\":%d" PRET, isVarUsed(mb,getArg(pci,j))); |
| 392 | logadd("\"fixed\":%d" PRET, isVarFixed(mb,getArg(pci,j))); |
| 393 | logadd("\"udf\":%d" PRET, isVarUDFtype(mb,getArg(pci,j))); |
| 394 | GDKfree(tname); |
| 395 | logadd("}%s" , (j< pci->argc-1 && j != pci->retc -1?"," :"" )); |
| 396 | } |
| 397 | logadd("]" PRETTIFY); // end marker for arguments |
| 398 | } |
| 399 | } |
| 400 | #endif |
| 401 | logadd("}\n" ); // end marker |
| 402 | logjsonInternal(logbuffer); |
| 403 | } |
| 404 | |
| 405 | /* the OS details on cpu load are read from /proc/stat |
| 406 | * We should use an OS define to react to the maximal cores |
| 407 | */ |
| 408 | |
| 409 | static struct{ |
| 410 | lng user, nice, system, idle, iowait; |
| 411 | double load; |
| 412 | } corestat[256]; |
| 413 | |
| 414 | static int |
| 415 | getCPULoad(char cpuload[BUFSIZ]){ |
| 416 | int cpu, len = 0, i; |
| 417 | lng user, nice, system, idle, iowait; |
| 418 | size_t n; |
| 419 | char buf[BUFSIZ+1], *s; |
| 420 | static FILE *proc= NULL; |
| 421 | lng newload; |
| 422 | |
| 423 | if (proc == NULL || ferror(proc)) |
| 424 | proc = fopen("/proc/stat" ,"r" ); |
| 425 | else rewind(proc); |
| 426 | if (proc == NULL) { |
| 427 | /* unexpected */ |
| 428 | return -1; |
| 429 | } |
| 430 | /* read complete file to avoid concurrent write issues */ |
| 431 | if ((n = fread(buf, 1, BUFSIZ,proc)) == 0 ) |
| 432 | return -1; |
| 433 | buf[n] = 0; |
| 434 | for (s= buf; *s; s++) { |
| 435 | if (strncmp(s,"cpu" ,3)== 0){ |
| 436 | s +=3; |
| 437 | if (*s == ' ') { |
| 438 | s++; |
| 439 | cpu = 255; // the cpu totals stored here |
| 440 | } else { |
| 441 | cpu = atoi(s); |
| 442 | if (cpu < 0 || cpu > 255) |
| 443 | cpu = 255; |
| 444 | } |
| 445 | s= strchr(s,' '); |
| 446 | if (s == NULL) /* unexpected format of file */ |
| 447 | break; |
| 448 | |
| 449 | while( *s && isspace((unsigned char)*s)) s++; |
| 450 | i= sscanf(s,LLSCN" " LLSCN" " LLSCN" " LLSCN" " LLSCN, &user, &nice, &system, &idle, &iowait); |
| 451 | if ( i != 5 ) |
| 452 | goto skip; |
| 453 | newload = (user - corestat[cpu].user + nice - corestat[cpu].nice + system - corestat[cpu].system); |
| 454 | if ( newload) |
| 455 | corestat[cpu].load = (double) newload / (newload + idle - corestat[cpu].idle + iowait - corestat[cpu].iowait); |
| 456 | corestat[cpu].user = user; |
| 457 | corestat[cpu].nice = nice; |
| 458 | corestat[cpu].system = system; |
| 459 | corestat[cpu].idle = idle; |
| 460 | corestat[cpu].iowait = iowait; |
| 461 | } |
| 462 | skip: |
| 463 | while (*s && *s != '\n') |
| 464 | s++; |
| 465 | } |
| 466 | |
| 467 | if(cpuload == 0) |
| 468 | return 0; |
| 469 | // identify core processing |
| 470 | len += snprintf(cpuload, BUFSIZ, "[" ); |
| 471 | for (cpu = 0; cpuload && cpu < 255 && corestat[cpu].user; cpu++) { |
| 472 | len +=snprintf(cpuload + len, BUFSIZ - len, "%s%.2f" , (cpu?"," :"" ), corestat[cpu].load); |
| 473 | } |
| 474 | (void) snprintf(cpuload + len, BUFSIZ - len, "]" ); |
| 475 | return 0; |
| 476 | } |
| 477 | |
| 478 | void |
| 479 | profilerHeartbeatEvent(char *alter) |
| 480 | { |
| 481 | char cpuload[BUFSIZ]; |
| 482 | char logbuffer[LOGLEN], *logbase; |
| 483 | int loglen; |
| 484 | lng usec; |
| 485 | uint64_t microseconds; |
| 486 | |
| 487 | if (ATOMIC_GET(&hbdelay) == 0 || maleventstream == 0) |
| 488 | return; |
| 489 | usec = GDKusec(); |
| 490 | microseconds = (uint64_t)startup_time.tv_sec*1000000 + (uint64_t)startup_time.tv_usec + (uint64_t)usec; |
| 491 | |
| 492 | /* get CPU load on beat boundaries only */ |
| 493 | if ( getCPULoad(cpuload) ) |
| 494 | return; |
| 495 | |
| 496 | lognew(); |
| 497 | logadd("{" PRETTIFY); // fill in later with the event counter |
| 498 | logadd("\"source\":\"heartbeat\"," PRETTIFY); |
| 499 | if (GDKinmemory()) { |
| 500 | char *uuid, *err; |
| 501 | if ((err = msab_getUUID(&uuid)) == NULL) { |
| 502 | logadd("\"session\":\"%s\"," PRETTIFY, uuid); |
| 503 | free(uuid); |
| 504 | } else |
| 505 | free(err); |
| 506 | } |
| 507 | logadd("\"clk\":" LLFMT"," PRETTIFY,usec); |
| 508 | logadd("\"ctime\":%" PRIu64"," PRETTIFY, microseconds); |
| 509 | logadd("\"rss\":%zu," PRETTIFY, MT_getrss()/1024/1024); |
| 510 | #ifdef HAVE_SYS_RESOURCE_H |
| 511 | getrusage(RUSAGE_SELF, &infoUsage); |
| 512 | if(infoUsage.ru_inblock - prevUsage.ru_inblock) |
| 513 | logadd("\"inblock\":%ld," PRETTIFY, infoUsage.ru_inblock - prevUsage.ru_inblock); |
| 514 | if(infoUsage.ru_oublock - prevUsage.ru_oublock) |
| 515 | logadd("\"oublock\":%ld," PRETTIFY, infoUsage.ru_oublock - prevUsage.ru_oublock); |
| 516 | if(infoUsage.ru_majflt - prevUsage.ru_majflt) |
| 517 | logadd("\"majflt\":%ld," PRETTIFY, infoUsage.ru_majflt - prevUsage.ru_majflt); |
| 518 | if(infoUsage.ru_nswap - prevUsage.ru_nswap) |
| 519 | logadd("\"nswap\":%ld," PRETTIFY, infoUsage.ru_nswap - prevUsage.ru_nswap); |
| 520 | if(infoUsage.ru_nvcsw - prevUsage.ru_nvcsw) |
| 521 | logadd("\"nvcsw\":%ld," PRETTIFY, infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +infoUsage.ru_nivcsw - prevUsage.ru_nivcsw); |
| 522 | prevUsage = infoUsage; |
| 523 | #endif |
| 524 | logadd("\"state\":\"%s\"," PRETTIFY,alter); |
| 525 | logadd("\"cpuload\":%s" PRETTIFY,cpuload); |
| 526 | logadd("}\n" ); // end marker |
| 527 | logjsonInternal(logbuffer); |
| 528 | } |
| 529 | |
| 530 | void |
| 531 | profilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int start) |
| 532 | { |
| 533 | (void) cntxt; |
| 534 | if (stk == NULL) return; |
| 535 | if (pci == NULL) return; |
| 536 | if (getModuleId(pci) == myname) // ignore profiler commands from monitoring |
| 537 | return; |
| 538 | |
| 539 | if( maleventstream) { |
| 540 | renderProfilerEvent(cntxt, mb, stk, pci, start); |
| 541 | if ( !start && pci->pc ==0) |
| 542 | profilerHeartbeatEvent("ping" ); |
| 543 | if ( start && pci->token == ENDsymbol) |
| 544 | profilerHeartbeatEvent("ping" ); |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | /* The first scheme dumps the events on a stream (and in the pool) |
| 549 | */ |
| 550 | str |
| 551 | openProfilerStream(Client cntxt) |
| 552 | { |
| 553 | int j; |
| 554 | |
| 555 | #ifdef HAVE_SYS_RESOURCE_H |
| 556 | getrusage(RUSAGE_SELF, &infoUsage); |
| 557 | prevUsage = infoUsage; |
| 558 | #endif |
| 559 | if (myname == 0){ |
| 560 | myname = putName("profiler" ); |
| 561 | logjsonInternal(monet_characteristics); |
| 562 | } |
| 563 | if( maleventstream){ |
| 564 | /* The DBA can always grab the stream, others have to wait */ |
| 565 | if ( cntxt->user == MAL_ADMIN) |
| 566 | closeProfilerStream(cntxt); |
| 567 | else |
| 568 | throw(MAL,"profiler.start" ,"Profiler already running, stream not available" ); |
| 569 | } |
| 570 | malProfileMode = -1; |
| 571 | maleventstream = cntxt->fdout; |
| 572 | malprofileruser = cntxt->user; |
| 573 | |
| 574 | // Ignore the JSON rendering mode, use compiled time version |
| 575 | |
| 576 | /* show all in progress instructions for stethoscope startup */ |
| 577 | /* this code is not thread safe, because the inprogress administration may change concurrently */ |
| 578 | MT_lock_set(&mal_delayLock); |
| 579 | for(j = 0; j <THREADS; j++) |
| 580 | if( workingset[j].mb) |
| 581 | /* show the event */ |
| 582 | profilerEvent(workingset[j].cntxt, workingset[j].mb, workingset[j].stk, workingset[j].pci, 1); |
| 583 | MT_lock_unset(&mal_delayLock); |
| 584 | return MAL_SUCCEED; |
| 585 | } |
| 586 | |
| 587 | str |
| 588 | closeProfilerStream(Client cntxt) |
| 589 | { |
| 590 | (void) cntxt; |
| 591 | maleventstream = NULL; |
| 592 | malProfileMode = 0; |
| 593 | malprofileruser = 0; |
| 594 | return MAL_SUCCEED; |
| 595 | } |
| 596 | |
| 597 | /* the second scheme is to collect the profile |
| 598 | * events in a local table for direct SQL inspection |
| 599 | */ |
| 600 | str |
| 601 | startProfiler(Client cntxt) |
| 602 | { |
| 603 | #ifdef HAVE_SYS_RESOURCE_H |
| 604 | getrusage(RUSAGE_SELF, &infoUsage); |
| 605 | prevUsage = infoUsage; |
| 606 | #endif |
| 607 | (void) cntxt; |
| 608 | |
| 609 | if( maleventstream){ |
| 610 | throw(MAL,"profiler.start" ,"Profiler already running, stream not available" ); |
| 611 | } |
| 612 | MT_lock_set(&mal_profileLock ); |
| 613 | if (myname == 0){ |
| 614 | myname = putName("profiler" ); |
| 615 | } |
| 616 | malProfileMode = 1; |
| 617 | MT_lock_unset(&mal_profileLock); |
| 618 | logjsonInternal(monet_characteristics); |
| 619 | // reset the trace table |
| 620 | clearTrace(cntxt); |
| 621 | |
| 622 | return MAL_SUCCEED; |
| 623 | } |
| 624 | |
| 625 | /* SQL tracing is simplified, because it only collects the events in the temporary table. |
| 626 | */ |
| 627 | str |
| 628 | startTrace(Client cntxt) |
| 629 | { |
| 630 | cntxt->sqlprofiler = TRUE; |
| 631 | clearTrace(cntxt); |
| 632 | return MAL_SUCCEED; |
| 633 | } |
| 634 | |
| 635 | str |
| 636 | stopTrace(Client cntxt) |
| 637 | { |
| 638 | cntxt->sqlprofiler = FALSE; |
| 639 | return MAL_SUCCEED; |
| 640 | } |
| 641 | |
| 642 | str |
| 643 | stopProfiler(Client cntxt) |
| 644 | { |
| 645 | MT_lock_set(&mal_profileLock); |
| 646 | malProfileMode = 0; |
| 647 | setHeartbeat(0); // stop heartbeat |
| 648 | if( cntxt) |
| 649 | closeProfilerStream(cntxt); |
| 650 | MT_lock_unset(&mal_profileLock); |
| 651 | return MAL_SUCCEED; |
| 652 | } |
| 653 | |
| 654 | /* |
| 655 | * SQL profile traces |
| 656 | * The events being captured are stored in client specific BATs. |
| 657 | * They are made persistent to accumate information over |
| 658 | * multiple sessions. This means it has to be explicitly reset |
| 659 | * to avoid disc overflow using profiler.reset(). |
| 660 | * |
| 661 | * The information returned for the trace is purposely limited |
| 662 | * to the ticks and the final MAL instruction. |
| 663 | * For more detailed analysis, the stethoscope should be used. |
| 664 | */ |
| 665 | |
| 666 | static void |
| 667 | _cleanupProfiler(Client cntxt) |
| 668 | { |
| 669 | BBPunfix(cntxt->profticks->batCacheid); |
| 670 | BBPunfix(cntxt->profstmt->batCacheid); |
| 671 | cntxt->profticks = cntxt->profstmt = NULL; |
| 672 | } |
| 673 | |
| 674 | static BAT * |
| 675 | TRACEcreate(int tt) |
| 676 | { |
| 677 | BAT *b; |
| 678 | |
| 679 | b = COLnew(0, tt, 1 << 10, TRANSIENT); |
| 680 | if (b == NULL) |
| 681 | return NULL; |
| 682 | return b; |
| 683 | } |
| 684 | |
| 685 | static void |
| 686 | initTrace(Client cntxt) |
| 687 | { |
| 688 | MT_lock_set(&mal_profileLock); |
| 689 | if (cntxt->profticks) { |
| 690 | MT_lock_unset(&mal_profileLock); |
| 691 | return; /* already initialized */ |
| 692 | } |
| 693 | cntxt->profticks = TRACEcreate( TYPE_lng); |
| 694 | cntxt->profstmt = TRACEcreate( TYPE_str); |
| 695 | if ( cntxt->profticks == NULL || cntxt->profstmt == NULL ) |
| 696 | _cleanupProfiler(cntxt); |
| 697 | MT_lock_unset(&mal_profileLock); |
| 698 | } |
| 699 | |
| 700 | int |
| 701 | TRACEtable(Client cntxt, BAT **r) |
| 702 | { |
| 703 | initTrace(cntxt); |
| 704 | MT_lock_set(&mal_profileLock); |
| 705 | if (cntxt->profticks == NULL) { |
| 706 | MT_lock_unset(&mal_profileLock); |
| 707 | return -1; /* not initialized */ |
| 708 | } |
| 709 | r[0] = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT); |
| 710 | r[1] = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT); |
| 711 | MT_lock_unset(&mal_profileLock); |
| 712 | return 2; |
| 713 | } |
| 714 | |
| 715 | BAT * |
| 716 | getTrace(Client cntxt, const char *nme) |
| 717 | { |
| 718 | BAT *bn = NULL; |
| 719 | |
| 720 | MT_lock_set(&mal_profileLock); |
| 721 | if (cntxt->profticks) { |
| 722 | if (strcmp(nme, "usec" ) == 0){ |
| 723 | bn = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT); |
| 724 | } else if (strcmp(nme, "stmt" ) == 0){ |
| 725 | bn = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT); |
| 726 | } |
| 727 | } |
| 728 | MT_lock_unset(&mal_profileLock); |
| 729 | return bn; |
| 730 | } |
| 731 | |
| 732 | void |
| 733 | clearTrace(Client cntxt) |
| 734 | { |
| 735 | (void) cntxt; |
| 736 | MT_lock_set(&mal_profileLock); |
| 737 | if (cntxt->profticks == NULL) { |
| 738 | MT_lock_unset(&mal_profileLock); |
| 739 | initTrace(cntxt); |
| 740 | return; /* not initialized */ |
| 741 | } |
| 742 | /* drop all trace tables */ |
| 743 | _cleanupProfiler(cntxt); |
| 744 | MT_lock_unset(&mal_profileLock); |
| 745 | initTrace(cntxt); |
| 746 | } |
| 747 | |
| 748 | str |
| 749 | cleanupTraces(Client cntxt) |
| 750 | { |
| 751 | clearTrace(cntxt); |
| 752 | return MAL_SUCCEED; |
| 753 | } |
| 754 | |
| 755 | void |
| 756 | sqlProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 757 | { |
| 758 | str stmt, c; |
| 759 | int errors = 0; |
| 760 | |
| 761 | if (cntxt->profticks == NULL) |
| 762 | return; |
| 763 | |
| 764 | /* generate actual call statement */ |
| 765 | stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL); |
| 766 | c = stmt; |
| 767 | |
| 768 | while (c && *c && (isspace((unsigned char)*c) || *c == '!')) |
| 769 | c++; |
| 770 | |
| 771 | // keep it a short transaction |
| 772 | MT_lock_set(&mal_profileLock); |
| 773 | if (cntxt->profticks == NULL) { |
| 774 | MT_lock_unset(&mal_profileLock); |
| 775 | return; |
| 776 | } |
| 777 | errors += BUNappend(cntxt->profticks, &pci->ticks, false) != GDK_SUCCEED; |
| 778 | errors += BUNappend(cntxt->profstmt, c, false) != GDK_SUCCEED; |
| 779 | if (errors > 0) { |
| 780 | /* stop profiling if an error occurred */ |
| 781 | cntxt->sqlprofiler = FALSE; |
| 782 | } |
| 783 | |
| 784 | MT_lock_unset(&mal_profileLock); |
| 785 | GDKfree(stmt); |
| 786 | } |
| 787 | |
| 788 | lng |
| 789 | getDiskWrites(void) |
| 790 | { |
| 791 | #ifdef HAVE_SYS_RESOURCE_H |
| 792 | struct rusage infoUsage; |
| 793 | getrusage(RUSAGE_SELF, &infoUsage); |
| 794 | return infoUsage.ru_oublock; |
| 795 | #else |
| 796 | return 0; |
| 797 | #endif |
| 798 | } |
| 799 | |
| 800 | lng |
| 801 | getDiskReads(void) |
| 802 | { |
| 803 | #ifdef HAVE_SYS_RESOURCE_H |
| 804 | struct rusage infoUsage; |
| 805 | getrusage(RUSAGE_SELF, &infoUsage); |
| 806 | return infoUsage.ru_inblock; |
| 807 | #else |
| 808 | return 0; |
| 809 | #endif |
| 810 | } |
| 811 | |
| 812 | lng |
| 813 | getUserTime(void) |
| 814 | { |
| 815 | #ifdef HAVE_TIMES |
| 816 | struct tms newTms; |
| 817 | times(&newTms); |
| 818 | return newTms.tms_utime; |
| 819 | #else |
| 820 | return 0; |
| 821 | #endif |
| 822 | } |
| 823 | |
| 824 | lng |
| 825 | getSystemTime(void) |
| 826 | { |
| 827 | #ifdef HAVE_TIMES |
| 828 | struct tms newTms; |
| 829 | times(&newTms); |
| 830 | return newTms.tms_stime; |
| 831 | #else |
| 832 | return 0; |
| 833 | #endif |
| 834 | } |
| 835 | |
| 836 | /* Calculate a pessimistic size of the disk storage */ |
| 837 | lng |
| 838 | getDiskSpace(void) |
| 839 | { |
| 840 | BAT *b; |
| 841 | bat i; |
| 842 | lng size = 0; |
| 843 | |
| 844 | for (i = 1; i < getBBPsize(); i++) |
| 845 | if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))) { |
| 846 | b = BATdescriptor(i); |
| 847 | if (b) { |
| 848 | size += sizeof(BAT); |
| 849 | if (!isVIEW(b)) { |
| 850 | BUN cnt = BATcount(b); |
| 851 | |
| 852 | size += tailsize(b, cnt); |
| 853 | /* the upperbound is used for the heaps */ |
| 854 | if (b->tvheap) |
| 855 | size += HEAPvmsize(b->tvheap); |
| 856 | if (b->thash) |
| 857 | size += sizeof(BUN) * cnt; |
| 858 | /* also add the size of an imprint, ordered index or mosaic */ |
| 859 | if( b->timprints) |
| 860 | size += IMPSimprintsize(b); |
| 861 | if( b->torderidx) |
| 862 | size += HEAPvmsize(b->torderidx); |
| 863 | } |
| 864 | BBPunfix(i); |
| 865 | } |
| 866 | } |
| 867 | return size; |
| 868 | } |
| 869 | |
| 870 | |
| 871 | void profilerGetCPUStat(lng *user, lng *nice, lng *sys, lng *idle, lng *iowait) |
| 872 | { |
| 873 | (void) getCPULoad(0); |
| 874 | *user = corestat[255].user; |
| 875 | *nice = corestat[255].nice; |
| 876 | *sys = corestat[255].system; |
| 877 | *idle = corestat[255].idle; |
| 878 | *iowait = corestat[255].iowait; |
| 879 | } |
| 880 | |
| 881 | /* the heartbeat process produces a ping event once every X milliseconds */ |
| 882 | static MT_Id hbthread; |
| 883 | static ATOMIC_TYPE hbrunning = ATOMIC_VAR_INIT(0); |
| 884 | |
| 885 | static void profilerHeartbeat(void *dummy) |
| 886 | { |
| 887 | int t; |
| 888 | const int timeout = GDKdebug & FORCEMITOMASK ? 10 : 25; |
| 889 | |
| 890 | (void) dummy; |
| 891 | for (;;) { |
| 892 | /* wait until you need this info */ |
| 893 | MT_thread_setworking("sleeping" ); |
| 894 | while (ATOMIC_GET(&hbdelay) == 0 || maleventstream == NULL) { |
| 895 | if (GDKexiting() || !ATOMIC_GET(&hbrunning)) |
| 896 | return; |
| 897 | MT_sleep_ms(timeout); |
| 898 | } |
| 899 | for (t = (int) ATOMIC_GET(&hbdelay); t > 0; t -= timeout) { |
| 900 | if (GDKexiting() || !ATOMIC_GET(&hbrunning)) |
| 901 | return; |
| 902 | MT_sleep_ms(t > timeout ? timeout : t); |
| 903 | } |
| 904 | if (GDKexiting() || !ATOMIC_GET(&hbrunning)) |
| 905 | return; |
| 906 | MT_thread_setworking("pinging" ); |
| 907 | profilerHeartbeatEvent( "ping" ); |
| 908 | } |
| 909 | } |
| 910 | |
| 911 | void setHeartbeat(int delay) |
| 912 | { |
| 913 | if (delay < 0 ){ |
| 914 | ATOMIC_SET(&hbrunning, 0); |
| 915 | if (hbthread) |
| 916 | MT_join_thread(hbthread); |
| 917 | return; |
| 918 | } |
| 919 | if ( delay > 0 && delay <= 10) |
| 920 | delay = 10; |
| 921 | ATOMIC_SET(&hbdelay, delay); |
| 922 | } |
| 923 | |
| 924 | /* TODO getprofilerlimit and setprofilerlimit functions */ |
| 925 | |
| 926 | int getprofilerlimit(void) |
| 927 | { |
| 928 | return 0; |
| 929 | } |
| 930 | |
| 931 | void setprofilerlimit(int limit) |
| 932 | { |
| 933 | (void) limit; |
| 934 | } |
| 935 | |
| 936 | void initProfiler(void) |
| 937 | { |
| 938 | gettimeofday(&startup_time, NULL); |
| 939 | } |
| 940 | |
| 941 | void initHeartbeat(void) |
| 942 | { |
| 943 | ATOMIC_SET(&hbrunning, 1); |
| 944 | if (MT_create_thread(&hbthread, profilerHeartbeat, NULL, MT_THR_JOINABLE, |
| 945 | "heartbeat" ) < 0) { |
| 946 | /* it didn't happen */ |
| 947 | hbthread = 0; |
| 948 | ATOMIC_SET(&hbrunning, 0); |
| 949 | } |
| 950 | } |
| 951 | |