1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#include "capi.h"
10#include "cheader.h"
11#include "cheader.text.h"
12
13#include "mtime.h"
14#include "blob.h"
15
16#include <setjmp.h>
17#include <signal.h>
18#include <sys/mman.h>
19#include <unistd.h>
20#include <string.h>
21
22#if defined(__GNUC__) && !defined(__clang__)
23#pragma GCC diagnostic ignored "-Wclobbered"
24#endif
25
/* Name of the GDK environment option that CUDFprelude() consults to decide
 * whether input memory may be handled through the mprotect code path. */
const char *mprotect_enableflag = "enable_mprotect";
/* Cached value of the option above; written once by CUDFprelude(). */
static bool option_enable_mprotect = false;
/* Name of the GDK environment option that CUDFprelude() consults to decide
 * whether allocation failures may be reported through longjmp(). */
const char *longjmp_enableflag = "enable_longjmp";
/* Cached value of the option above; written once by CUDFprelude(). */
static bool option_enable_longjmp = false;
30
/*
 * Header that add_allocated_region() writes at the start of a tracked
 * allocation.  The headers form a singly linked, per-thread list; the memory
 * handed to the caller starts directly after the header.
 */
typedef struct _allocated_region allocated_region;
struct _allocated_region {
	allocated_region *next; /* next tracked allocation, or NULL */
};
35
/*
 * One entry in the list of memory ranges recorded by mprotect_region().
 */
typedef struct _mprotected_region mprotected_region;
struct _mprotected_region {
	void *addr; /* start address of the recorded range */
	size_t len; /* length of the range in bytes */

	mprotected_region *next; /* next recorded range, or NULL */
};
43
/* Forward declarations; both functions are defined further down in this
 * file. */
static char *mprotect_region(void *addr, size_t len,
							 mprotected_region **regions);
static char *clear_mprotect(void *addr, size_t len);

/* Per-thread state, indexed by the value returned from THRgettid(). */
/* Head of the list of allocations tracked by add_allocated_region(). */
static allocated_region *allocated_regions[THREADS];
/* longjmp() target used by the signal handler and the failing allocators. */
static jmp_buf jump_buffer[THREADS];
50
/*
 * Signature of a compiled UDF entry point.  It matches the function header
 * that CUDFeval() writes into the generated source file: arrays of input and
 * output descriptors plus the allocator callbacks, returning a char pointer
 * (the generated body ends with "return 0;").
 */
typedef char *(*jitted_function)(void **inputs, void **outputs,
								 malloc_function_ptr malloc, free_function_ptr free);
53
/*
 * Entry in the cache of already compiled UDFs.  Entries that land in the same
 * bucket of function_cache are chained through `next`.
 */
struct _cached_functions;
typedef struct _cached_functions {
	jitted_function function;       /* entry point obtained through dlsym() */
	BUN expression_hash;            /* hash of the UDF source text */
	char *parameters;               /* type codes followed by argument and output names */
	void *dll_handle;               /* handle returned by dlopen() */
	struct _cached_functions *next; /* next entry in the same bucket */
} cached_functions;
62
/* Number of buckets in function_cache; CUDFeval() selects a bucket with
 * (hash of the function name) % FUNCTION_CACHE_SIZE. */
#define FUNCTION_CACHE_SIZE 128

/* Bucket heads of the compiled-function cache. */
static cached_functions *function_cache[FUNCTION_CACHE_SIZE];
/* Held while function_cache is searched or a new entry is linked in. */
static MT_Lock cache_lock = MT_LOCK_INITIALIZER("cache_lock");
/* Non-zero once CUDFprelude() has read the configuration options. */
static int cudf_initialized = 0;

/* Shared implementation behind CUDFevalStd() and CUDFevalAggr(); `grouped`
 * distinguishes the aggregate entry point from the standard one. */
static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
					bool grouped);
71
72str CUDFevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
73{
74 return CUDFeval(cntxt, mb, stk, pci, false);
75}
76
77str CUDFevalAggr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
78{
79 return CUDFeval(cntxt, mb, stk, pci, true);
80}
81
82str CUDFprelude(void *ret)
83{
84 (void)ret;
85 if (!cudf_initialized) {
86 cudf_initialized = true;
87 option_enable_mprotect = GDKgetenv_istrue(mprotect_enableflag) || GDKgetenv_isyes(mprotect_enableflag);
88 option_enable_longjmp = GDKgetenv_istrue(longjmp_enableflag) || GDKgetenv_isyes(longjmp_enableflag);
89 }
90 return MAL_SUCCEED;
91}
92
/*
 * Write data_size bytes starting at data to the stream f.
 *
 * Returns true when the stream's error indicator is clear after the write,
 * false otherwise.
 */
static bool WriteDataToFile(FILE *f, const void *data, size_t data_size)
{
	/* success is judged from the stream's error indicator, not the count */
	(void)fwrite(data, data_size, 1, f);
	if (ferror(f)) {
		return false;
	}
	return true;
}
98
/*
 * Write the NUL-terminated string data (without its terminator) to f.
 * Returns the result of WriteDataToFile().
 */
static bool WriteTextToFile(FILE *f, const char *data)
{
	size_t text_length = strlen(data);

	return WriteDataToFile(f, data, text_length);
}
103
104static void handler(int sig, siginfo_t *si, void *unused)
105{
106 int tid = THRgettid();
107
108 (void)sig;
109 (void)si;
110 (void)unused;
111
112 longjmp(jump_buffer[tid], 1);
113}
114
115static bool can_mprotect_region(void* addr) {
116 if (!option_enable_mprotect) return false;
117 int pagesize = getpagesize();
118 void* page_begin = (void *)((size_t)addr - (size_t)addr % pagesize);
119 return page_begin == addr;
120}
121
122static char *mprotect_region(void *addr, size_t len,
123 mprotected_region **regions)
124{
125 mprotected_region *region;
126 if (len == 0)
127 return NULL;
128
129 assert(can_mprotect_region(addr));
130
131 region = GDKmalloc(sizeof(mprotected_region));
132 if (!region) {
133 return MAL_MALLOC_FAIL;
134 }
135 region->addr = addr;
136 region->len = len;
137 region->next = *regions;
138 *regions = region;
139 return NULL;
140}
141
/*
 * Make the range [addr, addr + len) readable and writable again.
 *
 * A NULL addr is accepted and treated as nothing to do.
 *
 * Returns NULL on success, or the strerror() text for errno when mprotect()
 * fails.
 */
static char *clear_mprotect(void *addr, size_t len)
{
	if (addr != NULL && mprotect(addr, len, PROT_READ | PROT_WRITE) < 0) {
		return strerror(errno);
	}
	return NULL;
}
152
/*
 * Write the NUL-terminated string `data` to the stream `f`.  On failure,
 * reset errno, store a "Write error." exception in `msg` and jump to the
 * `wrapup` label.
 *
 * The expansion site must have `msg` and a `wrapup` label in scope.  The body
 * is wrapped in do { } while (0) so that the macro behaves like a single
 * statement (safe inside an unbraced if/else); invoke it with a trailing
 * semicolon.
 */
#define ATTEMPT_TO_WRITE_TO_FILE(f, data)                                      \
	do {                                                                       \
		if (!WriteTextToFile(f, data)) {                                       \
			errno = 0;                                                         \
			msg = createException(MAL, "cudf.eval", "Write error.");           \
			goto wrapup;                                                       \
		}                                                                      \
	} while (0)
159
/*
 * Write `size` bytes starting at `data` to the stream `f`.  On failure, reset
 * errno, store a "Write error." exception in `msg` and jump to the `wrapup`
 * label.
 *
 * The expansion site must have `msg` and a `wrapup` label in scope.  The body
 * is wrapped in do { } while (0) so that the macro behaves like a single
 * statement (safe inside an unbraced if/else); invoke it with a trailing
 * semicolon.
 */
#define ATTEMPT_TO_WRITE_DATA_TO_FILE(f, data, size)                           \
	do {                                                                       \
		if (!WriteDataToFile(f, data, size)) {                                 \
			errno = 0;                                                         \
			msg = createException(MAL, "cudf.eval", "Write error.");           \
			goto wrapup;                                                       \
		}                                                                      \
	} while (0)
166
167static void *jump_GDK_malloc(size_t size)
168{
169 if (size == 0)
170 return NULL;
171 void *ptr = GDKmalloc(size);
172 if (!ptr && option_enable_longjmp) {
173 longjmp(jump_buffer[THRgettid()], 2);
174 }
175 return ptr;
176}
177
178static void *add_allocated_region(void *ptr)
179{
180 allocated_region *region;
181 int tid = THRgettid();
182 region = (allocated_region *)ptr;
183 region->next = allocated_regions[tid];
184 allocated_regions[tid] = region;
185 return (char *)ptr + sizeof(allocated_region);
186}
187
188static void *wrapped_GDK_malloc(size_t size)
189{
190 if (size == 0)
191 return NULL;
192 void *ptr = jump_GDK_malloc(size + sizeof(allocated_region));
193 return add_allocated_region(ptr);
194}
195
/*
 * Free callback paired with the wrapped allocators.  Deliberately does
 * nothing with ptr.
 * NOTE(review): presumably tracked allocations are released in bulk via the
 * allocated_regions list elsewhere in this file -- confirm.
 */
static void wrapped_GDK_free(void *ptr)
{
	(void)ptr;
}
200
201static void *wrapped_GDK_malloc_nojump(size_t size)
202{
203 if (size == 0)
204 return NULL;
205 void *ptr = GDKmalloc(size + sizeof(allocated_region));
206 if (!ptr) {
207 return NULL;
208 }
209 return add_allocated_region(ptr);
210}
211
212static void *wrapped_GDK_zalloc_nojump(size_t size)
213{
214 if (size == 0)
215 return NULL;
216 void *ptr = GDKzalloc(size + sizeof(allocated_region));
217 if (!ptr) {
218 return NULL;
219 }
220 return add_allocated_region(ptr);
221}
222
/*
 * Define tpename##_is_null(), a function wrapper around is_<tpename>_nil()
 * whose address is stored in the `is_null` member of the UDF data structs.
 */
#define GENERATE_NUMERIC_IS_NULL(type, tpename) \
	static int tpename##_is_null(type value) { return is_##tpename##_nil(value); }
225
/*
 * Define tpename##_initialize(), which is stored in the `initialize` member
 * of the UDF data structs.
 *
 * The generated function:
 *  - releases a BAT previously attached to `self` (BBPunfix) and clears
 *    self->bat;
 *  - creates a new TRANSIENT BAT of TYPE_<tpename> with capacity `count`;
 *  - on failure either longjmp()s to the thread's jump_buffer with value 2
 *    (longjmp option enabled) or returns, leaving self->count and self->data
 *    untouched;
 *  - on success attaches the BAT to `self`, points self->data at the BAT's
 *    heap base and sets both self->count and the BAT count to `count`.
 */
#define GENERATE_NUMERIC_INITIALIZE(type, tpename) \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self, \
									 size_t count) \
	{ \
		BAT* b; \
		if (self->bat) { \
			BBPunfix(((BAT*)self->bat)->batCacheid); \
			self->bat = NULL; \
		} \
		b = COLnew(0, TYPE_##tpename, count, TRANSIENT); \
		if (!b) { \
			if (option_enable_longjmp) longjmp(jump_buffer[THRgettid()], 2); \
			else return; \
		} \
		self->bat = (void*) b; \
		self->count = count; \
		self->data = (type*) b->theap.base; \
		BATsetcount(b, count); \
	}
245
/* Emit both helpers (initialize and is_null) for one numeric type. */
#define GENERATE_NUMERIC_ALL(type, tpename) \
	GENERATE_NUMERIC_INITIALIZE(type, tpename) \
	GENERATE_NUMERIC_IS_NULL(type, tpename)
249
250
/*
 * For a non-numeric type: declare tpename##_is_null() (no definition is
 * emitted here) and define tpename##_initialize(), which stores `count` and
 * allocates `count` elements for self->data through jump_GDK_malloc().
 * The allocation is not registered with add_allocated_region().
 */
#define GENERATE_BASE_HEADERS(type, tpename) \
	static int tpename##_is_null(type value); \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self, \
									 size_t count) \
	{ \
		self->count = count; \
		self->data = jump_GDK_malloc(count * sizeof(self->null_value)); \
	}
259
/* initialize/is_null helpers for the fixed-width numeric types */
GENERATE_NUMERIC_ALL(bit, bit);
GENERATE_NUMERIC_ALL(bte, bte);
GENERATE_NUMERIC_ALL(sht, sht);
GENERATE_NUMERIC_ALL(int, int);
GENERATE_NUMERIC_ALL(lng, lng);
GENERATE_NUMERIC_ALL(flt, flt);
GENERATE_NUMERIC_ALL(dbl, dbl);
GENERATE_NUMERIC_ALL(oid, oid);

/* is_null prototype plus initialize definition for the remaining types */
GENERATE_BASE_HEADERS(char *, str);
GENERATE_BASE_HEADERS(cudf_data_date, date);
GENERATE_BASE_HEADERS(cudf_data_time, time);
GENERATE_BASE_HEADERS(cudf_data_timestamp, timestamp);
/* blob helpers are only declared here, not generated by a macro */
static int blob_is_null(cudf_data_blob value);
static void blob_initialize(struct cudf_data_struct_blob *self,
							size_t count);
276
/*
 * Allocate and set up the descriptor struct for one UDF input of type `tpe`.
 *
 * The expansion site must provide `inputs`, `index`, `argnode`, `msg` and a
 * `wrapup` label.  The macro declares a local `bat_data` that stays visible
 * in the enclosing block so the caller can fill in count/data afterwards.
 * On allocation failure `msg` is set and control jumps to `wrapup`.
 * `scale` becomes 10^(scale of the SQL argument type), or 1 when `argnode`
 * is NULL.
 */
#define GENERATE_BAT_INPUT_BASE(tpe) \
	struct cudf_data_struct_##tpe *bat_data = \
		GDKzalloc(sizeof(struct cudf_data_struct_##tpe)); \
	if (!bat_data) { \
		msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
		goto wrapup; \
	} \
	inputs[index] = bat_data; \
	bat_data->is_null = tpe##_is_null; \
	bat_data->scale = \
		argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1; \
	bat_data->bat = NULL; \
	bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
290
/*
 * Build the input descriptor for the fixed-width BAT `b` of type `tpe`.
 *
 * Expands to a block that runs GENERATE_BAT_INPUT_BASE(tpe) and then fills
 * in count, null_value and data.  The data pointer is produced in one of
 * three ways:
 *  - dense BAT: the sequence starting at b->tseqbase is materialized into a
 *    freshly allocated, tracked buffer;
 *  - Tloc(b, 0) passes can_mprotect_region(): the BAT's own storage is used
 *    directly and the range is recorded in `regions`;
 *  - otherwise: the BAT's values are copied into a tracked buffer.
 *
 * The expansion site must provide everything GENERATE_BAT_INPUT_BASE needs
 * plus `regions`.  On any failure `msg` is set and control jumps to `wrapup`.
 *
 * The tracked allocator returns NULL for a zero-byte request, so a NULL data
 * pointer is only treated as an allocation failure when the BAT is non-empty;
 * this holds for both the dense and the copy branch.
 */
#define GENERATE_BAT_INPUT(b, tpe)                                             \
	{                                                                          \
		char *mprotect_retval;                                                 \
		GENERATE_BAT_INPUT_BASE(tpe);                                          \
		bat_data->count = BATcount(b);                                         \
		bat_data->null_value = tpe##_nil;                                      \
		if (BATtdense(b)) {                                                    \
			size_t it = 0;                                                     \
			tpe val = (b)->tseqbase;                                           \
			/* bat is dense, materialize it */                                 \
			bat_data->data = wrapped_GDK_malloc_nojump(                        \
				bat_data->count * sizeof(bat_data->null_value));               \
			/* an empty BAT legitimately yields a NULL buffer */               \
			if (bat_data->count > 0 && !bat_data->data) {                      \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);      \
				goto wrapup;                                                   \
			}                                                                  \
			for (it = 0; it < bat_data->count; it++) {                         \
				bat_data->data[it] = val++;                                    \
			}                                                                  \
		} else if (can_mprotect_region(Tloc(b, 0))) {                          \
			bat_data->data = (tpe *)Tloc(b, 0);                                \
			mprotect_retval = mprotect_region(                                 \
				bat_data->data,                                                \
				bat_data->count * sizeof(bat_data->null_value), &regions);     \
			if (mprotect_retval) {                                             \
				msg = createException(MAL, "cudf.eval",                        \
									  "Failed to mprotect region: %s",         \
									  mprotect_retval);                        \
				goto wrapup;                                                   \
			}                                                                  \
		} else {                                                               \
			/* cannot mprotect bat region, copy data */                        \
			bat_data->data = wrapped_GDK_malloc_nojump(                        \
				bat_data->count * sizeof(bat_data->null_value));               \
			if (bat_data->count > 0 && !bat_data->data) {                      \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);      \
				goto wrapup;                                                   \
			}                                                                  \
			/* never hand a NULL destination to memcpy, even for 0 bytes */    \
			if (bat_data->count > 0) {                                         \
				memcpy(bat_data->data, Tloc(b, 0),                             \
					   bat_data->count * sizeof(bat_data->null_value));        \
			}                                                                  \
		}                                                                      \
	}
333
/*
 * Allocate and set up the descriptor struct for one UDF output of type `tpe`.
 *
 * The expansion site must provide `outputs`, `index`, `argnode`, `msg` and a
 * `wrapup` label.  The macro declares a local `bat_data` that stays visible
 * in the enclosing block.  The descriptor starts out empty (count 0, data
 * NULL).  On allocation failure `msg` is set and control jumps to `wrapup`.
 */
#define GENERATE_BAT_OUTPUT_BASE(tpe) \
	struct cudf_data_struct_##tpe *bat_data = \
		GDKzalloc(sizeof(struct cudf_data_struct_##tpe)); \
	if (!bat_data) { \
		msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
		goto wrapup; \
	} \
	outputs[index] = bat_data; \
	bat_data->count = 0; \
	bat_data->data = NULL; \
	bat_data->is_null = tpe##_is_null; \
	bat_data->scale = \
		argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1; \
	bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
348
/*
 * Output descriptor for a type whose nil value is available as tpe##_nil:
 * runs GENERATE_BAT_OUTPUT_BASE(tpe) in its own block and stores that nil
 * value in null_value.
 */
#define GENERATE_BAT_OUTPUT(tpe) \
	{ \
		GENERATE_BAT_OUTPUT_BASE(tpe); \
		bat_data->null_value = tpe##_nil; \
	}
354
/* Option names read by CUDFeval(): whether to compile with debug flags, and
 * overrides for the C and C++ compiler commands. */
const char *debug_flag = "capi_use_debug";
const char *cc_flag = "capi_cc";
const char *cpp_flag = "capi_cpp";

/* Prefixes of the special directives in UDF source; CUDFeval() copies the
 * text after them into the extra compile and link flags. */
const char *cflags_pragma = "#pragma CFLAGS ";
const char *ldflags_pragma = "#pragma LDFLAGS ";

/* Compiler commands used when the capi_cc / capi_cpp options are not set. */
#define JIT_COMPILER_NAME "cc"
#define JIT_CPP_COMPILER_NAME "c++"

/* Accessors keyed by type code; defined later in this file. */
static size_t GetTypeCount(int type, void *struct_ptr);
static void *GetTypeData(int type, void *struct_ptr);
static void *GetTypeBat(int type, void *struct_ptr);
static const char *GetTypeName(int type);

/* Conversions between the database's temporal types and the cudf_data_*
 * structs exposed to UDFs; defined later in this file. */
static void data_from_date(date d, cudf_data_date *ptr);
static date date_from_data(cudf_data_date *ptr);
static void data_from_time(daytime d, cudf_data_time *ptr);
static daytime time_from_data(cudf_data_time *ptr);
static void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr);
static timestamp timestamp_from_data(cudf_data_timestamp *ptr);

/* Alphabet from which CUDFeval() draws random temporary file names. */
static char valid_path_characters[] = "abcdefghijklmnopqrstuvwxyz";
378
379static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
380 bool grouped)
381{
382 sql_func *sqlfun = NULL;
383 bit use_cpp = *getArgReference_bit(stk, pci, pci->retc + 1);
384 str exprStr = *getArgReference_str(stk, pci, pci->retc + 2);
385
386 const int ARG_OFFSET = 3;
387
388 size_t i = 0, j = 0;
389 char argbuf[64];
390 char buf[8192];
391 char fname[BUFSIZ];
392 char oname[BUFSIZ];
393 char libname[BUFSIZ];
394 char error_buf[BUFSIZ];
395 char total_error_buf[8192];
396 size_t error_buffer_position = 0;
397 str *args = NULL;
398 str *output_names = NULL;
399 char *msg = MAL_SUCCEED;
400 node *argnode;
401 int seengrp = FALSE;
402 FILE *f = NULL;
403 void *handle = NULL;
404 jitted_function volatile func = NULL;
405 int ret;
406
407 FILE *compiler = NULL;
408 int compiler_return_code;
409
410 void ** volatile inputs = NULL;
411 size_t volatile input_count = 0;
412 void ** volatile outputs = NULL;
413 size_t volatile output_count = 0;
414 BAT ** volatile input_bats = NULL;
415 mprotected_region *regions = NULL, *region_iter = NULL;
416
417 lng initial_output_count = -1;
418
419 struct sigaction sa, oldsa, oldsb;
420 sigset_t signal_set;
421
422#ifdef NDEBUG
423 bool debug_build =
424 GDKgetenv_istrue(debug_flag) || GDKgetenv_isyes(debug_flag);
425#else
426 bool debug_build = true;
427#endif
428 char* extra_cflags = NULL;
429 char* extra_ldflags = NULL;
430
431
432 const char *compilation_flags = debug_build ? "-g -O0" : "-O2";
433 const char *c_compiler =
434 use_cpp ? (GDKgetenv(cpp_flag) ? GDKgetenv(cpp_flag)
435 : JIT_CPP_COMPILER_NAME)
436 : (GDKgetenv(cc_flag) ? GDKgetenv(cc_flag) : JIT_COMPILER_NAME);
437
438 const char *struct_prefix = "struct cudf_data_struct_";
439 const char *funcname;
440
441 BUN expression_hash = 0, funcname_hash = 0;
442 cached_functions *cached_function;
443 char* volatile function_parameters = NULL;
444 int tid = THRgettid();
445 size_t input_size = 0;
446 bit non_grouped_aggregate = 0;
447
448 size_t index = 0;
449 int bat_type = 0;
450 const char* tpe = NULL;
451
452 size_t volatile extra_inputs = 0;
453
454 (void)cntxt;
455
456 allocated_regions[tid] = NULL;
457
458 if (!GDKgetenv_istrue("embedded_c") && !GDKgetenv_isyes("embedded_c"))
459 throw(MAL, "cudf.eval", "Embedded C has not been enabled. "
460 "Start server with --set embedded_c=true");
461
462 // we need to be able to catch segfaults and bus errors
463 // so we can work with mprotect to prevent UDFs from changing
464 // the input data
465
466 // we remove them from the pthread_sigmask
467 if (option_enable_mprotect) {
468 (void)sigemptyset(&signal_set);
469 (void)sigaddset(&signal_set, SIGSEGV);
470 (void)sigaddset(&signal_set, SIGBUS);
471 (void)pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL);
472
473 sa = (struct sigaction) {.sa_flags = 0,};
474 }
475
476 if (!grouped) {
477 sql_subfunc *sqlmorefun =
478 (*(sql_subfunc **)getArgReference_ptr(stk, pci, pci->retc));
479 if (sqlmorefun)
480 sqlfun =
481 (*(sql_subfunc **)getArgReference_ptr(stk, pci, pci->retc))->func;
482 } else {
483 sqlfun = *(sql_func **)getArgReference_ptr(stk, pci, pci->retc);
484 }
485
486 funcname = sqlfun ? sqlfun->base.name : "yet_another_c_function";
487
488 args = (str *)GDKzalloc(sizeof(str) * pci->argc);
489 output_names = (str *)GDKzalloc(sizeof(str) * pci->argc);
490 if (!args || !output_names) {
491 throw(MAL, "cudf.eval", MAL_MALLOC_FAIL);
492 }
493
494 // retrieve the argument names from the sqlfun structure
495 // first argument after the return contains the pointer to the sql_func
496 // structure
497 if (sqlfun != NULL) {
498 // retrieve the argument names (inputs)
499 if (sqlfun->ops->cnt > 0) {
500 int carg = pci->retc + ARG_OFFSET;
501 argnode = sqlfun->ops->h;
502 while (argnode) {
503 char *argname = ((sql_arg *)argnode->data)->name;
504 args[carg] = GDKstrdup(argname);
505 if (!args[carg]) {
506 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
507 goto wrapup;
508 }
509 carg++;
510 argnode = argnode->next;
511 }
512 }
513 // retrieve the output names
514 argnode = sqlfun->res->h;
515 for (i = 0; i < (size_t)sqlfun->res->cnt; i++) {
516 output_names[i] = GDKstrdup(((sql_arg *)argnode->data)->name);
517 argnode = argnode->next;
518 }
519 }
520
521 // name unnamed outputs
522 for (i = 0; i < (size_t)pci->retc; i++) {
523 if (!output_names[i]) {
524 if (pci->retc > 1) {
525 snprintf(argbuf, sizeof(argbuf), "output%zu", i);
526 } else {
527 // just call it "output" if there is only one
528 snprintf(argbuf, sizeof(argbuf), "output");
529 }
530 output_names[i] = GDKstrdup(argbuf);
531 }
532 }
533 // the first unknown argument is the group, we don't really care for the
534 // rest.
535 for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) {
536 if (args[i] == NULL) {
537 if (!seengrp && grouped) {
538 args[i] = GDKstrdup("aggr_group");
539 if (!args[i]) {
540 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
541 goto wrapup;
542 }
543 seengrp = TRUE;
544 } else {
545 snprintf(argbuf, sizeof(argbuf), "arg%zu", i - pci->retc - 1);
546 args[i] = GDKstrdup(argbuf);
547 if (!args[i]) {
548 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
549 goto wrapup;
550 }
551 }
552 }
553 }
554 // non-grouped aggregates don't have the group list
555 // to allow users to write code for both grouped and non-grouped aggregates
556 // we create an "aggr_group" BAT for non-grouped aggregates
557 non_grouped_aggregate = grouped && !seengrp;
558
559 input_count = pci->argc - (pci->retc + ARG_OFFSET);
560 output_count = pci->retc;
561
562 // begin the compilation phase
563 // first look up if we have already compiled this function
564 expression_hash = 0;
565 GDK_STRHASH(exprStr, expression_hash);
566 GDK_STRHASH(funcname, funcname_hash);
567 funcname_hash = funcname_hash % FUNCTION_CACHE_SIZE;
568 j = 0;
569 for (i = 0; i < (size_t)pci->argc; i++) {
570 if (args[i]) {
571 j += strlen(args[i]);
572 }
573 if (output_names[i]) {
574 j += strlen(output_names[i]);
575 }
576 }
577
578 function_parameters =
579 GDKzalloc((j + input_count + output_count + 1) * sizeof(char));
580 if (!function_parameters) {
581 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
582 goto wrapup;
583 }
584 for (i = 0; i < input_count; i++) {
585 if (!isaBatType(getArgType(mb, pci, i))) {
586 function_parameters[i] = getArgType(mb, pci, i);
587 } else {
588 function_parameters[i] = getBatType(getArgType(mb, pci, i));
589 }
590 }
591 for (i = 0; i < output_count; i++) {
592 if (!isaBatType(getArgType(mb, pci, i))) {
593 function_parameters[input_count + i] = getArgType(mb, pci, i);
594 } else {
595 function_parameters[input_count + i] =
596 getBatType(getArgType(mb, pci, i));
597 }
598 }
599 j = input_count + output_count;
600 for (i = 0; i < (size_t)pci->argc; i++) {
601 if (args[i]) {
602 size_t len = strlen(args[i]);
603 memcpy(function_parameters + j, args[i], len);
604 j += len;
605 }
606 if (output_names[i]) {
607 size_t len = strlen(output_names[i]);
608 memcpy(function_parameters + j, output_names[i], len);
609 j += len;
610 }
611 }
612
613 MT_lock_set(&cache_lock);
614 cached_function = function_cache[funcname_hash];
615 while (cached_function) {
616 if (cached_function->expression_hash == expression_hash &&
617 strcmp(cached_function->parameters, function_parameters) == 0) {
618 // this function matches our compiled function
619 // in both source code and parameters
620 // use the already compiled function instead of recompiling
621 func = cached_function->function;
622 break;
623 }
624 cached_function = cached_function->next;
625 }
626 MT_lock_unset(&cache_lock);
627
628 if (!func) {
629 // function was not found in the cache
630 // we have to compile it
631
632 // first generate the names of the files
633 // we place the temporary files in the DELDIR directory
634 // because this will be removed again upon server startup
635 const int RANDOM_NAME_SIZE = 32;
636 char *path = NULL;
637 const char *prefix = TEMPDIR_NAME DIR_SEP_STR;
638 size_t prefix_size = strlen(prefix);
639 char *deldirpath;
640
641 memcpy(buf, prefix, sizeof(char) * strlen(prefix));
642 // generate a random 32-character name for the temporary files
643 for (i = prefix_size; i < prefix_size + RANDOM_NAME_SIZE; i++) {
644 buf[i] = valid_path_characters[rand() %
645 (sizeof(valid_path_characters) - 1)];
646 }
647 buf[i] = '\0';
648 path = GDKfilepath(0, BATDIR, buf, "c");
649 if (!path) {
650 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
651 goto wrapup;
652 }
653 strcpy(fname, path);
654 strcpy(oname, fname);
655 oname[strlen(oname) - 1] = 'o';
656 GDKfree(path);
657
658 memmove(buf + strlen(SO_PREFIX) + prefix_size, buf + prefix_size,
659 i + 1 - prefix_size);
660 memcpy(buf + prefix_size, SO_PREFIX, sizeof(char) * strlen(SO_PREFIX));
661 path =
662 GDKfilepath(0, BATDIR, buf, SO_EXT[0] == '.' ? &SO_EXT[1] : SO_EXT);
663 if (!path) {
664 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
665 goto wrapup;
666 }
667 strcpy(libname, path);
668 GDKfree(path);
669
670 // if DELDIR directory does not exist, create it
671 deldirpath = GDKfilepath(0, NULL, TEMPDIR, NULL);
672 if (!deldirpath) {
673 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
674 goto wrapup;
675 }
676 if (mkdir(deldirpath, 0777) < 0 && errno != EEXIST) {
677 msg = createException(MAL, "cudf.eval",
678 "cannot create directory %s\n", deldirpath);
679 goto wrapup;
680 }
681 GDKfree(deldirpath);
682
683 // now generate the source file
684 f = fopen(fname, "w+");
685 if (!f) {
686 msg = createException(MAL, "cudf.eval",
687 "Failed to open file for JIT compilation: %s",
688 strerror(errno));
689 errno = 0;
690 goto wrapup;
691 }
692
693 // include some standard C headers first
694 ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdio.h>\n");
695 ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdlib.h>\n");
696 // we include "cheader.h", but not directly to avoid having to deal with
697 // headers, etc...
698 // Instead it is embedded in a string (loaded from "cheader.text.h")
699 // this file contains the structures used for input/output arguments
700 ATTEMPT_TO_WRITE_TO_FILE(f, cheader_header_text);
701 // some monetdb-style typedefs to make it easier
702 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int8_t bte;\n");
703 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int16_t sht;\n");
704 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int64_t lng;\n");
705 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef float flt;\n");
706 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef double dbl;\n");
707 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef char* str;\n");
708 ATTEMPT_TO_WRITE_TO_FILE(f, "typedef size_t oid;\n");
709 // now we search exprStr for any preprocessor directives (#)
710 // we move these to the top of the file
711 // this allows the user to normally #include files
712 {
713 int preprocessor_start = 0;
714 bool is_preprocessor_directive = false;
715 bool new_line = false;
716 for (i = 0; i < strlen(exprStr); i++) {
717 if (exprStr[i] == '\n') {
718 if (is_preprocessor_directive) {
719 // the previous line was a preprocessor directive
720 // first check if it is one of our special preprocessor directives
721 if (i - preprocessor_start >= strlen(cflags_pragma) &&
722 memcmp(exprStr + preprocessor_start, cflags_pragma, strlen(cflags_pragma)) == 0) {
723 size_t cflags_characters = (i - preprocessor_start) - strlen(cflags_pragma);
724 if (cflags_characters > 0 && !extra_cflags) {
725 extra_cflags = GDKzalloc(cflags_characters + 1);
726 if (extra_cflags) {
727 memcpy(extra_cflags, exprStr + preprocessor_start + strlen(cflags_pragma), cflags_characters);
728 }
729 }
730 } else if (i - preprocessor_start >= strlen(ldflags_pragma) &&
731 memcmp(exprStr + preprocessor_start, ldflags_pragma, strlen(ldflags_pragma)) == 0) {
732 size_t ldflags_characters = (i - preprocessor_start) - strlen(ldflags_pragma);
733 if (ldflags_characters > 0 && !extra_ldflags) {
734 extra_ldflags = GDKzalloc(ldflags_characters + 1);
735 if (extra_ldflags) {
736 memcpy(extra_ldflags, exprStr + preprocessor_start + strlen(ldflags_pragma), ldflags_characters);
737 }
738 }
739 } else {
740 // regular preprocessor directive: write it to the file
741 ATTEMPT_TO_WRITE_DATA_TO_FILE(f, exprStr +
742 preprocessor_start,
743 i - preprocessor_start);
744 ATTEMPT_TO_WRITE_TO_FILE(f, "\n");
745 }
746 // now overwrite the preprocessor directive in the
747 // expression string with spaces
748 for (j = preprocessor_start; j < i; j++) {
749 exprStr[j] = ' ';
750 }
751 }
752 is_preprocessor_directive = false;
753 new_line = true;
754 } else if (exprStr[i] == ' ' || exprStr[i] == '\t') {
755 // skip any spaces
756 continue;
757 } else if (new_line) {
758 if (exprStr[i] == '#') {
759 preprocessor_start = i;
760 is_preprocessor_directive = true;
761 }
762 new_line = false;
763 }
764 }
765 }
766
767 // create the actual function
768 if (use_cpp) {
769 // avoid name wrangling if we are compiling C++ code
770 ATTEMPT_TO_WRITE_TO_FILE(f, "\nextern \"C\"");
771 }
772 ATTEMPT_TO_WRITE_TO_FILE(f, "\nchar* ");
773 ATTEMPT_TO_WRITE_TO_FILE(f, funcname);
774 ATTEMPT_TO_WRITE_TO_FILE(f, "(void** __inputs, void** __outputs, "
775 "malloc_function_ptr malloc, free_function_ptr free) {\n");
776
777 // now we convert the input arguments from void** to the proper
778 // input/output
779 // of the function
780 // first convert the input
781 for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) {
782 bat_type = !isaBatType(getArgType(mb, pci, i))
783 ? getArgType(mb, pci, i)
784 : getBatType(getArgType(mb, pci, i));
785 tpe = GetTypeName(bat_type);
786 assert(tpe);
787 if (tpe) {
788 snprintf(buf, sizeof(buf),
789 "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n", struct_prefix,
790 tpe, args[i], struct_prefix, tpe,
791 i - (pci->retc + ARG_OFFSET));
792 ATTEMPT_TO_WRITE_TO_FILE(f, buf);
793 }
794 }
795 if (non_grouped_aggregate) {
796 // manually add "aggr_group" for non-grouped aggregates
797 bat_type = TYPE_oid;
798 tpe = GetTypeName(bat_type);
799 assert(tpe);
800 if (tpe) {
801 snprintf(buf, sizeof(buf),
802 "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n", struct_prefix,
803 tpe, "aggr_group", struct_prefix, tpe, input_count);
804 ATTEMPT_TO_WRITE_TO_FILE(f, buf);
805 }
806 }
807 // output types
808 for (i = 0; i < (size_t)pci->retc; i++) {
809 bat_type = getBatType(getArgType(mb, pci, i));
810 tpe = GetTypeName(bat_type);
811 assert(tpe);
812 if (tpe) {
813 snprintf(buf, sizeof(buf),
814 "\t%s%s* %s = ((%s%s*)__outputs[%zu]);\n", struct_prefix,
815 tpe, output_names[i], struct_prefix, tpe, i);
816 ATTEMPT_TO_WRITE_TO_FILE(f, buf);
817 }
818 }
819
820 ATTEMPT_TO_WRITE_TO_FILE(f, "\n");
821 // write the actual user defined code into the file
822 ATTEMPT_TO_WRITE_TO_FILE(f, exprStr);
823
824 ATTEMPT_TO_WRITE_TO_FILE(f, "\nreturn 0;\n}\n");
825
826 fclose(f);
827 f = NULL;
828
829 // now it's time to try to compile the code
830 // we use popen to capture any error output
831 snprintf(buf, sizeof(buf), "%s %s -c -fPIC %s %s -o %s 2>&1 >/dev/null",
832 c_compiler, extra_cflags ? extra_cflags : "", compilation_flags, fname, oname);
833 compiler = popen(buf, "r");
834 if (!compiler) {
835 msg = createException(MAL, "cudf.eval", "Failed popen");
836 goto wrapup;
837 }
838 // read the error stream into the error buffer until the compiler is
839 // done
840 while (fgets(error_buf, sizeof(error_buf) - 1, compiler)) {
841 size_t error_size = strlen(error_buf);
842 snprintf(total_error_buf + error_buffer_position,
843 sizeof(total_error_buf) - error_buffer_position - 1, "%s",
844 error_buf);
845 error_buffer_position += error_size;
846 if (error_buffer_position >= sizeof(total_error_buf)) break;
847 }
848
849 compiler_return_code = pclose(compiler);
850 compiler = NULL;
851
852 if (compiler_return_code != 0) {
853 // failure in compiling the code
854 // report the failure to the user
855 msg = createException(MAL, "cudf.eval",
856 "Failed to compile C UDF:\n%s",
857 total_error_buf);
858 goto wrapup;
859 }
860
861 error_buffer_position = 0;
862 error_buf[0] = '\0';
863
864 snprintf(buf, sizeof(buf), "%s %s %s -shared -o %s 2>&1 >/dev/null", c_compiler,
865 extra_ldflags ? extra_ldflags : "", oname, libname);
866 compiler = popen(buf, "r");
867 if (!compiler) {
868 msg = createException(MAL, "cudf.eval", "Failed popen");
869 goto wrapup;
870 }
871 while (fgets(error_buf, sizeof(error_buf) - 1, compiler)) {
872 size_t error_size = strlen(error_buf);
873 snprintf(total_error_buf + error_buffer_position,
874 sizeof(total_error_buf) - error_buffer_position - 1, "%s",
875 error_buf);
876 error_buffer_position += error_size;
877 if (error_buffer_position >= sizeof(total_error_buf)) break;
878 }
879
880 compiler_return_code = pclose(compiler);
881 compiler = NULL;
882
883 if (compiler_return_code != 0) {
884 // failure in compiler
885 msg = createException(MAL, "cudf.eval", "Failed to link C UDF.\n%s",
886 total_error_buf);
887 goto wrapup;
888 }
889
890 handle = dlopen(libname, RTLD_LAZY);
891 if (!handle) {
892 msg = createException(MAL, "cudf.eval",
893 "Failed to open shared library: %s.",
894 dlerror());
895 goto wrapup;
896 }
897 func = (jitted_function)dlsym(handle, funcname);
898 if (!func) {
899 msg = createException(MAL, "cudf.eval",
900 "Failed to load function from library: %s.",
901 dlerror());
902 goto wrapup;
903 }
904 // now that we have compiled this function
905 // store it in our function cache
906 {
907 cached_functions *new_entry = GDKmalloc(sizeof(cached_functions));
908 if (!new_entry) {
909 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
910 goto wrapup;
911 }
912 new_entry->function = func;
913 new_entry->expression_hash = expression_hash;
914 new_entry->parameters = function_parameters;
915 new_entry->dll_handle = handle;
916 function_parameters = NULL;
917 handle = NULL;
918 MT_lock_set(&cache_lock);
919 new_entry->next = function_cache[funcname_hash];
920 function_cache[funcname_hash] = new_entry;
921 MT_lock_unset(&cache_lock);
922 }
923 }
924 if (input_count > 0) {
925 // add "aggr_group" for non-grouped aggregates
926 extra_inputs = non_grouped_aggregate ? 1 : 0;
927 input_bats = GDKzalloc(sizeof(BAT *) * (input_count + extra_inputs));
928 inputs = GDKzalloc(sizeof(void *) * (input_count + extra_inputs));
929 if (!inputs || !input_bats) {
930 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
931 goto wrapup;
932 }
933 }
934 if (output_count > 0) {
935 outputs = GDKzalloc(sizeof(void *) * output_count);
936 if (!outputs) {
937 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
938 goto wrapup;
939 }
940 }
941 // create the inputs
942 argnode = sqlfun ? sqlfun->ops->h : NULL;
943 for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) {
944 index = i - (pci->retc + ARG_OFFSET);
945 bat_type = getArgType(mb, pci, i);
946 if (!isaBatType(bat_type)) {
947 void* input = NULL;
948 if (bat_type == TYPE_str) {
949 input = *getArgReference_str(stk, pci, i);
950 } else if (bat_type == TYPE_blob) {
951 input = *(blob**)getArgReference(stk, pci, i);
952 } else {
953 input = getArgReference(stk, pci, i);
954 }
955 // scalar input
956 // create a temporary BAT
957 input_bats[index] = COLnew(0, bat_type, 1, TRANSIENT);
958 if (!input_bats[index]) {
959 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
960 goto wrapup;
961 }
962 if (BUNappend(input_bats[index], input,
963 false) != GDK_SUCCEED) {
964 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
965 goto wrapup;
966 }
967 } else {
968 // deal with BAT input
969 bat_type = getBatType(getArgType(mb, pci, i));
970 input_bats[index] =
971 BATdescriptor(*getArgReference_bat(stk, pci, i));
972 }
973
974 if (bat_type == TYPE_bit) {
975 GENERATE_BAT_INPUT(input_bats[index], bit);
976 } else if (bat_type == TYPE_bte) {
977 GENERATE_BAT_INPUT(input_bats[index], bte);
978 } else if (bat_type == TYPE_sht) {
979 GENERATE_BAT_INPUT(input_bats[index], sht);
980 } else if (bat_type == TYPE_int) {
981 GENERATE_BAT_INPUT(input_bats[index], int);
982 } else if (bat_type == TYPE_oid) {
983 GENERATE_BAT_INPUT(input_bats[index], oid);
984 } else if (bat_type == TYPE_lng) {
985 GENERATE_BAT_INPUT(input_bats[index], lng);
986 } else if (bat_type == TYPE_flt) {
987 GENERATE_BAT_INPUT(input_bats[index], flt);
988 } else if (bat_type == TYPE_dbl) {
989 GENERATE_BAT_INPUT(input_bats[index], dbl);
990 } else if (bat_type == TYPE_str) {
991 BATiter li;
992 BUN p = 0, q = 0;
993 bool can_mprotect_varheap = false;
994 str mprotect_retval;
995 GENERATE_BAT_INPUT_BASE(str);
996 bat_data->count = BATcount(input_bats[index]);
997 bat_data->data = bat_data->count == 0 ? NULL : GDKmalloc(sizeof(char *) * bat_data->count);
998 bat_data->null_value = NULL;
999 if (bat_data->count > 0 && !bat_data->data) {
1000 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1001 goto wrapup;
1002 }
1003 j = 0;
1004
1005 // check if we can mprotect the varheap
1006 // if we can't mprotect, copy the strings instead
1007 assert(input_bats[index]->tvheap);
1008 can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base);
1009
1010 li = bat_iterator(input_bats[index]);
1011 BATloop(input_bats[index], p, q)
1012 {
1013 char *t = (char *)BUNtvar(li, p);
1014 if (strcmp(t, str_nil) == 0) {
1015 bat_data->data[j] = NULL;
1016 } else {
1017 if (can_mprotect_varheap) {
1018 bat_data->data[j] = t;
1019 } else {
1020 bat_data->data[j] = wrapped_GDK_malloc_nojump(strlen(t) + 1);
1021 if (!bat_data->data[j]) {
1022 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1023 goto wrapup;
1024 }
1025 strcpy(bat_data->data[j], t);
1026 }
1027 }
1028 j++;
1029 }
1030 if (can_mprotect_varheap) {
1031 // mprotect the varheap of the BAT to prevent modification of input strings
1032 mprotect_retval =
1033 mprotect_region(input_bats[index]->tvheap->base,
1034 input_bats[index]->tvheap->size, &regions);
1035 if (mprotect_retval) {
1036 msg = createException(MAL, "cudf.eval",
1037 "Failed to mprotect region: %s",
1038 mprotect_retval);
1039 goto wrapup;
1040 }
1041 }
1042 } else if (bat_type == TYPE_date) {
1043 date *baseptr;
1044 GENERATE_BAT_INPUT_BASE(date);
1045 bat_data->count = BATcount(input_bats[index]);
1046 bat_data->data = bat_data->count == 0 ? NULL :
1047 GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1048 if (bat_data->count > 0 && !bat_data->data) {
1049 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1050 goto wrapup;
1051 }
1052
1053 baseptr = (date *)Tloc(input_bats[index], 0);
1054 for (j = 0; j < bat_data->count; j++) {
1055 data_from_date(baseptr[j], bat_data->data + j);
1056 }
1057 data_from_date(date_nil, &bat_data->null_value);
1058 } else if (bat_type == TYPE_daytime) {
1059 daytime *baseptr;
1060 GENERATE_BAT_INPUT_BASE(time);
1061 bat_data->count = BATcount(input_bats[index]);
1062 bat_data->data = bat_data->count == 0 ? NULL :
1063 GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1064 if (bat_data->count > 0 && !bat_data->data) {
1065 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1066 goto wrapup;
1067 }
1068
1069 baseptr = (daytime *)Tloc(input_bats[index], 0);
1070 for (j = 0; j < bat_data->count; j++) {
1071 data_from_time(baseptr[j], bat_data->data + j);
1072 }
1073 data_from_time(daytime_nil, &bat_data->null_value);
1074 } else if (bat_type == TYPE_timestamp) {
1075 timestamp *baseptr;
1076 GENERATE_BAT_INPUT_BASE(timestamp);
1077 bat_data->count = BATcount(input_bats[index]);
1078 bat_data->data = bat_data->count == 0 ? NULL :
1079 GDKmalloc(sizeof(bat_data->null_value) * bat_data->count);
1080 if (bat_data->count > 0 && !bat_data->data) {
1081 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1082 goto wrapup;
1083 }
1084
1085 baseptr = (timestamp *)Tloc(input_bats[index], 0);
1086 for (j = 0; j < bat_data->count; j++) {
1087 data_from_timestamp(baseptr[j], bat_data->data + j);
1088 }
1089 data_from_timestamp(timestamp_nil, &bat_data->null_value);
1090 } else if (bat_type == TYPE_blob) {
1091 BATiter li;
1092 BUN p = 0, q = 0;
1093 str mprotect_retval;
1094 bool can_mprotect_varheap = false;
1095 GENERATE_BAT_INPUT_BASE(blob);
1096 bat_data->count = BATcount(input_bats[index]);
1097 bat_data->data = bat_data->count == 0 ? NULL :
1098 GDKmalloc(sizeof(cudf_data_blob) * bat_data->count);
1099 if (bat_data->count > 0 && !bat_data->data) {
1100 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1101 goto wrapup;
1102 }
1103 j = 0;
1104
1105 // check if we can mprotect the varheap
1106 // if we can't mprotect, copy the strings instead
1107 assert(input_bats[index]->tvheap);
1108 can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base);
1109
1110 li = bat_iterator(input_bats[index]);
1111 BATloop(input_bats[index], p, q)
1112 {
1113 blob *t = (blob *)BUNtvar(li, p);
1114 if (t->nitems == ~(size_t)0) {
1115 bat_data->data[j].size = ~(size_t) 0;
1116 bat_data->data[j].data = NULL;
1117 } else {
1118 bat_data->data[j].size = t->nitems;
1119 if (can_mprotect_varheap) {
1120 bat_data->data[j].data = &t->data[0];
1121 } else {
1122 bat_data->data[j].data = t->nitems == 0 ? NULL :
1123 wrapped_GDK_malloc_nojump(t->nitems);
1124 if (t->nitems > 0 && !bat_data->data[j].data) {
1125 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1126 goto wrapup;
1127 }
1128 memcpy(bat_data->data[j].data, &t->data[0], t->nitems);
1129 }
1130 }
1131 j++;
1132 }
1133 bat_data->null_value.size = ~(size_t) 0;
1134 bat_data->null_value.data = NULL;
1135 if (can_mprotect_varheap) {
1136 // for blob columns, mprotect the varheap of the BAT
1137 mprotect_retval =
1138 mprotect_region(input_bats[index]->tvheap->base,
1139 input_bats[index]->tvheap->size, &regions);
1140 if (mprotect_retval) {
1141 msg = createException(MAL, "cudf.eval",
1142 "Failed to mprotect region: %s",
1143 mprotect_retval);
1144 goto wrapup;
1145 }
1146 }
1147 } else {
1148 // unsupported type: convert to string
1149 BATiter li;
1150 BUN p = 0, q = 0;
1151 GENERATE_BAT_INPUT_BASE(str);
1152 bat_data->count = BATcount(input_bats[index]);
1153 bat_data->null_value = NULL;
1154 bat_data->data = bat_data->count == 0 ? NULL :
1155 GDKzalloc(sizeof(char *) * bat_data->count);
1156 if (bat_data->count > 0 && !bat_data->data) {
1157 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1158 goto wrapup;
1159 }
1160 j = 0;
1161
1162 li = bat_iterator(input_bats[index]);
1163 BATloop(input_bats[index], p, q)
1164 {
1165 void *t = BUNtail(li, p);
1166 if (BATatoms[bat_type].atomCmp(
1167 t, BATatoms[bat_type].atomNull) == 0) {
1168 bat_data->data[j] = NULL;
1169 } else {
1170 char *result = NULL;
1171 size_t length = 0;
1172 if (BATatoms[bat_type].atomToStr(&result, &length, t, false) ==
1173 0) {
1174 msg = createException(
1175 MAL, "cudf.eval",
1176 "Failed to convert element to string");
1177 goto wrapup;
1178 }
1179 bat_data->data[j] = result;
1180 }
1181 j++;
1182 }
1183 }
1184 input_size = BATcount(input_bats[index]) > input_size
1185 ? BATcount(input_bats[index])
1186 : input_size;
1187 argnode = argnode ? argnode->next : NULL;
1188 }
1189
1190 index = input_count;
1191 if (non_grouped_aggregate) {
1192 GENERATE_BAT_INPUT_BASE(oid);
1193 bat_data->count = input_size;
1194 bat_data->null_value = oid_nil;
1195 bat_data->data =
1196 wrapped_GDK_zalloc_nojump(bat_data->count * sizeof(bat_data->null_value));
1197 if (!bat_data->data) {
1198 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1199 goto wrapup;
1200 }
1201 }
1202
1203 argnode = sqlfun ? sqlfun->res->h : NULL;
1204 // output types
1205 for (i = 0; i < output_count; i++) {
1206 index = i;
1207 bat_type = getBatType(getArgType(mb, pci, i));
1208 if (bat_type == TYPE_bit) {
1209 GENERATE_BAT_OUTPUT(bit);
1210 } else if (bat_type == TYPE_bte) {
1211 GENERATE_BAT_OUTPUT(bte);
1212 } else if (bat_type == TYPE_sht) {
1213 GENERATE_BAT_OUTPUT(sht);
1214 } else if (bat_type == TYPE_int) {
1215 GENERATE_BAT_OUTPUT(int);
1216 } else if (bat_type == TYPE_oid) {
1217 GENERATE_BAT_OUTPUT(oid);
1218 } else if (bat_type == TYPE_lng) {
1219 GENERATE_BAT_OUTPUT(lng);
1220 } else if (bat_type == TYPE_flt) {
1221 GENERATE_BAT_OUTPUT(flt);
1222 } else if (bat_type == TYPE_dbl) {
1223 GENERATE_BAT_OUTPUT(dbl);
1224 } else if (bat_type == TYPE_str) {
1225 GENERATE_BAT_OUTPUT_BASE(str);
1226 bat_data->null_value = NULL;
1227 } else if (bat_type == TYPE_date) {
1228 GENERATE_BAT_OUTPUT_BASE(date);
1229 data_from_date(date_nil, &bat_data->null_value);
1230 } else if (bat_type == TYPE_daytime) {
1231 GENERATE_BAT_OUTPUT_BASE(time);
1232 data_from_time(daytime_nil, &bat_data->null_value);
1233 } else if (bat_type == TYPE_timestamp) {
1234 GENERATE_BAT_OUTPUT_BASE(timestamp);
1235 data_from_timestamp(timestamp_nil, &bat_data->null_value);
1236 } else if (bat_type == TYPE_blob) {
1237 GENERATE_BAT_OUTPUT_BASE(blob);
1238 bat_data->null_value.size = ~(size_t) 0;
1239 bat_data->null_value.data = NULL;
1240 } else {
1241 // unsupported type, convert from string output
1242 GENERATE_BAT_OUTPUT_BASE(str);
1243 bat_data->null_value = NULL;
1244 }
1245 argnode = argnode ? argnode->next : NULL;
1246 }
1247
1248 // set up a longjmp point
1249 // this longjmp point is used for some error handling in the C function
1250 // such as failed mallocs
1251 if (option_enable_longjmp) {
1252 ret = setjmp(jump_buffer[tid]);
1253 if (ret < 0) {
1254 // error value
1255 msg = createException(MAL, "cudf.eval", "Failed setjmp: %s",
1256 strerror(errno));
1257 errno = 0;
1258 goto wrapup;
1259 } else if (ret > 0) {
1260 if (ret == 1) {
1261 msg = createException(MAL, "cudf.eval", "Attempting to write to "
1262 "the input or triggered a "
1263 "segfault/bus error");
1264 } else if (ret == 2) {
1265 msg = createException(MAL, "cudf.eval",
1266 "Malloc failure in internal function!");
1267 } else {
1268 // we jumped here
1269 msg = createException(MAL, "cudf.eval", "We longjumped here "
1270 "because of an error, but "
1271 "we don't know which!");
1272 }
1273 goto wrapup;
1274 }
1275 }
1276
1277 // set up the signal handler for catching segfaults
1278 if (option_enable_mprotect) {
1279 sa = (struct sigaction) {
1280 .sa_flags = SA_SIGINFO,
1281 .sa_sigaction = handler,
1282 };
1283 (void) sigfillset(&sa.sa_mask);
1284 if (sigaction(SIGSEGV, &sa, &oldsa) == -1 ||
1285 sigaction(SIGBUS, &sa, &oldsb) == -1) {
1286 msg = createException(MAL, "cudf.eval",
1287 "Failed to set signal handler: %s",
1288 strerror(errno));
1289 errno = 0;
1290 goto wrapup;
1291 }
1292 // actually mprotect the regions now that the signal handlers are set
1293 region_iter = regions;
1294 while (region_iter) {
1295 if (mprotect(region_iter->addr, region_iter->len, PROT_READ) < 0) {
1296 goto wrapup;
1297 }
1298 region_iter = region_iter->next;
1299 }
1300 }
1301 // call the actual jitted function
1302 msg = func(inputs, outputs, wrapped_GDK_malloc, wrapped_GDK_free);
1303
1304
1305 if (option_enable_mprotect) {
1306 // clear any mprotected regions
1307 while (regions) {
1308 mprotected_region *next = regions->next;
1309 clear_mprotect(regions->addr, regions->len);
1310 GDKfree(regions);
1311 regions = next;
1312 }
1313 // clear the signal handlers
1314 if (sigaction(SIGSEGV, &oldsa, NULL) == -1 ||
1315 sigaction(SIGBUS, &oldsb, NULL) == -1) {
1316 msg = createException(MAL, "cudf.eval",
1317 "Failed to unset signal handler: %s",
1318 strerror(errno));
1319 errno = 0;
1320 goto wrapup;
1321 }
1322 sa = (struct sigaction) {.sa_flags = 0,};
1323 }
1324
1325 if (msg) {
1326 // failure in function
1327 msg = createException(MAL, "cudf.eval", "%s", msg);
1328 goto wrapup;
1329 }
1330
1331 // create the output bats from the returned results
1332 for (i = 0; i < (size_t)pci->retc; i++) {
1333 size_t count;
1334 void *data;
1335 BAT *b;
1336 bat_type = getBatType(getArgType(mb, pci, i));
1337
1338 if (!outputs[i]) {
1339 msg = createException(MAL, "cudf.eval", "No data returned.");
1340 goto wrapup;
1341 }
1342 count = GetTypeCount(bat_type, outputs[i]);
1343 data = GetTypeData(bat_type, outputs[i]);
1344 if (!data) {
1345 msg = createException(MAL, "cudf.eval", "No data returned.");
1346 goto wrapup;
1347 }
1348 if (initial_output_count < 0) {
1349 initial_output_count = count;
1350 } else if ((size_t)initial_output_count != count) {
1351 msg = createException(MAL, "cudf.eval",
1352 "Data has different cardinalities.");
1353 goto wrapup;
1354 }
1355 if (bat_type == TYPE_bit || bat_type == TYPE_bte ||
1356 bat_type == TYPE_sht || bat_type == TYPE_int ||
1357 bat_type == TYPE_oid || bat_type == TYPE_lng ||
1358 bat_type == TYPE_flt || bat_type == TYPE_dbl) {
1359 b = GetTypeBat(bat_type, outputs[i]);
1360 if (!b) {
1361 msg = createException(MAL, "cudf.eval", "Output column was not properly initialized.");
1362 goto wrapup;
1363 }
1364 } else {
1365 assert(GetTypeBat(bat_type, outputs[i]) == NULL);
1366 b = COLnew(0, bat_type, count, TRANSIENT);
1367 if (!b) {
1368 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1369 goto wrapup;
1370 }
1371 if (bat_type == TYPE_date) {
1372 date *baseptr = (date *)Tloc(b, 0);
1373 cudf_data_date *source_base = (cudf_data_date *)data;
1374 for (j = 0; j < count; j++) {
1375 baseptr[j] = date_from_data(source_base + j);
1376 }
1377 BATsetcount(b, count);
1378 GDKfree(data);
1379 } else if (bat_type == TYPE_daytime) {
1380 daytime *baseptr = (daytime *)Tloc(b, 0);
1381 cudf_data_time *source_base = (cudf_data_time *)data;
1382 for (j = 0; j < count; j++) {
1383 baseptr[j] = time_from_data(source_base + j);
1384 }
1385 BATsetcount(b, count);
1386 GDKfree(data);
1387 } else if (bat_type == TYPE_timestamp) {
1388 timestamp *baseptr = (timestamp *)Tloc(b, 0);
1389 cudf_data_timestamp *source_base = (cudf_data_timestamp *)data;
1390 for (j = 0; j < count; j++) {
1391 baseptr[j] = timestamp_from_data(source_base + j);
1392 }
1393 BATsetcount(b, count);
1394 GDKfree(data);
1395 } else if (bat_type == TYPE_str) {
1396 char **source_base = (char **)data;
1397 for (j = 0; j < count; j++) {
1398 const char *ptr = source_base[j];
1399 if (!ptr) {
1400 ptr = str_nil;
1401 }
1402 if (BUNappend(b, ptr, false) != GDK_SUCCEED) {
1403 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1404 goto wrapup;
1405 }
1406 }
1407 GDKfree(data);
1408 } else if (bat_type == TYPE_blob) {
1409 cudf_data_blob *source_base = (cudf_data_blob *)data;
1410 blob *current_blob = NULL;
1411 size_t current_blob_maxsize = 0;
1412 for (j = 0; j < count; j++) {
1413 const cudf_data_blob blob = source_base[j];
1414
1415 if (blob.size == ~(size_t) 0) {
1416 current_blob->nitems = ~(size_t)0;
1417 } else {
1418 if (!current_blob || current_blob_maxsize < blob.size) {
1419 if (current_blob) {
1420 GDKfree(current_blob);
1421 }
1422 current_blob_maxsize = blob.size;
1423 current_blob = GDKmalloc(sizeof(size_t) + blob.size);
1424 if (!current_blob) {
1425 msg =
1426 createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1427 goto wrapup;
1428 }
1429 }
1430
1431 current_blob->nitems = blob.size;
1432 memcpy(&current_blob->data[0], blob.data, blob.size);
1433 }
1434
1435 if (BUNappend(b, current_blob, false) != GDK_SUCCEED) {
1436 if (current_blob) {
1437 GDKfree(current_blob);
1438 }
1439 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1440 goto wrapup;
1441 }
1442 }
1443 if (current_blob) {
1444 GDKfree(current_blob);
1445 }
1446 GDKfree(data);
1447 } else {
1448 char **source_base = (char **)data;
1449 size_t len = 0;
1450 void *element = NULL;
1451 for (j = 0; j < count; j++) {
1452 const char *ptr = source_base[j];
1453 const void *appended_element;
1454 if (!ptr || strcmp(ptr, str_nil) == 0) {
1455 appended_element = (void *)BATatoms[bat_type].atomNull;
1456 } else {
1457 if (BATatoms[bat_type].atomFromStr(ptr, &len, &element, false) ==
1458 0) {
1459 msg = createException(MAL, "cudf.eval",
1460 "Failed to convert output "
1461 "element from string: %s",
1462 ptr);
1463 goto wrapup;
1464 }
1465 appended_element = element;
1466 }
1467 if (BUNappend(b, appended_element, false) != GDK_SUCCEED) {
1468 if (element) {
1469 GDKfree(element);
1470 }
1471 msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);
1472 goto wrapup;
1473 }
1474 }
1475 if (element) {
1476 GDKfree(element);
1477 }
1478 GDKfree(data);
1479 }
1480 }
1481 b->tnil = false;
1482 b->tnonil = false;
1483 b->tkey = false;
1484 b->tsorted = false;
1485 b->trevsorted = false;
1486
1487 // free the output value right now to prevent the internal data from
1488 // being freed later
1489 // as the internal data is now part of the bat we just created
1490 GDKfree(outputs[i]);
1491 outputs[i] = NULL;
1492
1493 // return the BAT from the function
1494 if (isaBatType(getArgType(mb, pci, i))) {
1495 *getArgReference_bat(stk, pci, i) = b->batCacheid;
1496 BBPkeepref(b->batCacheid);
1497 } else {
1498 BATiter li = bat_iterator(b);
1499 if (VALinit(&stk->stk[pci->argv[i]], bat_type,
1500 BUNtail(li, 0)) == NULL) {
1501 msg = createException(MAL, "cudf.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL);
1502 }
1503 BBPunfix(b->batCacheid);
1504 }
1505 }
1506
1507wrapup:
1508 // cleanup
1509 // remove the signal handler, if any was set
1510 if (option_enable_mprotect) {
1511 if (sa.sa_sigaction) {
1512 (void) sigaction(SIGSEGV, &oldsa, NULL);
1513 (void) sigaction(SIGBUS, &oldsb, NULL);
1514
1515 sa = (struct sigaction) {.sa_flags = 0,};
1516 }
1517 // clear any mprotected regions
1518 while (regions) {
1519 mprotected_region *next = regions->next;
1520 clear_mprotect(regions->addr, regions->len);
1521 GDKfree(regions);
1522 regions = next;
1523 }
1524 }
1525 while (allocated_regions[tid]) {
1526 allocated_region *next = allocated_regions[tid]->next;
1527 GDKfree(allocated_regions[tid]);
1528 allocated_regions[tid] = next;
1529 }
1530 if (option_enable_mprotect) {
1531 // block segfaults and bus errors again after we exit
1532 (void)pthread_sigmask(SIG_BLOCK, &signal_set, NULL);
1533 }
1534 // argument names (input)
1535 if (args) {
1536 for (i = 0; i < (size_t)pci->argc; i++) {
1537 if (args[i]) {
1538 GDKfree(args[i]);
1539 }
1540 }
1541 GDKfree(args);
1542 }
1543 // output names
1544 if (output_names) {
1545 for (i = 0; i < (size_t)pci->retc; i++) {
1546 if (output_names[i]) {
1547 GDKfree(output_names[i]);
1548 }
1549 }
1550 GDKfree(output_names);
1551 }
1552 if (input_bats) {
1553 for(i = 0; i < input_count + extra_inputs; i++) {
1554 if (input_bats[i]) {
1555 BBPunfix(input_bats[i]->batCacheid);
1556 }
1557 }
1558 GDKfree(input_bats);
1559 }
1560 // input data
1561 if (inputs) {
1562 for (i = 0; i < (size_t)input_count + extra_inputs; i++) {
1563 if (inputs[i]) {
1564 if (isaBatType(getArgType(mb, pci, i))) {
1565 bat_type = getBatType(getArgType(mb, pci, i));
1566 }
1567 if (bat_type == TYPE_str || bat_type == TYPE_date ||
1568 bat_type == TYPE_daytime ||
1569 bat_type == TYPE_timestamp || bat_type == TYPE_blob) {
1570 // have to free input data
1571 void *data = GetTypeData(bat_type, inputs[i]);
1572 if (data) {
1573 GDKfree(data);
1574 }
1575 } else if (bat_type > TYPE_str) {
1576 // this type was converted to individually malloced
1577 // strings
1578 // we have to free all the individual strings
1579 char **data = (char **)GetTypeData(bat_type, inputs[i]);
1580 size_t count = GetTypeCount(bat_type, inputs[i]);
1581 for (j = 0; j < count; j++) {
1582 if (data[j]) {
1583 GDKfree(data[j]);
1584 }
1585 }
1586 if (data) {
1587 GDKfree(data);
1588 }
1589 }
1590 GDKfree(inputs[i]);
1591 }
1592 }
1593 GDKfree(inputs);
1594 }
1595 // output data
1596 if (outputs) {
1597 for (i = 0; i < (size_t)output_count; i++) {
1598 bat_type = isaBatType(getArgType(mb, pci, i))
1599 ? getBatType(getArgType(mb, pci, i))
1600 : getArgType(mb, pci, i);
1601 if (outputs[i]) {
1602 void* b = GetTypeBat(bat_type, outputs[i]);
1603 if (b) {
1604 BBPunfix(((BAT*)b)->batCacheid);
1605 } else {
1606 void *data = GetTypeData(bat_type, outputs[i]);
1607 if (data) {
1608 GDKfree(data);
1609 }
1610 }
1611 GDKfree(outputs[i]);
1612 }
1613 }
1614 GDKfree(outputs);
1615 }
1616 if (function_parameters) {
1617 GDKfree(function_parameters);
1618 }
1619 // close the file handle
1620 if (f) {
1621 fclose(f);
1622 }
1623 // close the dll
1624 if (handle) {
1625 dlclose(handle);
1626 }
1627 // close the compiler stream
1628 if (compiler) {
1629 pclose(compiler);
1630 }
1631 if (extra_cflags) {
1632 GDKfree(extra_cflags);
1633 }
1634 if (extra_ldflags) {
1635 GDKfree(extra_ldflags);
1636 }
1637 return msg;
1638}
1639
1640static const char *GetTypeName(int type)
1641{
1642 const char *tpe = NULL;
1643 if (type == TYPE_bit || type == TYPE_bte) {
1644 tpe = "bte";
1645 } else if (type == TYPE_sht) {
1646 tpe = "sht";
1647 } else if (type == TYPE_int) {
1648 tpe = "int";
1649 } else if (type == TYPE_oid) {
1650 tpe = "oid";
1651 } else if (type == TYPE_lng) {
1652 tpe = "lng";
1653 } else if (type == TYPE_flt) {
1654 tpe = "flt";
1655 } else if (type == TYPE_dbl) {
1656 tpe = "dbl";
1657 } else if (type == TYPE_str) {
1658 tpe = "str";
1659 } else if (type == TYPE_date) {
1660 tpe = "date";
1661 } else if (type == TYPE_daytime) {
1662 tpe = "time";
1663 } else if (type == TYPE_timestamp) {
1664 tpe = "timestamp";
1665 } else if (type == TYPE_blob) {
1666 tpe = "blob";
1667 } else {
1668 // unsupported type: string
1669 tpe = "str";
1670 }
1671 return tpe;
1672}
1673
1674void *GetTypeData(int type, void *struct_ptr)
1675{
1676 void *data = NULL;
1677
1678 if (type == TYPE_bit || type == TYPE_bte) {
1679 data = ((struct cudf_data_struct_bte *)struct_ptr)->data;
1680 } else if (type == TYPE_sht) {
1681 data = ((struct cudf_data_struct_sht *)struct_ptr)->data;
1682 } else if (type == TYPE_int) {
1683 data = ((struct cudf_data_struct_int *)struct_ptr)->data;
1684 } else if (type == TYPE_oid) {
1685 data = ((struct cudf_data_struct_oid *)struct_ptr)->data;
1686 } else if (type == TYPE_lng) {
1687 data = ((struct cudf_data_struct_lng *)struct_ptr)->data;
1688 } else if (type == TYPE_flt) {
1689 data = ((struct cudf_data_struct_flt *)struct_ptr)->data;
1690 } else if (type == TYPE_dbl) {
1691 data = ((struct cudf_data_struct_dbl *)struct_ptr)->data;
1692 } else if (type == TYPE_str) {
1693 data = ((struct cudf_data_struct_str *)struct_ptr)->data;
1694 } else if (type == TYPE_date) {
1695 data = ((struct cudf_data_struct_date *)struct_ptr)->data;
1696 } else if (type == TYPE_daytime) {
1697 data = ((struct cudf_data_struct_time *)struct_ptr)->data;
1698 } else if (type == TYPE_timestamp) {
1699 data = ((struct cudf_data_struct_timestamp *)struct_ptr)->data;
1700 } else if (type == TYPE_blob) {
1701 data = ((struct cudf_data_struct_blob *)struct_ptr)->data;
1702 } else {
1703 // unsupported type: string
1704 data = ((struct cudf_data_struct_str *)struct_ptr)->data;
1705 }
1706 return data;
1707}
1708
1709void *GetTypeBat(int type, void *struct_ptr)
1710{
1711 void *bat = NULL;
1712
1713 if (type == TYPE_bit || type == TYPE_bte) {
1714 bat = ((struct cudf_data_struct_bte *)struct_ptr)->bat;
1715 } else if (type == TYPE_sht) {
1716 bat = ((struct cudf_data_struct_sht *)struct_ptr)->bat;
1717 } else if (type == TYPE_int) {
1718 bat = ((struct cudf_data_struct_int *)struct_ptr)->bat;
1719 } else if (type == TYPE_oid) {
1720 bat = ((struct cudf_data_struct_oid *)struct_ptr)->bat;
1721 } else if (type == TYPE_lng) {
1722 bat = ((struct cudf_data_struct_lng *)struct_ptr)->bat;
1723 } else if (type == TYPE_flt) {
1724 bat = ((struct cudf_data_struct_flt *)struct_ptr)->bat;
1725 } else if (type == TYPE_dbl) {
1726 bat = ((struct cudf_data_struct_dbl *)struct_ptr)->bat;
1727 } else if (type == TYPE_str) {
1728 bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
1729 } else if (type == TYPE_date) {
1730 bat = ((struct cudf_data_struct_date *)struct_ptr)->bat;
1731 } else if (type == TYPE_daytime) {
1732 bat = ((struct cudf_data_struct_time *)struct_ptr)->bat;
1733 } else if (type == TYPE_timestamp) {
1734 bat = ((struct cudf_data_struct_timestamp *)struct_ptr)->bat;
1735 } else if (type == TYPE_blob) {
1736 bat = ((struct cudf_data_struct_blob *)struct_ptr)->bat;
1737 } else {
1738 // unsupported type: string
1739 bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
1740 }
1741 return bat;
1742}
1743
1744size_t GetTypeCount(int type, void *struct_ptr)
1745{
1746 size_t count = 0;
1747 if (type == TYPE_bit || type == TYPE_bte) {
1748 count = ((struct cudf_data_struct_bte *)struct_ptr)->count;
1749 } else if (type == TYPE_sht) {
1750 count = ((struct cudf_data_struct_sht *)struct_ptr)->count;
1751 } else if (type == TYPE_int) {
1752 count = ((struct cudf_data_struct_int *)struct_ptr)->count;
1753 } else if (type == TYPE_oid) {
1754 count = ((struct cudf_data_struct_oid *)struct_ptr)->count;
1755 } else if (type == TYPE_lng) {
1756 count = ((struct cudf_data_struct_lng *)struct_ptr)->count;
1757 } else if (type == TYPE_flt) {
1758 count = ((struct cudf_data_struct_flt *)struct_ptr)->count;
1759 } else if (type == TYPE_dbl) {
1760 count = ((struct cudf_data_struct_dbl *)struct_ptr)->count;
1761 } else if (type == TYPE_str) {
1762 count = ((struct cudf_data_struct_str *)struct_ptr)->count;
1763 } else if (type == TYPE_date) {
1764 count = ((struct cudf_data_struct_date *)struct_ptr)->count;
1765 } else if (type == TYPE_daytime) {
1766 count = ((struct cudf_data_struct_time *)struct_ptr)->count;
1767 } else if (type == TYPE_timestamp) {
1768 count = ((struct cudf_data_struct_timestamp *)struct_ptr)->count;
1769 } else if (type == TYPE_blob) {
1770 count = ((struct cudf_data_struct_blob *)struct_ptr)->count;
1771 } else {
1772 // unsupported type: string
1773 count = ((struct cudf_data_struct_str *)struct_ptr)->count;
1774 }
1775 return count;
1776}
1777
1778void data_from_date(date d, cudf_data_date *ptr)
1779{
1780 ptr->day = date_day(d);
1781 ptr->month = date_month(d);
1782 ptr->year = date_year(d);
1783}
1784
1785date date_from_data(cudf_data_date *ptr)
1786{
1787 return date_create(ptr->year, ptr->month, ptr->day);
1788}
1789
1790void data_from_time(daytime d, cudf_data_time *ptr)
1791{
1792 ptr->hours = daytime_hour(d);
1793 ptr->minutes = daytime_min(d);
1794 ptr->seconds = daytime_sec(d);
1795 ptr->ms = daytime_usec(d) / 1000;
1796}
1797
1798daytime time_from_data(cudf_data_time *ptr)
1799{
1800 return daytime_create(ptr->hours, ptr->minutes, ptr->seconds,
1801 ptr->ms * 1000);
1802}
1803
1804void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr)
1805{
1806 daytime tm = timestamp_daytime(d);
1807 date dt = timestamp_date(d);
1808
1809 ptr->date.day = date_day(dt);
1810 ptr->date.month = date_month(dt);
1811 ptr->date.year = date_year(dt);
1812 ptr->time.hours = daytime_hour(tm);
1813 ptr->time.minutes = daytime_min(tm);
1814 ptr->time.seconds = daytime_sec(tm);
1815 ptr->time.ms = daytime_usec(tm) / 1000;
1816}
1817
1818timestamp timestamp_from_data(cudf_data_timestamp *ptr)
1819{
1820 return timestamp_create(date_create(ptr->date.year,
1821 ptr->date.month,
1822 ptr->date.day),
1823 daytime_create(ptr->time.hours,
1824 ptr->time.minutes,
1825 ptr->time.seconds,
1826 ptr->time.ms * 1000));
1827}
1828
1829int date_is_null(cudf_data_date value)
1830{
1831 cudf_data_date null_value;
1832 data_from_date(date_nil, &null_value);
1833 return value.year == null_value.year && value.month == null_value.month &&
1834 value.day == null_value.day;
1835}
1836
1837int time_is_null(cudf_data_time value)
1838{
1839 cudf_data_time null_value;
1840 data_from_time(daytime_nil, &null_value);
1841 return value.hours == null_value.hours &&
1842 value.minutes == null_value.minutes &&
1843 value.seconds == null_value.seconds && value.ms == null_value.ms;
1844}
1845
1846int timestamp_is_null(cudf_data_timestamp value)
1847{
1848 return is_timestamp_nil(timestamp_from_data(&value));
1849}
1850
/*
 * Report whether a string value is nil. Nil strings are represented by a
 * NULL pointer; any non-NULL pointer (including "") counts as a value.
 *
 * Returns 1 when value is NULL, 0 otherwise.
 */
int str_is_null(char *value)
{
	if (value != NULL) {
		return 0;
	}
	return 1;
}
1852
1853int blob_is_null(cudf_data_blob value) { return value.size == ~(size_t) 0; }
1854
1855void blob_initialize(struct cudf_data_struct_blob *self,
1856 size_t count) {
1857 self->count = count;
1858 self->data = jump_GDK_malloc(count * sizeof(self->null_value));
1859 memset(self->data, 0, count * sizeof(self->null_value));
1860}
1861