| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | #include "capi.h" |
| 10 | #include "cheader.h" |
| 11 | #include "cheader.text.h" |
| 12 | |
| 13 | #include "mtime.h" |
| 14 | #include "blob.h" |
| 15 | |
| 16 | #include <setjmp.h> |
| 17 | #include <signal.h> |
| 18 | #include <sys/mman.h> |
| 19 | #include <unistd.h> |
| 20 | #include <string.h> |
| 21 | |
| 22 | #if defined(__GNUC__) && !defined(__clang__) |
| 23 | #pragma GCC diagnostic ignored "-Wclobbered" |
| 24 | #endif |
| 25 | |
/* Name of the server option that enables the mprotect-based handling of
 * input data, and the cached value of that option (set in CUDFprelude). */
const char *mprotect_enableflag = "enable_mprotect" ;
static bool option_enable_mprotect = false;
/* Name of the server option that enables longjmp-based error recovery, and
 * the cached value of that option (set in CUDFprelude). */
const char *longjmp_enableflag = "enable_longjmp" ;
static bool option_enable_longjmp = false;
| 30 | |
/* Header stored at the start of every allocation made through the
 * wrapped_GDK_* allocators.  add_allocated_region() links these headers into
 * a per-thread singly linked list and hands the caller the memory that
 * follows the header. */
struct _allocated_region;
typedef struct _allocated_region {
	struct _allocated_region *next; /* previously tracked allocation of this thread */
} allocated_region;
| 35 | |
/* Node of the singly linked list of memory ranges recorded by
 * mprotect_region(). */
struct _mprotected_region;
typedef struct _mprotected_region {
	void *addr; /* start address of the recorded range */
	size_t len; /* length of the recorded range in bytes */

	struct _mprotected_region *next; /* previously recorded range */
} mprotected_region;
| 43 | |
/* Forward declarations of the region bookkeeping helpers defined further
 * down in this file. */
static char *mprotect_region(void *addr, size_t len,
			     mprotected_region **regions);
static char *clear_mprotect(void *addr, size_t len);

/* Per-thread state, indexed by the value returned from THRgettid(): the head
 * of the list of tracked allocations and the jump buffer used as longjmp
 * target. */
static allocated_region *allocated_regions[THREADS];
static jmp_buf jump_buffer[THREADS];

/* Signature of a JIT-compiled UDF: it receives the arrays of input and
 * output descriptors plus the allocator callbacks, and returns a char*
 * (the generated wrapper ends with "return 0;" on the success path). */
typedef char *(*jitted_function)(void **inputs, void **outputs,
				 malloc_function_ptr malloc, free_function_ptr free);
| 53 | |
/* Entry of the cache of already compiled UDFs.  Entries that fall into the
 * same function_cache bucket are chained through `next`. */
struct _cached_functions;
typedef struct _cached_functions {
	jitted_function function; /* entry point of the compiled UDF */
	BUN expression_hash;      /* compared against the hash of the UDF source text on lookup */
	char *parameters;         /* compared with strcmp() against the caller's parameter signature string */
	void *dll_handle;         /* NOTE(review): presumably the dlopen() handle of the library - not set in the visible code */
	struct _cached_functions *next; /* next entry in the same bucket */
} cached_functions;
| 62 | |
/* Number of buckets in the compiled-function cache. */
#define FUNCTION_CACHE_SIZE 128

/* Buckets of the compiled-function cache, indexed by the hash of the function
 * name modulo FUNCTION_CACHE_SIZE.  Lookups take cache_lock. */
static cached_functions *function_cache[FUNCTION_CACHE_SIZE];
static MT_Lock cache_lock = MT_LOCK_INITIALIZER("cache_lock" );
/* Non-zero once CUDFprelude has read the option flags. */
static int cudf_initialized = 0;

/* Shared implementation behind CUDFevalStd (grouped == false) and
 * CUDFevalAggr (grouped == true). */
static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
		    bool grouped);
| 71 | |
| 72 | str CUDFevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 73 | { |
| 74 | return CUDFeval(cntxt, mb, stk, pci, false); |
| 75 | } |
| 76 | |
| 77 | str CUDFevalAggr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 78 | { |
| 79 | return CUDFeval(cntxt, mb, stk, pci, true); |
| 80 | } |
| 81 | |
| 82 | str CUDFprelude(void *ret) |
| 83 | { |
| 84 | (void)ret; |
| 85 | if (!cudf_initialized) { |
| 86 | cudf_initialized = true; |
| 87 | option_enable_mprotect = GDKgetenv_istrue(mprotect_enableflag) || GDKgetenv_isyes(mprotect_enableflag); |
| 88 | option_enable_longjmp = GDKgetenv_istrue(longjmp_enableflag) || GDKgetenv_isyes(longjmp_enableflag); |
| 89 | } |
| 90 | return MAL_SUCCEED; |
| 91 | } |
| 92 | |
/* Writes `data_size` bytes starting at `data` to the stream `f` as a single
 * fwrite item.  Returns true when the stream's error indicator is clear
 * afterwards, false otherwise. */
static bool WriteDataToFile(FILE *f, const void *data, size_t data_size)
{
	(void)fwrite(data, data_size, 1, f);
	if (ferror(f))
		return false;
	return true;
}
| 98 | |
/* Writes the NUL-terminated string `data` (without its terminator) to `f`
 * through WriteDataToFile and returns that function's result. */
static bool WriteTextToFile(FILE *f, const char *data)
{
	const size_t text_length = strlen(data);

	return WriteDataToFile(f, data, text_length);
}
| 103 | |
| 104 | static void handler(int sig, siginfo_t *si, void *unused) |
| 105 | { |
| 106 | int tid = THRgettid(); |
| 107 | |
| 108 | (void)sig; |
| 109 | (void)si; |
| 110 | (void)unused; |
| 111 | |
| 112 | longjmp(jump_buffer[tid], 1); |
| 113 | } |
| 114 | |
| 115 | static bool can_mprotect_region(void* addr) { |
| 116 | if (!option_enable_mprotect) return false; |
| 117 | int pagesize = getpagesize(); |
| 118 | void* page_begin = (void *)((size_t)addr - (size_t)addr % pagesize); |
| 119 | return page_begin == addr; |
| 120 | } |
| 121 | |
| 122 | static char *mprotect_region(void *addr, size_t len, |
| 123 | mprotected_region **regions) |
| 124 | { |
| 125 | mprotected_region *region; |
| 126 | if (len == 0) |
| 127 | return NULL; |
| 128 | |
| 129 | assert(can_mprotect_region(addr)); |
| 130 | |
| 131 | region = GDKmalloc(sizeof(mprotected_region)); |
| 132 | if (!region) { |
| 133 | return MAL_MALLOC_FAIL; |
| 134 | } |
| 135 | region->addr = addr; |
| 136 | region->len = len; |
| 137 | region->next = *regions; |
| 138 | *regions = region; |
| 139 | return NULL; |
| 140 | } |
| 141 | |
/* Makes the range [addr, addr + len) readable and writable again via
 * mprotect().  A NULL `addr` is ignored.  Returns NULL on success, or the
 * strerror() text for errno when mprotect() fails. */
static char *clear_mprotect(void *addr, size_t len)
{
	char *failure = NULL;

	if (addr != NULL && mprotect(addr, len, PROT_READ | PROT_WRITE) < 0)
		failure = strerror(errno);
	return failure;
}
| 152 | |
/*
 * Write the NUL-terminated string `data` to the stream `f`.  On failure,
 * errno is reset to 0, `msg` receives a "Write error." exception and control
 * jumps to the `wrapup` label; `msg` and `wrapup` must exist in the
 * enclosing function.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement (safe inside an unbraced if/else); invoke it with a
 * trailing semicolon.
 */
#define ATTEMPT_TO_WRITE_TO_FILE(f, data) \
	do { \
		if (!WriteTextToFile((f), (data))) { \
			errno = 0; \
			msg = createException(MAL, "cudf.eval", "Write error."); \
			goto wrapup; \
		} \
	} while (0)

/*
 * Same as ATTEMPT_TO_WRITE_TO_FILE, but writes exactly `size` bytes starting
 * at `data` instead of a NUL-terminated string.
 */
#define ATTEMPT_TO_WRITE_DATA_TO_FILE(f, data, size) \
	do { \
		if (!WriteDataToFile((f), (data), (size))) { \
			errno = 0; \
			msg = createException(MAL, "cudf.eval", "Write error."); \
			goto wrapup; \
		} \
	} while (0)
| 166 | |
| 167 | static void *jump_GDK_malloc(size_t size) |
| 168 | { |
| 169 | if (size == 0) |
| 170 | return NULL; |
| 171 | void *ptr = GDKmalloc(size); |
| 172 | if (!ptr && option_enable_longjmp) { |
| 173 | longjmp(jump_buffer[THRgettid()], 2); |
| 174 | } |
| 175 | return ptr; |
| 176 | } |
| 177 | |
| 178 | static void *add_allocated_region(void *ptr) |
| 179 | { |
| 180 | allocated_region *region; |
| 181 | int tid = THRgettid(); |
| 182 | region = (allocated_region *)ptr; |
| 183 | region->next = allocated_regions[tid]; |
| 184 | allocated_regions[tid] = region; |
| 185 | return (char *)ptr + sizeof(allocated_region); |
| 186 | } |
| 187 | |
| 188 | static void *wrapped_GDK_malloc(size_t size) |
| 189 | { |
| 190 | if (size == 0) |
| 191 | return NULL; |
| 192 | void *ptr = jump_GDK_malloc(size + sizeof(allocated_region)); |
| 193 | return add_allocated_region(ptr); |
| 194 | } |
| 195 | |
/* Deliberately does nothing with `ptr`.
 * NOTE(review): presumably memory from the wrapped allocators is released
 * in bulk through the per-thread allocation list - confirm against the
 * cleanup code. */
static void wrapped_GDK_free(void* ptr) {
	(void) ptr;
}
| 200 | |
| 201 | static void *wrapped_GDK_malloc_nojump(size_t size) |
| 202 | { |
| 203 | if (size == 0) |
| 204 | return NULL; |
| 205 | void *ptr = GDKmalloc(size + sizeof(allocated_region)); |
| 206 | if (!ptr) { |
| 207 | return NULL; |
| 208 | } |
| 209 | return add_allocated_region(ptr); |
| 210 | } |
| 211 | |
| 212 | static void *wrapped_GDK_zalloc_nojump(size_t size) |
| 213 | { |
| 214 | if (size == 0) |
| 215 | return NULL; |
| 216 | void *ptr = GDKzalloc(size + sizeof(allocated_region)); |
| 217 | if (!ptr) { |
| 218 | return NULL; |
| 219 | } |
| 220 | return add_allocated_region(ptr); |
| 221 | } |
| 222 | |
/* Generates <tpename>_is_null(value), which returns the result of
 * is_<tpename>_nil(value). */
#define GENERATE_NUMERIC_IS_NULL(type, tpename) \
	static int tpename##_is_null(type value) { return is_##tpename##_nil(value); }

/* Generates <tpename>_initialize(self, count).  The generated function
 * releases the BAT currently attached to `self` (if any), requests a new
 * TRANSIENT BAT of TYPE_<tpename> via COLnew(0, ..., count, ...), attaches it
 * to `self`, points self->data at the base of the BAT's heap and sets both
 * self->count and the BAT count to `count`.
 * If COLnew fails, it longjmps with value 2 when the longjmp option is
 * enabled; otherwise it returns early, leaving self->data and self->count
 * untouched. */
#define GENERATE_NUMERIC_INITIALIZE(type, tpename) \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self, \
					 size_t count) \
	{ \
		BAT* b; \
		if (self->bat) { \
			BBPunfix(((BAT*)self->bat)->batCacheid); \
			self->bat = NULL; \
		} \
		b = COLnew(0, TYPE_##tpename, count, TRANSIENT); \
		if (!b) { \
			if (option_enable_longjmp) longjmp(jump_buffer[THRgettid()], 2); \
			else return; \
		} \
		self->bat = (void*) b; \
		self->count = count; \
		self->data = (type*) b->theap.base; \
		BATsetcount(b, count); \
	}

/* Generates both the initialize and the is_null helper for one type. */
#define GENERATE_NUMERIC_ALL(type, tpename) \
	GENERATE_NUMERIC_INITIALIZE(type, tpename) \
	GENERATE_NUMERIC_IS_NULL(type, tpename)
| 249 | |
| 250 | |
/* Declares <tpename>_is_null (only the prototype; the definition is expected
 * elsewhere in this file) and defines <tpename>_initialize(self, count),
 * which stores `count` in self->count and points self->data at a buffer of
 * count * sizeof(self->null_value) bytes obtained from jump_GDK_malloc (NULL
 * when that size is 0).
 *
 * The macro name was missing from the #define line; it is restored to
 * GENERATE_BASE_HEADERS, the name under which the macro is invoked for the
 * str, date, time and timestamp types. */
#define GENERATE_BASE_HEADERS(type, tpename) \
	static int tpename##_is_null(type value); \
	static void tpename##_initialize(struct cudf_data_struct_##tpename *self, \
					 size_t count) \
	{ \
		self->count = count; \
		self->data = jump_GDK_malloc(count * sizeof(self->null_value)); \
	}
| 259 | |
/* Instantiate <type>_initialize and <type>_is_null for the types whose
 * output buffers are backed by a BAT. */
GENERATE_NUMERIC_ALL(bit, bit);
GENERATE_NUMERIC_ALL(bte, bte);
GENERATE_NUMERIC_ALL(sht, sht);
GENERATE_NUMERIC_ALL(int, int);
GENERATE_NUMERIC_ALL(lng, lng);
GENERATE_NUMERIC_ALL(flt, flt);
GENERATE_NUMERIC_ALL(dbl, dbl);
GENERATE_NUMERIC_ALL(oid, oid);

/* For these types the helpers are generated by GENERATE_BASE_HEADERS,
 * invoked with (C element type, type name). */
GENERATE_BASE_HEADERS(char *, str);
GENERATE_BASE_HEADERS(cudf_data_date, date);
GENERATE_BASE_HEADERS(cudf_data_time, time);
GENERATE_BASE_HEADERS(cudf_data_timestamp, timestamp);
/* The blob helpers are only declared here. */
static int blob_is_null(cudf_data_blob value);
static void blob_initialize(struct cudf_data_struct_blob *self,
			    size_t count);
| 276 | |
/* Declares a local `bat_data` pointing at a zero-initialized
 * cudf_data_struct_<tpe>, stores it in inputs[index] and fills in the
 * is_null / initialize callbacks, the scale (10^scale of the SQL argument
 * when `argnode` is set, otherwise 1) and a NULL bat.
 * On allocation failure it sets `msg` and jumps to `wrapup`.
 * Expects `msg`, `inputs`, `index`, `argnode` and the `wrapup` label in the
 * enclosing scope; because it declares a variable it must be expanded inside
 * its own block. */
#define GENERATE_BAT_INPUT_BASE(tpe) \
	struct cudf_data_struct_##tpe *bat_data = \
		GDKzalloc(sizeof(struct cudf_data_struct_##tpe)); \
	if (!bat_data) { \
		msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
		goto wrapup; \
	} \
	inputs[index] = bat_data; \
	bat_data->is_null = tpe##_is_null; \
	bat_data->scale = \
		argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1; \
	bat_data->bat = NULL; \
	bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
| 290 | |
/*
 * Builds the input descriptor for BAT `b` of element type `tpe` (see
 * GENERATE_BAT_INPUT_BASE for the common part) and selects how the UDF gets
 * to see the data:
 *   - dense BAT: the sequence starting at b->tseqbase is materialized into a
 *     freshly allocated, tracked buffer;
 *   - tail starts on a page boundary and mprotect is enabled: the BAT's own
 *     memory is used directly and the range is recorded in `regions`;
 *   - otherwise: the tail is copied into a freshly allocated, tracked buffer.
 *
 * An empty BAT (count == 0) legitimately gets a NULL data pointer, because
 * the allocator returns NULL for a zero-byte request; this is not reported
 * as an allocation failure in either allocating branch, and no memcpy is
 * issued with a NULL destination.
 *
 * Expects `msg`, `inputs`, `index`, `argnode`, `regions` and the `wrapup`
 * label in the enclosing scope.
 */
#define GENERATE_BAT_INPUT(b, tpe) \
	{ \
		char *mprotect_retval; \
		GENERATE_BAT_INPUT_BASE(tpe); \
		bat_data->count = BATcount(b); \
		bat_data->null_value = tpe##_nil; \
		if (BATtdense(b)) { \
			size_t it = 0; \
			tpe val = b->tseqbase; \
			/* bat is dense, materialize it */ \
			bat_data->data = wrapped_GDK_malloc_nojump( \
				bat_data->count * sizeof(bat_data->null_value)); \
			if (bat_data->count > 0 && !bat_data->data) { \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
				goto wrapup; \
			} \
			for (it = 0; it < bat_data->count; it++) { \
				bat_data->data[it] = val++; \
			} \
		} else if (can_mprotect_region(Tloc(b, 0))) { \
			bat_data->data = (tpe *)Tloc(b, 0); \
			mprotect_retval = mprotect_region( \
				bat_data->data, \
				bat_data->count * sizeof(bat_data->null_value), &regions); \
			if (mprotect_retval) { \
				msg = createException(MAL, "cudf.eval", \
						      "Failed to mprotect region: %s", \
						      mprotect_retval); \
				goto wrapup; \
			} \
		} else { \
			/* cannot mprotect bat region, copy data */ \
			bat_data->data = wrapped_GDK_malloc_nojump( \
				bat_data->count * sizeof(bat_data->null_value)); \
			if (bat_data->count > 0 && !bat_data->data) { \
				msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
				goto wrapup; \
			} \
			/* nothing to copy (and no destination) for an empty BAT */ \
			if (bat_data->count > 0) { \
				memcpy(bat_data->data, Tloc(b, 0), \
				       bat_data->count * sizeof(bat_data->null_value)); \
			} \
		} \
	}
| 333 | |
/* Declares a local `bat_data` pointing at a zero-initialized
 * cudf_data_struct_<tpe>, stores it in outputs[index] and sets it up as an
 * empty output: count 0, data NULL, the is_null / initialize callbacks and
 * the scale (10^scale of the SQL argument when `argnode` is set, otherwise
 * 1).  On allocation failure it sets `msg` and jumps to `wrapup`.
 * Expects `msg`, `outputs`, `index`, `argnode` and the `wrapup` label in the
 * enclosing scope; because it declares a variable it must be expanded inside
 * its own block. */
#define GENERATE_BAT_OUTPUT_BASE(tpe) \
	struct cudf_data_struct_##tpe *bat_data = \
		GDKzalloc(sizeof(struct cudf_data_struct_##tpe)); \
	if (!bat_data) { \
		msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \
		goto wrapup; \
	} \
	outputs[index] = bat_data; \
	bat_data->count = 0; \
	bat_data->data = NULL; \
	bat_data->is_null = tpe##_is_null; \
	bat_data->scale = \
		argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1; \
	bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
| 348 | |
/* Builds the output descriptor for element type `tpe` in its own block:
 * the common setup from GENERATE_BAT_OUTPUT_BASE plus null_value set to
 * <tpe>_nil. */
#define GENERATE_BAT_OUTPUT(tpe) \
	{ \
		GENERATE_BAT_OUTPUT_BASE(tpe); \
		bat_data->null_value = tpe##_nil; \
	}
| 354 | |
/* Server option that requests a debug build of the UDF; CUDFeval consults it
 * only when NDEBUG is defined (otherwise debug builds are always used). */
const char *debug_flag = "capi_use_debug" ;
/* Server options that override the C and C++ compiler commands. */
const char *cc_flag = "capi_cc" ;
const char *cpp_flag = "capi_cpp" ;

/* Line prefixes recognized in the UDF source: such a line is not copied to
 * the generated file; the text after the prefix (first occurrence only) is
 * passed as extra flags to the compile or link command, respectively. */
const char *cflags_pragma = "#pragma CFLAGS " ;
const char *ldflags_pragma = "#pragma LDFLAGS " ;

/* Compiler commands used when the corresponding option is not set. */
#define JIT_COMPILER_NAME "cc"
#define JIT_CPP_COMPILER_NAME "c++"
| 364 | |
/* Type-dispatching accessors, defined later in the file.  `type` is a type
 * code such as TYPE_oid; `struct_ptr` is presumably a pointer to the matching
 * cudf_data_struct_<type> - TODO confirm against the definitions. */
static size_t GetTypeCount(int type, void *struct_ptr);
static void *GetTypeData(int type, void *struct_ptr);
static void *GetTypeBat(int type, void *struct_ptr);
/* Returns the type-name suffix appended to "struct cudf_data_struct_" when
 * the wrapper source is generated. */
static const char *GetTypeName(int type);

/* Conversions between the temporal types used by the server (date, daytime,
 * timestamp) and the cudf_data_* representations, judging by the signatures;
 * defined later in the file. */
static void data_from_date(date d, cudf_data_date *ptr);
static date date_from_data(cudf_data_date *ptr);
static void data_from_time(daytime d, cudf_data_time *ptr);
static daytime time_from_data(cudf_data_time *ptr);
static void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr);
static timestamp timestamp_from_data(cudf_data_timestamp *ptr);

/* Alphabet from which the random temporary file names are drawn. */
static char valid_path_characters[] = "abcdefghijklmnopqrstuvwxyz" ;
| 378 | |
| 379 | static str CUDFeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, |
| 380 | bool grouped) |
| 381 | { |
| 382 | sql_func *sqlfun = NULL; |
| 383 | bit use_cpp = *getArgReference_bit(stk, pci, pci->retc + 1); |
| 384 | str exprStr = *getArgReference_str(stk, pci, pci->retc + 2); |
| 385 | |
| 386 | const int ARG_OFFSET = 3; |
| 387 | |
| 388 | size_t i = 0, j = 0; |
| 389 | char argbuf[64]; |
| 390 | char buf[8192]; |
| 391 | char fname[BUFSIZ]; |
| 392 | char oname[BUFSIZ]; |
| 393 | char libname[BUFSIZ]; |
| 394 | char error_buf[BUFSIZ]; |
| 395 | char total_error_buf[8192]; |
| 396 | size_t error_buffer_position = 0; |
| 397 | str *args = NULL; |
| 398 | str *output_names = NULL; |
| 399 | char *msg = MAL_SUCCEED; |
| 400 | node *argnode; |
| 401 | int seengrp = FALSE; |
| 402 | FILE *f = NULL; |
| 403 | void *handle = NULL; |
| 404 | jitted_function volatile func = NULL; |
| 405 | int ret; |
| 406 | |
| 407 | FILE *compiler = NULL; |
| 408 | int compiler_return_code; |
| 409 | |
| 410 | void ** volatile inputs = NULL; |
| 411 | size_t volatile input_count = 0; |
| 412 | void ** volatile outputs = NULL; |
| 413 | size_t volatile output_count = 0; |
| 414 | BAT ** volatile input_bats = NULL; |
| 415 | mprotected_region *regions = NULL, *region_iter = NULL; |
| 416 | |
| 417 | lng initial_output_count = -1; |
| 418 | |
| 419 | struct sigaction sa, oldsa, oldsb; |
| 420 | sigset_t signal_set; |
| 421 | |
| 422 | #ifdef NDEBUG |
| 423 | bool debug_build = |
| 424 | GDKgetenv_istrue(debug_flag) || GDKgetenv_isyes(debug_flag); |
| 425 | #else |
| 426 | bool debug_build = true; |
| 427 | #endif |
| 428 | char* = NULL; |
| 429 | char* = NULL; |
| 430 | |
| 431 | |
| 432 | const char *compilation_flags = debug_build ? "-g -O0" : "-O2" ; |
| 433 | const char *c_compiler = |
| 434 | use_cpp ? (GDKgetenv(cpp_flag) ? GDKgetenv(cpp_flag) |
| 435 | : JIT_CPP_COMPILER_NAME) |
| 436 | : (GDKgetenv(cc_flag) ? GDKgetenv(cc_flag) : JIT_COMPILER_NAME); |
| 437 | |
| 438 | const char *struct_prefix = "struct cudf_data_struct_" ; |
| 439 | const char *funcname; |
| 440 | |
| 441 | BUN expression_hash = 0, funcname_hash = 0; |
| 442 | cached_functions *cached_function; |
| 443 | char* volatile function_parameters = NULL; |
| 444 | int tid = THRgettid(); |
| 445 | size_t input_size = 0; |
| 446 | bit non_grouped_aggregate = 0; |
| 447 | |
| 448 | size_t index = 0; |
| 449 | int bat_type = 0; |
| 450 | const char* tpe = NULL; |
| 451 | |
| 452 | size_t volatile = 0; |
| 453 | |
| 454 | (void)cntxt; |
| 455 | |
| 456 | allocated_regions[tid] = NULL; |
| 457 | |
| 458 | if (!GDKgetenv_istrue("embedded_c" ) && !GDKgetenv_isyes("embedded_c" )) |
| 459 | throw(MAL, "cudf.eval" , "Embedded C has not been enabled. " |
| 460 | "Start server with --set embedded_c=true" ); |
| 461 | |
| 462 | // we need to be able to catch segfaults and bus errors |
| 463 | // so we can work with mprotect to prevent UDFs from changing |
| 464 | // the input data |
| 465 | |
| 466 | // we remove them from the pthread_sigmask |
| 467 | if (option_enable_mprotect) { |
| 468 | (void)sigemptyset(&signal_set); |
| 469 | (void)sigaddset(&signal_set, SIGSEGV); |
| 470 | (void)sigaddset(&signal_set, SIGBUS); |
| 471 | (void)pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL); |
| 472 | |
| 473 | sa = (struct sigaction) {.sa_flags = 0,}; |
| 474 | } |
| 475 | |
| 476 | if (!grouped) { |
| 477 | sql_subfunc *sqlmorefun = |
| 478 | (*(sql_subfunc **)getArgReference_ptr(stk, pci, pci->retc)); |
| 479 | if (sqlmorefun) |
| 480 | sqlfun = |
| 481 | (*(sql_subfunc **)getArgReference_ptr(stk, pci, pci->retc))->func; |
| 482 | } else { |
| 483 | sqlfun = *(sql_func **)getArgReference_ptr(stk, pci, pci->retc); |
| 484 | } |
| 485 | |
| 486 | funcname = sqlfun ? sqlfun->base.name : "yet_another_c_function" ; |
| 487 | |
| 488 | args = (str *)GDKzalloc(sizeof(str) * pci->argc); |
| 489 | output_names = (str *)GDKzalloc(sizeof(str) * pci->argc); |
| 490 | if (!args || !output_names) { |
| 491 | throw(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 492 | } |
| 493 | |
| 494 | // retrieve the argument names from the sqlfun structure |
| 495 | // first argument after the return contains the pointer to the sql_func |
| 496 | // structure |
| 497 | if (sqlfun != NULL) { |
| 498 | // retrieve the argument names (inputs) |
| 499 | if (sqlfun->ops->cnt > 0) { |
| 500 | int carg = pci->retc + ARG_OFFSET; |
| 501 | argnode = sqlfun->ops->h; |
| 502 | while (argnode) { |
| 503 | char *argname = ((sql_arg *)argnode->data)->name; |
| 504 | args[carg] = GDKstrdup(argname); |
| 505 | if (!args[carg]) { |
| 506 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 507 | goto wrapup; |
| 508 | } |
| 509 | carg++; |
| 510 | argnode = argnode->next; |
| 511 | } |
| 512 | } |
| 513 | // retrieve the output names |
| 514 | argnode = sqlfun->res->h; |
| 515 | for (i = 0; i < (size_t)sqlfun->res->cnt; i++) { |
| 516 | output_names[i] = GDKstrdup(((sql_arg *)argnode->data)->name); |
| 517 | argnode = argnode->next; |
| 518 | } |
| 519 | } |
| 520 | |
| 521 | // name unnamed outputs |
| 522 | for (i = 0; i < (size_t)pci->retc; i++) { |
| 523 | if (!output_names[i]) { |
| 524 | if (pci->retc > 1) { |
| 525 | snprintf(argbuf, sizeof(argbuf), "output%zu" , i); |
| 526 | } else { |
| 527 | // just call it "output" if there is only one |
| 528 | snprintf(argbuf, sizeof(argbuf), "output" ); |
| 529 | } |
| 530 | output_names[i] = GDKstrdup(argbuf); |
| 531 | } |
| 532 | } |
| 533 | // the first unknown argument is the group, we don't really care for the |
| 534 | // rest. |
| 535 | for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) { |
| 536 | if (args[i] == NULL) { |
| 537 | if (!seengrp && grouped) { |
| 538 | args[i] = GDKstrdup("aggr_group" ); |
| 539 | if (!args[i]) { |
| 540 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 541 | goto wrapup; |
| 542 | } |
| 543 | seengrp = TRUE; |
| 544 | } else { |
| 545 | snprintf(argbuf, sizeof(argbuf), "arg%zu" , i - pci->retc - 1); |
| 546 | args[i] = GDKstrdup(argbuf); |
| 547 | if (!args[i]) { |
| 548 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 549 | goto wrapup; |
| 550 | } |
| 551 | } |
| 552 | } |
| 553 | } |
| 554 | // non-grouped aggregates don't have the group list |
| 555 | // to allow users to write code for both grouped and non-grouped aggregates |
| 556 | // we create an "aggr_group" BAT for non-grouped aggregates |
| 557 | non_grouped_aggregate = grouped && !seengrp; |
| 558 | |
| 559 | input_count = pci->argc - (pci->retc + ARG_OFFSET); |
| 560 | output_count = pci->retc; |
| 561 | |
| 562 | // begin the compilation phase |
| 563 | // first look up if we have already compiled this function |
| 564 | expression_hash = 0; |
| 565 | GDK_STRHASH(exprStr, expression_hash); |
| 566 | GDK_STRHASH(funcname, funcname_hash); |
| 567 | funcname_hash = funcname_hash % FUNCTION_CACHE_SIZE; |
| 568 | j = 0; |
| 569 | for (i = 0; i < (size_t)pci->argc; i++) { |
| 570 | if (args[i]) { |
| 571 | j += strlen(args[i]); |
| 572 | } |
| 573 | if (output_names[i]) { |
| 574 | j += strlen(output_names[i]); |
| 575 | } |
| 576 | } |
| 577 | |
| 578 | function_parameters = |
| 579 | GDKzalloc((j + input_count + output_count + 1) * sizeof(char)); |
| 580 | if (!function_parameters) { |
| 581 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 582 | goto wrapup; |
| 583 | } |
| 584 | for (i = 0; i < input_count; i++) { |
| 585 | if (!isaBatType(getArgType(mb, pci, i))) { |
| 586 | function_parameters[i] = getArgType(mb, pci, i); |
| 587 | } else { |
| 588 | function_parameters[i] = getBatType(getArgType(mb, pci, i)); |
| 589 | } |
| 590 | } |
| 591 | for (i = 0; i < output_count; i++) { |
| 592 | if (!isaBatType(getArgType(mb, pci, i))) { |
| 593 | function_parameters[input_count + i] = getArgType(mb, pci, i); |
| 594 | } else { |
| 595 | function_parameters[input_count + i] = |
| 596 | getBatType(getArgType(mb, pci, i)); |
| 597 | } |
| 598 | } |
| 599 | j = input_count + output_count; |
| 600 | for (i = 0; i < (size_t)pci->argc; i++) { |
| 601 | if (args[i]) { |
| 602 | size_t len = strlen(args[i]); |
| 603 | memcpy(function_parameters + j, args[i], len); |
| 604 | j += len; |
| 605 | } |
| 606 | if (output_names[i]) { |
| 607 | size_t len = strlen(output_names[i]); |
| 608 | memcpy(function_parameters + j, output_names[i], len); |
| 609 | j += len; |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | MT_lock_set(&cache_lock); |
| 614 | cached_function = function_cache[funcname_hash]; |
| 615 | while (cached_function) { |
| 616 | if (cached_function->expression_hash == expression_hash && |
| 617 | strcmp(cached_function->parameters, function_parameters) == 0) { |
| 618 | // this function matches our compiled function |
| 619 | // in both source code and parameters |
| 620 | // use the already compiled function instead of recompiling |
| 621 | func = cached_function->function; |
| 622 | break; |
| 623 | } |
| 624 | cached_function = cached_function->next; |
| 625 | } |
| 626 | MT_lock_unset(&cache_lock); |
| 627 | |
| 628 | if (!func) { |
| 629 | // function was not found in the cache |
| 630 | // we have to compile it |
| 631 | |
| 632 | // first generate the names of the files |
| 633 | // we place the temporary files in the DELDIR directory |
| 634 | // because this will be removed again upon server startup |
| 635 | const int RANDOM_NAME_SIZE = 32; |
| 636 | char *path = NULL; |
| 637 | const char *prefix = TEMPDIR_NAME DIR_SEP_STR; |
| 638 | size_t prefix_size = strlen(prefix); |
| 639 | char *deldirpath; |
| 640 | |
| 641 | memcpy(buf, prefix, sizeof(char) * strlen(prefix)); |
| 642 | // generate a random 32-character name for the temporary files |
| 643 | for (i = prefix_size; i < prefix_size + RANDOM_NAME_SIZE; i++) { |
| 644 | buf[i] = valid_path_characters[rand() % |
| 645 | (sizeof(valid_path_characters) - 1)]; |
| 646 | } |
| 647 | buf[i] = '\0'; |
| 648 | path = GDKfilepath(0, BATDIR, buf, "c" ); |
| 649 | if (!path) { |
| 650 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 651 | goto wrapup; |
| 652 | } |
| 653 | strcpy(fname, path); |
| 654 | strcpy(oname, fname); |
| 655 | oname[strlen(oname) - 1] = 'o'; |
| 656 | GDKfree(path); |
| 657 | |
| 658 | memmove(buf + strlen(SO_PREFIX) + prefix_size, buf + prefix_size, |
| 659 | i + 1 - prefix_size); |
| 660 | memcpy(buf + prefix_size, SO_PREFIX, sizeof(char) * strlen(SO_PREFIX)); |
| 661 | path = |
| 662 | GDKfilepath(0, BATDIR, buf, SO_EXT[0] == '.' ? &SO_EXT[1] : SO_EXT); |
| 663 | if (!path) { |
| 664 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 665 | goto wrapup; |
| 666 | } |
| 667 | strcpy(libname, path); |
| 668 | GDKfree(path); |
| 669 | |
| 670 | // if DELDIR directory does not exist, create it |
| 671 | deldirpath = GDKfilepath(0, NULL, TEMPDIR, NULL); |
| 672 | if (!deldirpath) { |
| 673 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 674 | goto wrapup; |
| 675 | } |
| 676 | if (mkdir(deldirpath, 0777) < 0 && errno != EEXIST) { |
| 677 | msg = createException(MAL, "cudf.eval" , |
| 678 | "cannot create directory %s\n" , deldirpath); |
| 679 | goto wrapup; |
| 680 | } |
| 681 | GDKfree(deldirpath); |
| 682 | |
| 683 | // now generate the source file |
| 684 | f = fopen(fname, "w+" ); |
| 685 | if (!f) { |
| 686 | msg = createException(MAL, "cudf.eval" , |
| 687 | "Failed to open file for JIT compilation: %s" , |
| 688 | strerror(errno)); |
| 689 | errno = 0; |
| 690 | goto wrapup; |
| 691 | } |
| 692 | |
| 693 | // include some standard C headers first |
| 694 | ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdio.h>\n" ); |
| 695 | ATTEMPT_TO_WRITE_TO_FILE(f, "#include <stdlib.h>\n" ); |
| 696 | // we include "cheader.h", but not directly to avoid having to deal with |
| 697 | // headers, etc... |
| 698 | // Instead it is embedded in a string (loaded from "cheader.text.h") |
| 699 | // this file contains the structures used for input/output arguments |
| 700 | ATTEMPT_TO_WRITE_TO_FILE(f, cheader_header_text); |
| 701 | // some monetdb-style typedefs to make it easier |
| 702 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int8_t bte;\n" ); |
| 703 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int16_t sht;\n" ); |
| 704 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef int64_t lng;\n" ); |
| 705 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef float flt;\n" ); |
| 706 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef double dbl;\n" ); |
| 707 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef char* str;\n" ); |
| 708 | ATTEMPT_TO_WRITE_TO_FILE(f, "typedef size_t oid;\n" ); |
| 709 | // now we search exprStr for any preprocessor directives (#) |
| 710 | // we move these to the top of the file |
| 711 | // this allows the user to normally #include files |
| 712 | { |
| 713 | int preprocessor_start = 0; |
| 714 | bool is_preprocessor_directive = false; |
| 715 | bool new_line = false; |
| 716 | for (i = 0; i < strlen(exprStr); i++) { |
| 717 | if (exprStr[i] == '\n') { |
| 718 | if (is_preprocessor_directive) { |
| 719 | // the previous line was a preprocessor directive |
| 720 | // first check if it is one of our special preprocessor directives |
| 721 | if (i - preprocessor_start >= strlen(cflags_pragma) && |
| 722 | memcmp(exprStr + preprocessor_start, cflags_pragma, strlen(cflags_pragma)) == 0) { |
| 723 | size_t cflags_characters = (i - preprocessor_start) - strlen(cflags_pragma); |
| 724 | if (cflags_characters > 0 && !extra_cflags) { |
| 725 | extra_cflags = GDKzalloc(cflags_characters + 1); |
| 726 | if (extra_cflags) { |
| 727 | memcpy(extra_cflags, exprStr + preprocessor_start + strlen(cflags_pragma), cflags_characters); |
| 728 | } |
| 729 | } |
| 730 | } else if (i - preprocessor_start >= strlen(ldflags_pragma) && |
| 731 | memcmp(exprStr + preprocessor_start, ldflags_pragma, strlen(ldflags_pragma)) == 0) { |
| 732 | size_t ldflags_characters = (i - preprocessor_start) - strlen(ldflags_pragma); |
| 733 | if (ldflags_characters > 0 && !extra_ldflags) { |
| 734 | extra_ldflags = GDKzalloc(ldflags_characters + 1); |
| 735 | if (extra_ldflags) { |
| 736 | memcpy(extra_ldflags, exprStr + preprocessor_start + strlen(ldflags_pragma), ldflags_characters); |
| 737 | } |
| 738 | } |
| 739 | } else { |
| 740 | // regular preprocessor directive: write it to the file |
| 741 | ATTEMPT_TO_WRITE_DATA_TO_FILE(f, exprStr + |
| 742 | preprocessor_start, |
| 743 | i - preprocessor_start); |
| 744 | ATTEMPT_TO_WRITE_TO_FILE(f, "\n" ); |
| 745 | } |
| 746 | // now overwrite the preprocessor directive in the |
| 747 | // expression string with spaces |
| 748 | for (j = preprocessor_start; j < i; j++) { |
| 749 | exprStr[j] = ' '; |
| 750 | } |
| 751 | } |
| 752 | is_preprocessor_directive = false; |
| 753 | new_line = true; |
| 754 | } else if (exprStr[i] == ' ' || exprStr[i] == '\t') { |
| 755 | // skip any spaces |
| 756 | continue; |
| 757 | } else if (new_line) { |
| 758 | if (exprStr[i] == '#') { |
| 759 | preprocessor_start = i; |
| 760 | is_preprocessor_directive = true; |
| 761 | } |
| 762 | new_line = false; |
| 763 | } |
| 764 | } |
| 765 | } |
| 766 | |
| 767 | // create the actual function |
| 768 | if (use_cpp) { |
		// avoid name mangling if we are compiling C++ code
| 770 | ATTEMPT_TO_WRITE_TO_FILE(f, "\nextern \"C\"" ); |
| 771 | } |
| 772 | ATTEMPT_TO_WRITE_TO_FILE(f, "\nchar* " ); |
| 773 | ATTEMPT_TO_WRITE_TO_FILE(f, funcname); |
| 774 | ATTEMPT_TO_WRITE_TO_FILE(f, "(void** __inputs, void** __outputs, " |
| 775 | "malloc_function_ptr malloc, free_function_ptr free) {\n" ); |
| 776 | |
| 777 | // now we convert the input arguments from void** to the proper |
| 778 | // input/output |
| 779 | // of the function |
| 780 | // first convert the input |
| 781 | for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) { |
| 782 | bat_type = !isaBatType(getArgType(mb, pci, i)) |
| 783 | ? getArgType(mb, pci, i) |
| 784 | : getBatType(getArgType(mb, pci, i)); |
| 785 | tpe = GetTypeName(bat_type); |
| 786 | assert(tpe); |
| 787 | if (tpe) { |
| 788 | snprintf(buf, sizeof(buf), |
| 789 | "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n" , struct_prefix, |
| 790 | tpe, args[i], struct_prefix, tpe, |
| 791 | i - (pci->retc + ARG_OFFSET)); |
| 792 | ATTEMPT_TO_WRITE_TO_FILE(f, buf); |
| 793 | } |
| 794 | } |
| 795 | if (non_grouped_aggregate) { |
| 796 | // manually add "aggr_group" for non-grouped aggregates |
| 797 | bat_type = TYPE_oid; |
| 798 | tpe = GetTypeName(bat_type); |
| 799 | assert(tpe); |
| 800 | if (tpe) { |
| 801 | snprintf(buf, sizeof(buf), |
| 802 | "\t%s%s %s = *((%s%s*)__inputs[%zu]);\n" , struct_prefix, |
| 803 | tpe, "aggr_group" , struct_prefix, tpe, input_count); |
| 804 | ATTEMPT_TO_WRITE_TO_FILE(f, buf); |
| 805 | } |
| 806 | } |
| 807 | // output types |
| 808 | for (i = 0; i < (size_t)pci->retc; i++) { |
| 809 | bat_type = getBatType(getArgType(mb, pci, i)); |
| 810 | tpe = GetTypeName(bat_type); |
| 811 | assert(tpe); |
| 812 | if (tpe) { |
| 813 | snprintf(buf, sizeof(buf), |
| 814 | "\t%s%s* %s = ((%s%s*)__outputs[%zu]);\n" , struct_prefix, |
| 815 | tpe, output_names[i], struct_prefix, tpe, i); |
| 816 | ATTEMPT_TO_WRITE_TO_FILE(f, buf); |
| 817 | } |
| 818 | } |
| 819 | |
| 820 | ATTEMPT_TO_WRITE_TO_FILE(f, "\n" ); |
| 821 | // write the actual user defined code into the file |
| 822 | ATTEMPT_TO_WRITE_TO_FILE(f, exprStr); |
| 823 | |
| 824 | ATTEMPT_TO_WRITE_TO_FILE(f, "\nreturn 0;\n}\n" ); |
| 825 | |
| 826 | fclose(f); |
| 827 | f = NULL; |
| 828 | |
| 829 | // now it's time to try to compile the code |
| 830 | // we use popen to capture any error output |
| 831 | snprintf(buf, sizeof(buf), "%s %s -c -fPIC %s %s -o %s 2>&1 >/dev/null" , |
| 832 | c_compiler, extra_cflags ? extra_cflags : "" , compilation_flags, fname, oname); |
| 833 | compiler = popen(buf, "r" ); |
| 834 | if (!compiler) { |
| 835 | msg = createException(MAL, "cudf.eval" , "Failed popen" ); |
| 836 | goto wrapup; |
| 837 | } |
| 838 | // read the error stream into the error buffer until the compiler is |
| 839 | // done |
| 840 | while (fgets(error_buf, sizeof(error_buf) - 1, compiler)) { |
| 841 | size_t error_size = strlen(error_buf); |
| 842 | snprintf(total_error_buf + error_buffer_position, |
| 843 | sizeof(total_error_buf) - error_buffer_position - 1, "%s" , |
| 844 | error_buf); |
| 845 | error_buffer_position += error_size; |
| 846 | if (error_buffer_position >= sizeof(total_error_buf)) break; |
| 847 | } |
| 848 | |
| 849 | compiler_return_code = pclose(compiler); |
| 850 | compiler = NULL; |
| 851 | |
| 852 | if (compiler_return_code != 0) { |
| 853 | // failure in compiling the code |
| 854 | // report the failure to the user |
| 855 | msg = createException(MAL, "cudf.eval" , |
| 856 | "Failed to compile C UDF:\n%s" , |
| 857 | total_error_buf); |
| 858 | goto wrapup; |
| 859 | } |
| 860 | |
| 861 | error_buffer_position = 0; |
| 862 | error_buf[0] = '\0'; |
| 863 | |
| 864 | snprintf(buf, sizeof(buf), "%s %s %s -shared -o %s 2>&1 >/dev/null" , c_compiler, |
| 865 | extra_ldflags ? extra_ldflags : "" , oname, libname); |
| 866 | compiler = popen(buf, "r" ); |
| 867 | if (!compiler) { |
| 868 | msg = createException(MAL, "cudf.eval" , "Failed popen" ); |
| 869 | goto wrapup; |
| 870 | } |
| 871 | while (fgets(error_buf, sizeof(error_buf) - 1, compiler)) { |
| 872 | size_t error_size = strlen(error_buf); |
| 873 | snprintf(total_error_buf + error_buffer_position, |
| 874 | sizeof(total_error_buf) - error_buffer_position - 1, "%s" , |
| 875 | error_buf); |
| 876 | error_buffer_position += error_size; |
| 877 | if (error_buffer_position >= sizeof(total_error_buf)) break; |
| 878 | } |
| 879 | |
| 880 | compiler_return_code = pclose(compiler); |
| 881 | compiler = NULL; |
| 882 | |
| 883 | if (compiler_return_code != 0) { |
| 884 | // failure in compiler |
| 885 | msg = createException(MAL, "cudf.eval" , "Failed to link C UDF.\n%s" , |
| 886 | total_error_buf); |
| 887 | goto wrapup; |
| 888 | } |
| 889 | |
| 890 | handle = dlopen(libname, RTLD_LAZY); |
| 891 | if (!handle) { |
| 892 | msg = createException(MAL, "cudf.eval" , |
| 893 | "Failed to open shared library: %s." , |
| 894 | dlerror()); |
| 895 | goto wrapup; |
| 896 | } |
| 897 | func = (jitted_function)dlsym(handle, funcname); |
| 898 | if (!func) { |
| 899 | msg = createException(MAL, "cudf.eval" , |
| 900 | "Failed to load function from library: %s." , |
| 901 | dlerror()); |
| 902 | goto wrapup; |
| 903 | } |
| 904 | // now that we have compiled this function |
| 905 | // store it in our function cache |
| 906 | { |
| 907 | cached_functions *new_entry = GDKmalloc(sizeof(cached_functions)); |
| 908 | if (!new_entry) { |
| 909 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 910 | goto wrapup; |
| 911 | } |
| 912 | new_entry->function = func; |
| 913 | new_entry->expression_hash = expression_hash; |
| 914 | new_entry->parameters = function_parameters; |
| 915 | new_entry->dll_handle = handle; |
| 916 | function_parameters = NULL; |
| 917 | handle = NULL; |
| 918 | MT_lock_set(&cache_lock); |
| 919 | new_entry->next = function_cache[funcname_hash]; |
| 920 | function_cache[funcname_hash] = new_entry; |
| 921 | MT_lock_unset(&cache_lock); |
| 922 | } |
| 923 | } |
| 924 | if (input_count > 0) { |
| 925 | // add "aggr_group" for non-grouped aggregates |
| 926 | extra_inputs = non_grouped_aggregate ? 1 : 0; |
| 927 | input_bats = GDKzalloc(sizeof(BAT *) * (input_count + extra_inputs)); |
| 928 | inputs = GDKzalloc(sizeof(void *) * (input_count + extra_inputs)); |
| 929 | if (!inputs || !input_bats) { |
| 930 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 931 | goto wrapup; |
| 932 | } |
| 933 | } |
| 934 | if (output_count > 0) { |
| 935 | outputs = GDKzalloc(sizeof(void *) * output_count); |
| 936 | if (!outputs) { |
| 937 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 938 | goto wrapup; |
| 939 | } |
| 940 | } |
| 941 | // create the inputs |
| 942 | argnode = sqlfun ? sqlfun->ops->h : NULL; |
| 943 | for (i = pci->retc + ARG_OFFSET; i < (size_t)pci->argc; i++) { |
| 944 | index = i - (pci->retc + ARG_OFFSET); |
| 945 | bat_type = getArgType(mb, pci, i); |
| 946 | if (!isaBatType(bat_type)) { |
| 947 | void* input = NULL; |
| 948 | if (bat_type == TYPE_str) { |
| 949 | input = *getArgReference_str(stk, pci, i); |
| 950 | } else if (bat_type == TYPE_blob) { |
| 951 | input = *(blob**)getArgReference(stk, pci, i); |
| 952 | } else { |
| 953 | input = getArgReference(stk, pci, i); |
| 954 | } |
| 955 | // scalar input |
| 956 | // create a temporary BAT |
| 957 | input_bats[index] = COLnew(0, bat_type, 1, TRANSIENT); |
| 958 | if (!input_bats[index]) { |
| 959 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 960 | goto wrapup; |
| 961 | } |
| 962 | if (BUNappend(input_bats[index], input, |
| 963 | false) != GDK_SUCCEED) { |
| 964 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 965 | goto wrapup; |
| 966 | } |
| 967 | } else { |
| 968 | // deal with BAT input |
| 969 | bat_type = getBatType(getArgType(mb, pci, i)); |
| 970 | input_bats[index] = |
| 971 | BATdescriptor(*getArgReference_bat(stk, pci, i)); |
| 972 | } |
| 973 | |
| 974 | if (bat_type == TYPE_bit) { |
| 975 | GENERATE_BAT_INPUT(input_bats[index], bit); |
| 976 | } else if (bat_type == TYPE_bte) { |
| 977 | GENERATE_BAT_INPUT(input_bats[index], bte); |
| 978 | } else if (bat_type == TYPE_sht) { |
| 979 | GENERATE_BAT_INPUT(input_bats[index], sht); |
| 980 | } else if (bat_type == TYPE_int) { |
| 981 | GENERATE_BAT_INPUT(input_bats[index], int); |
| 982 | } else if (bat_type == TYPE_oid) { |
| 983 | GENERATE_BAT_INPUT(input_bats[index], oid); |
| 984 | } else if (bat_type == TYPE_lng) { |
| 985 | GENERATE_BAT_INPUT(input_bats[index], lng); |
| 986 | } else if (bat_type == TYPE_flt) { |
| 987 | GENERATE_BAT_INPUT(input_bats[index], flt); |
| 988 | } else if (bat_type == TYPE_dbl) { |
| 989 | GENERATE_BAT_INPUT(input_bats[index], dbl); |
| 990 | } else if (bat_type == TYPE_str) { |
| 991 | BATiter li; |
| 992 | BUN p = 0, q = 0; |
| 993 | bool can_mprotect_varheap = false; |
| 994 | str mprotect_retval; |
| 995 | GENERATE_BAT_INPUT_BASE(str); |
| 996 | bat_data->count = BATcount(input_bats[index]); |
| 997 | bat_data->data = bat_data->count == 0 ? NULL : GDKmalloc(sizeof(char *) * bat_data->count); |
| 998 | bat_data->null_value = NULL; |
| 999 | if (bat_data->count > 0 && !bat_data->data) { |
| 1000 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1001 | goto wrapup; |
| 1002 | } |
| 1003 | j = 0; |
| 1004 | |
| 1005 | // check if we can mprotect the varheap |
| 1006 | // if we can't mprotect, copy the strings instead |
| 1007 | assert(input_bats[index]->tvheap); |
| 1008 | can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base); |
| 1009 | |
| 1010 | li = bat_iterator(input_bats[index]); |
| 1011 | BATloop(input_bats[index], p, q) |
| 1012 | { |
| 1013 | char *t = (char *)BUNtvar(li, p); |
| 1014 | if (strcmp(t, str_nil) == 0) { |
| 1015 | bat_data->data[j] = NULL; |
| 1016 | } else { |
| 1017 | if (can_mprotect_varheap) { |
| 1018 | bat_data->data[j] = t; |
| 1019 | } else { |
| 1020 | bat_data->data[j] = wrapped_GDK_malloc_nojump(strlen(t) + 1); |
| 1021 | if (!bat_data->data[j]) { |
| 1022 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1023 | goto wrapup; |
| 1024 | } |
| 1025 | strcpy(bat_data->data[j], t); |
| 1026 | } |
| 1027 | } |
| 1028 | j++; |
| 1029 | } |
| 1030 | if (can_mprotect_varheap) { |
| 1031 | // mprotect the varheap of the BAT to prevent modification of input strings |
| 1032 | mprotect_retval = |
| 1033 | mprotect_region(input_bats[index]->tvheap->base, |
| 1034 | input_bats[index]->tvheap->size, ®ions); |
| 1035 | if (mprotect_retval) { |
| 1036 | msg = createException(MAL, "cudf.eval" , |
| 1037 | "Failed to mprotect region: %s" , |
| 1038 | mprotect_retval); |
| 1039 | goto wrapup; |
| 1040 | } |
| 1041 | } |
| 1042 | } else if (bat_type == TYPE_date) { |
| 1043 | date *baseptr; |
| 1044 | GENERATE_BAT_INPUT_BASE(date); |
| 1045 | bat_data->count = BATcount(input_bats[index]); |
| 1046 | bat_data->data = bat_data->count == 0 ? NULL : |
| 1047 | GDKmalloc(sizeof(bat_data->null_value) * bat_data->count); |
| 1048 | if (bat_data->count > 0 && !bat_data->data) { |
| 1049 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1050 | goto wrapup; |
| 1051 | } |
| 1052 | |
| 1053 | baseptr = (date *)Tloc(input_bats[index], 0); |
| 1054 | for (j = 0; j < bat_data->count; j++) { |
| 1055 | data_from_date(baseptr[j], bat_data->data + j); |
| 1056 | } |
| 1057 | data_from_date(date_nil, &bat_data->null_value); |
| 1058 | } else if (bat_type == TYPE_daytime) { |
| 1059 | daytime *baseptr; |
| 1060 | GENERATE_BAT_INPUT_BASE(time); |
| 1061 | bat_data->count = BATcount(input_bats[index]); |
| 1062 | bat_data->data = bat_data->count == 0 ? NULL : |
| 1063 | GDKmalloc(sizeof(bat_data->null_value) * bat_data->count); |
| 1064 | if (bat_data->count > 0 && !bat_data->data) { |
| 1065 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1066 | goto wrapup; |
| 1067 | } |
| 1068 | |
| 1069 | baseptr = (daytime *)Tloc(input_bats[index], 0); |
| 1070 | for (j = 0; j < bat_data->count; j++) { |
| 1071 | data_from_time(baseptr[j], bat_data->data + j); |
| 1072 | } |
| 1073 | data_from_time(daytime_nil, &bat_data->null_value); |
| 1074 | } else if (bat_type == TYPE_timestamp) { |
| 1075 | timestamp *baseptr; |
| 1076 | GENERATE_BAT_INPUT_BASE(timestamp); |
| 1077 | bat_data->count = BATcount(input_bats[index]); |
| 1078 | bat_data->data = bat_data->count == 0 ? NULL : |
| 1079 | GDKmalloc(sizeof(bat_data->null_value) * bat_data->count); |
| 1080 | if (bat_data->count > 0 && !bat_data->data) { |
| 1081 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1082 | goto wrapup; |
| 1083 | } |
| 1084 | |
| 1085 | baseptr = (timestamp *)Tloc(input_bats[index], 0); |
| 1086 | for (j = 0; j < bat_data->count; j++) { |
| 1087 | data_from_timestamp(baseptr[j], bat_data->data + j); |
| 1088 | } |
| 1089 | data_from_timestamp(timestamp_nil, &bat_data->null_value); |
| 1090 | } else if (bat_type == TYPE_blob) { |
| 1091 | BATiter li; |
| 1092 | BUN p = 0, q = 0; |
| 1093 | str mprotect_retval; |
| 1094 | bool can_mprotect_varheap = false; |
| 1095 | GENERATE_BAT_INPUT_BASE(blob); |
| 1096 | bat_data->count = BATcount(input_bats[index]); |
| 1097 | bat_data->data = bat_data->count == 0 ? NULL : |
| 1098 | GDKmalloc(sizeof(cudf_data_blob) * bat_data->count); |
| 1099 | if (bat_data->count > 0 && !bat_data->data) { |
| 1100 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1101 | goto wrapup; |
| 1102 | } |
| 1103 | j = 0; |
| 1104 | |
| 1105 | // check if we can mprotect the varheap |
| 1106 | // if we can't mprotect, copy the strings instead |
| 1107 | assert(input_bats[index]->tvheap); |
| 1108 | can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base); |
| 1109 | |
| 1110 | li = bat_iterator(input_bats[index]); |
| 1111 | BATloop(input_bats[index], p, q) |
| 1112 | { |
| 1113 | blob *t = (blob *)BUNtvar(li, p); |
| 1114 | if (t->nitems == ~(size_t)0) { |
| 1115 | bat_data->data[j].size = ~(size_t) 0; |
| 1116 | bat_data->data[j].data = NULL; |
| 1117 | } else { |
| 1118 | bat_data->data[j].size = t->nitems; |
| 1119 | if (can_mprotect_varheap) { |
| 1120 | bat_data->data[j].data = &t->data[0]; |
| 1121 | } else { |
| 1122 | bat_data->data[j].data = t->nitems == 0 ? NULL : |
| 1123 | wrapped_GDK_malloc_nojump(t->nitems); |
| 1124 | if (t->nitems > 0 && !bat_data->data[j].data) { |
| 1125 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1126 | goto wrapup; |
| 1127 | } |
| 1128 | memcpy(bat_data->data[j].data, &t->data[0], t->nitems); |
| 1129 | } |
| 1130 | } |
| 1131 | j++; |
| 1132 | } |
| 1133 | bat_data->null_value.size = ~(size_t) 0; |
| 1134 | bat_data->null_value.data = NULL; |
| 1135 | if (can_mprotect_varheap) { |
| 1136 | // for blob columns, mprotect the varheap of the BAT |
| 1137 | mprotect_retval = |
| 1138 | mprotect_region(input_bats[index]->tvheap->base, |
| 1139 | input_bats[index]->tvheap->size, ®ions); |
| 1140 | if (mprotect_retval) { |
| 1141 | msg = createException(MAL, "cudf.eval" , |
| 1142 | "Failed to mprotect region: %s" , |
| 1143 | mprotect_retval); |
| 1144 | goto wrapup; |
| 1145 | } |
| 1146 | } |
| 1147 | } else { |
| 1148 | // unsupported type: convert to string |
| 1149 | BATiter li; |
| 1150 | BUN p = 0, q = 0; |
| 1151 | GENERATE_BAT_INPUT_BASE(str); |
| 1152 | bat_data->count = BATcount(input_bats[index]); |
| 1153 | bat_data->null_value = NULL; |
| 1154 | bat_data->data = bat_data->count == 0 ? NULL : |
| 1155 | GDKzalloc(sizeof(char *) * bat_data->count); |
| 1156 | if (bat_data->count > 0 && !bat_data->data) { |
| 1157 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1158 | goto wrapup; |
| 1159 | } |
| 1160 | j = 0; |
| 1161 | |
| 1162 | li = bat_iterator(input_bats[index]); |
| 1163 | BATloop(input_bats[index], p, q) |
| 1164 | { |
| 1165 | void *t = BUNtail(li, p); |
| 1166 | if (BATatoms[bat_type].atomCmp( |
| 1167 | t, BATatoms[bat_type].atomNull) == 0) { |
| 1168 | bat_data->data[j] = NULL; |
| 1169 | } else { |
| 1170 | char *result = NULL; |
| 1171 | size_t length = 0; |
| 1172 | if (BATatoms[bat_type].atomToStr(&result, &length, t, false) == |
| 1173 | 0) { |
| 1174 | msg = createException( |
| 1175 | MAL, "cudf.eval" , |
| 1176 | "Failed to convert element to string" ); |
| 1177 | goto wrapup; |
| 1178 | } |
| 1179 | bat_data->data[j] = result; |
| 1180 | } |
| 1181 | j++; |
| 1182 | } |
| 1183 | } |
| 1184 | input_size = BATcount(input_bats[index]) > input_size |
| 1185 | ? BATcount(input_bats[index]) |
| 1186 | : input_size; |
| 1187 | argnode = argnode ? argnode->next : NULL; |
| 1188 | } |
| 1189 | |
| 1190 | index = input_count; |
| 1191 | if (non_grouped_aggregate) { |
| 1192 | GENERATE_BAT_INPUT_BASE(oid); |
| 1193 | bat_data->count = input_size; |
| 1194 | bat_data->null_value = oid_nil; |
| 1195 | bat_data->data = |
| 1196 | wrapped_GDK_zalloc_nojump(bat_data->count * sizeof(bat_data->null_value)); |
| 1197 | if (!bat_data->data) { |
| 1198 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1199 | goto wrapup; |
| 1200 | } |
| 1201 | } |
| 1202 | |
| 1203 | argnode = sqlfun ? sqlfun->res->h : NULL; |
| 1204 | // output types |
| 1205 | for (i = 0; i < output_count; i++) { |
| 1206 | index = i; |
| 1207 | bat_type = getBatType(getArgType(mb, pci, i)); |
| 1208 | if (bat_type == TYPE_bit) { |
| 1209 | GENERATE_BAT_OUTPUT(bit); |
| 1210 | } else if (bat_type == TYPE_bte) { |
| 1211 | GENERATE_BAT_OUTPUT(bte); |
| 1212 | } else if (bat_type == TYPE_sht) { |
| 1213 | GENERATE_BAT_OUTPUT(sht); |
| 1214 | } else if (bat_type == TYPE_int) { |
| 1215 | GENERATE_BAT_OUTPUT(int); |
| 1216 | } else if (bat_type == TYPE_oid) { |
| 1217 | GENERATE_BAT_OUTPUT(oid); |
| 1218 | } else if (bat_type == TYPE_lng) { |
| 1219 | GENERATE_BAT_OUTPUT(lng); |
| 1220 | } else if (bat_type == TYPE_flt) { |
| 1221 | GENERATE_BAT_OUTPUT(flt); |
| 1222 | } else if (bat_type == TYPE_dbl) { |
| 1223 | GENERATE_BAT_OUTPUT(dbl); |
| 1224 | } else if (bat_type == TYPE_str) { |
| 1225 | GENERATE_BAT_OUTPUT_BASE(str); |
| 1226 | bat_data->null_value = NULL; |
| 1227 | } else if (bat_type == TYPE_date) { |
| 1228 | GENERATE_BAT_OUTPUT_BASE(date); |
| 1229 | data_from_date(date_nil, &bat_data->null_value); |
| 1230 | } else if (bat_type == TYPE_daytime) { |
| 1231 | GENERATE_BAT_OUTPUT_BASE(time); |
| 1232 | data_from_time(daytime_nil, &bat_data->null_value); |
| 1233 | } else if (bat_type == TYPE_timestamp) { |
| 1234 | GENERATE_BAT_OUTPUT_BASE(timestamp); |
| 1235 | data_from_timestamp(timestamp_nil, &bat_data->null_value); |
| 1236 | } else if (bat_type == TYPE_blob) { |
| 1237 | GENERATE_BAT_OUTPUT_BASE(blob); |
| 1238 | bat_data->null_value.size = ~(size_t) 0; |
| 1239 | bat_data->null_value.data = NULL; |
| 1240 | } else { |
| 1241 | // unsupported type, convert from string output |
| 1242 | GENERATE_BAT_OUTPUT_BASE(str); |
| 1243 | bat_data->null_value = NULL; |
| 1244 | } |
| 1245 | argnode = argnode ? argnode->next : NULL; |
| 1246 | } |
| 1247 | |
| 1248 | // set up a longjmp point |
| 1249 | // this longjmp point is used for some error handling in the C function |
| 1250 | // such as failed mallocs |
| 1251 | if (option_enable_longjmp) { |
| 1252 | ret = setjmp(jump_buffer[tid]); |
| 1253 | if (ret < 0) { |
| 1254 | // error value |
| 1255 | msg = createException(MAL, "cudf.eval" , "Failed setjmp: %s" , |
| 1256 | strerror(errno)); |
| 1257 | errno = 0; |
| 1258 | goto wrapup; |
| 1259 | } else if (ret > 0) { |
| 1260 | if (ret == 1) { |
| 1261 | msg = createException(MAL, "cudf.eval" , "Attempting to write to " |
| 1262 | "the input or triggered a " |
| 1263 | "segfault/bus error" ); |
| 1264 | } else if (ret == 2) { |
| 1265 | msg = createException(MAL, "cudf.eval" , |
| 1266 | "Malloc failure in internal function!" ); |
| 1267 | } else { |
| 1268 | // we jumped here |
| 1269 | msg = createException(MAL, "cudf.eval" , "We longjumped here " |
| 1270 | "because of an error, but " |
| 1271 | "we don't know which!" ); |
| 1272 | } |
| 1273 | goto wrapup; |
| 1274 | } |
| 1275 | } |
| 1276 | |
| 1277 | // set up the signal handler for catching segfaults |
| 1278 | if (option_enable_mprotect) { |
| 1279 | sa = (struct sigaction) { |
| 1280 | .sa_flags = SA_SIGINFO, |
| 1281 | .sa_sigaction = handler, |
| 1282 | }; |
| 1283 | (void) sigfillset(&sa.sa_mask); |
| 1284 | if (sigaction(SIGSEGV, &sa, &oldsa) == -1 || |
| 1285 | sigaction(SIGBUS, &sa, &oldsb) == -1) { |
| 1286 | msg = createException(MAL, "cudf.eval" , |
| 1287 | "Failed to set signal handler: %s" , |
| 1288 | strerror(errno)); |
| 1289 | errno = 0; |
| 1290 | goto wrapup; |
| 1291 | } |
| 1292 | // actually mprotect the regions now that the signal handlers are set |
| 1293 | region_iter = regions; |
| 1294 | while (region_iter) { |
| 1295 | if (mprotect(region_iter->addr, region_iter->len, PROT_READ) < 0) { |
| 1296 | goto wrapup; |
| 1297 | } |
| 1298 | region_iter = region_iter->next; |
| 1299 | } |
| 1300 | } |
| 1301 | // call the actual jitted function |
| 1302 | msg = func(inputs, outputs, wrapped_GDK_malloc, wrapped_GDK_free); |
| 1303 | |
| 1304 | |
| 1305 | if (option_enable_mprotect) { |
| 1306 | // clear any mprotected regions |
| 1307 | while (regions) { |
| 1308 | mprotected_region *next = regions->next; |
| 1309 | clear_mprotect(regions->addr, regions->len); |
| 1310 | GDKfree(regions); |
| 1311 | regions = next; |
| 1312 | } |
| 1313 | // clear the signal handlers |
| 1314 | if (sigaction(SIGSEGV, &oldsa, NULL) == -1 || |
| 1315 | sigaction(SIGBUS, &oldsb, NULL) == -1) { |
| 1316 | msg = createException(MAL, "cudf.eval" , |
| 1317 | "Failed to unset signal handler: %s" , |
| 1318 | strerror(errno)); |
| 1319 | errno = 0; |
| 1320 | goto wrapup; |
| 1321 | } |
| 1322 | sa = (struct sigaction) {.sa_flags = 0,}; |
| 1323 | } |
| 1324 | |
| 1325 | if (msg) { |
| 1326 | // failure in function |
| 1327 | msg = createException(MAL, "cudf.eval" , "%s" , msg); |
| 1328 | goto wrapup; |
| 1329 | } |
| 1330 | |
| 1331 | // create the output bats from the returned results |
| 1332 | for (i = 0; i < (size_t)pci->retc; i++) { |
| 1333 | size_t count; |
| 1334 | void *data; |
| 1335 | BAT *b; |
| 1336 | bat_type = getBatType(getArgType(mb, pci, i)); |
| 1337 | |
| 1338 | if (!outputs[i]) { |
| 1339 | msg = createException(MAL, "cudf.eval" , "No data returned." ); |
| 1340 | goto wrapup; |
| 1341 | } |
| 1342 | count = GetTypeCount(bat_type, outputs[i]); |
| 1343 | data = GetTypeData(bat_type, outputs[i]); |
| 1344 | if (!data) { |
| 1345 | msg = createException(MAL, "cudf.eval" , "No data returned." ); |
| 1346 | goto wrapup; |
| 1347 | } |
| 1348 | if (initial_output_count < 0) { |
| 1349 | initial_output_count = count; |
| 1350 | } else if ((size_t)initial_output_count != count) { |
| 1351 | msg = createException(MAL, "cudf.eval" , |
| 1352 | "Data has different cardinalities." ); |
| 1353 | goto wrapup; |
| 1354 | } |
| 1355 | if (bat_type == TYPE_bit || bat_type == TYPE_bte || |
| 1356 | bat_type == TYPE_sht || bat_type == TYPE_int || |
| 1357 | bat_type == TYPE_oid || bat_type == TYPE_lng || |
| 1358 | bat_type == TYPE_flt || bat_type == TYPE_dbl) { |
| 1359 | b = GetTypeBat(bat_type, outputs[i]); |
| 1360 | if (!b) { |
| 1361 | msg = createException(MAL, "cudf.eval" , "Output column was not properly initialized." ); |
| 1362 | goto wrapup; |
| 1363 | } |
| 1364 | } else { |
| 1365 | assert(GetTypeBat(bat_type, outputs[i]) == NULL); |
| 1366 | b = COLnew(0, bat_type, count, TRANSIENT); |
| 1367 | if (!b) { |
| 1368 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1369 | goto wrapup; |
| 1370 | } |
| 1371 | if (bat_type == TYPE_date) { |
| 1372 | date *baseptr = (date *)Tloc(b, 0); |
| 1373 | cudf_data_date *source_base = (cudf_data_date *)data; |
| 1374 | for (j = 0; j < count; j++) { |
| 1375 | baseptr[j] = date_from_data(source_base + j); |
| 1376 | } |
| 1377 | BATsetcount(b, count); |
| 1378 | GDKfree(data); |
| 1379 | } else if (bat_type == TYPE_daytime) { |
| 1380 | daytime *baseptr = (daytime *)Tloc(b, 0); |
| 1381 | cudf_data_time *source_base = (cudf_data_time *)data; |
| 1382 | for (j = 0; j < count; j++) { |
| 1383 | baseptr[j] = time_from_data(source_base + j); |
| 1384 | } |
| 1385 | BATsetcount(b, count); |
| 1386 | GDKfree(data); |
| 1387 | } else if (bat_type == TYPE_timestamp) { |
| 1388 | timestamp *baseptr = (timestamp *)Tloc(b, 0); |
| 1389 | cudf_data_timestamp *source_base = (cudf_data_timestamp *)data; |
| 1390 | for (j = 0; j < count; j++) { |
| 1391 | baseptr[j] = timestamp_from_data(source_base + j); |
| 1392 | } |
| 1393 | BATsetcount(b, count); |
| 1394 | GDKfree(data); |
| 1395 | } else if (bat_type == TYPE_str) { |
| 1396 | char **source_base = (char **)data; |
| 1397 | for (j = 0; j < count; j++) { |
| 1398 | const char *ptr = source_base[j]; |
| 1399 | if (!ptr) { |
| 1400 | ptr = str_nil; |
| 1401 | } |
| 1402 | if (BUNappend(b, ptr, false) != GDK_SUCCEED) { |
| 1403 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1404 | goto wrapup; |
| 1405 | } |
| 1406 | } |
| 1407 | GDKfree(data); |
| 1408 | } else if (bat_type == TYPE_blob) { |
| 1409 | cudf_data_blob *source_base = (cudf_data_blob *)data; |
| 1410 | blob *current_blob = NULL; |
| 1411 | size_t current_blob_maxsize = 0; |
| 1412 | for (j = 0; j < count; j++) { |
| 1413 | const cudf_data_blob blob = source_base[j]; |
| 1414 | |
| 1415 | if (blob.size == ~(size_t) 0) { |
| 1416 | current_blob->nitems = ~(size_t)0; |
| 1417 | } else { |
| 1418 | if (!current_blob || current_blob_maxsize < blob.size) { |
| 1419 | if (current_blob) { |
| 1420 | GDKfree(current_blob); |
| 1421 | } |
| 1422 | current_blob_maxsize = blob.size; |
| 1423 | current_blob = GDKmalloc(sizeof(size_t) + blob.size); |
| 1424 | if (!current_blob) { |
| 1425 | msg = |
| 1426 | createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1427 | goto wrapup; |
| 1428 | } |
| 1429 | } |
| 1430 | |
| 1431 | current_blob->nitems = blob.size; |
| 1432 | memcpy(¤t_blob->data[0], blob.data, blob.size); |
| 1433 | } |
| 1434 | |
| 1435 | if (BUNappend(b, current_blob, false) != GDK_SUCCEED) { |
| 1436 | if (current_blob) { |
| 1437 | GDKfree(current_blob); |
| 1438 | } |
| 1439 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1440 | goto wrapup; |
| 1441 | } |
| 1442 | } |
| 1443 | if (current_blob) { |
| 1444 | GDKfree(current_blob); |
| 1445 | } |
| 1446 | GDKfree(data); |
| 1447 | } else { |
| 1448 | char **source_base = (char **)data; |
| 1449 | size_t len = 0; |
| 1450 | void *element = NULL; |
| 1451 | for (j = 0; j < count; j++) { |
| 1452 | const char *ptr = source_base[j]; |
| 1453 | const void *appended_element; |
| 1454 | if (!ptr || strcmp(ptr, str_nil) == 0) { |
| 1455 | appended_element = (void *)BATatoms[bat_type].atomNull; |
| 1456 | } else { |
| 1457 | if (BATatoms[bat_type].atomFromStr(ptr, &len, &element, false) == |
| 1458 | 0) { |
| 1459 | msg = createException(MAL, "cudf.eval" , |
| 1460 | "Failed to convert output " |
| 1461 | "element from string: %s" , |
| 1462 | ptr); |
| 1463 | goto wrapup; |
| 1464 | } |
| 1465 | appended_element = element; |
| 1466 | } |
| 1467 | if (BUNappend(b, appended_element, false) != GDK_SUCCEED) { |
| 1468 | if (element) { |
| 1469 | GDKfree(element); |
| 1470 | } |
| 1471 | msg = createException(MAL, "cudf.eval" , MAL_MALLOC_FAIL); |
| 1472 | goto wrapup; |
| 1473 | } |
| 1474 | } |
| 1475 | if (element) { |
| 1476 | GDKfree(element); |
| 1477 | } |
| 1478 | GDKfree(data); |
| 1479 | } |
| 1480 | } |
| 1481 | b->tnil = false; |
| 1482 | b->tnonil = false; |
| 1483 | b->tkey = false; |
| 1484 | b->tsorted = false; |
| 1485 | b->trevsorted = false; |
| 1486 | |
| 1487 | // free the output value right now to prevent the internal data from |
| 1488 | // being freed later |
| 1489 | // as the internal data is now part of the bat we just created |
| 1490 | GDKfree(outputs[i]); |
| 1491 | outputs[i] = NULL; |
| 1492 | |
| 1493 | // return the BAT from the function |
| 1494 | if (isaBatType(getArgType(mb, pci, i))) { |
| 1495 | *getArgReference_bat(stk, pci, i) = b->batCacheid; |
| 1496 | BBPkeepref(b->batCacheid); |
| 1497 | } else { |
| 1498 | BATiter li = bat_iterator(b); |
| 1499 | if (VALinit(&stk->stk[pci->argv[i]], bat_type, |
| 1500 | BUNtail(li, 0)) == NULL) { |
| 1501 | msg = createException(MAL, "cudf.eval" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 1502 | } |
| 1503 | BBPunfix(b->batCacheid); |
| 1504 | } |
| 1505 | } |
| 1506 | |
| 1507 | wrapup: |
| 1508 | // cleanup |
| 1509 | // remove the signal handler, if any was set |
| 1510 | if (option_enable_mprotect) { |
| 1511 | if (sa.sa_sigaction) { |
| 1512 | (void) sigaction(SIGSEGV, &oldsa, NULL); |
| 1513 | (void) sigaction(SIGBUS, &oldsb, NULL); |
| 1514 | |
| 1515 | sa = (struct sigaction) {.sa_flags = 0,}; |
| 1516 | } |
| 1517 | // clear any mprotected regions |
| 1518 | while (regions) { |
| 1519 | mprotected_region *next = regions->next; |
| 1520 | clear_mprotect(regions->addr, regions->len); |
| 1521 | GDKfree(regions); |
| 1522 | regions = next; |
| 1523 | } |
| 1524 | } |
| 1525 | while (allocated_regions[tid]) { |
| 1526 | allocated_region *next = allocated_regions[tid]->next; |
| 1527 | GDKfree(allocated_regions[tid]); |
| 1528 | allocated_regions[tid] = next; |
| 1529 | } |
| 1530 | if (option_enable_mprotect) { |
| 1531 | // block segfaults and bus errors again after we exit |
| 1532 | (void)pthread_sigmask(SIG_BLOCK, &signal_set, NULL); |
| 1533 | } |
| 1534 | // argument names (input) |
| 1535 | if (args) { |
| 1536 | for (i = 0; i < (size_t)pci->argc; i++) { |
| 1537 | if (args[i]) { |
| 1538 | GDKfree(args[i]); |
| 1539 | } |
| 1540 | } |
| 1541 | GDKfree(args); |
| 1542 | } |
| 1543 | // output names |
| 1544 | if (output_names) { |
| 1545 | for (i = 0; i < (size_t)pci->retc; i++) { |
| 1546 | if (output_names[i]) { |
| 1547 | GDKfree(output_names[i]); |
| 1548 | } |
| 1549 | } |
| 1550 | GDKfree(output_names); |
| 1551 | } |
| 1552 | if (input_bats) { |
| 1553 | for(i = 0; i < input_count + extra_inputs; i++) { |
| 1554 | if (input_bats[i]) { |
| 1555 | BBPunfix(input_bats[i]->batCacheid); |
| 1556 | } |
| 1557 | } |
| 1558 | GDKfree(input_bats); |
| 1559 | } |
| 1560 | // input data |
| 1561 | if (inputs) { |
| 1562 | for (i = 0; i < (size_t)input_count + extra_inputs; i++) { |
| 1563 | if (inputs[i]) { |
| 1564 | if (isaBatType(getArgType(mb, pci, i))) { |
| 1565 | bat_type = getBatType(getArgType(mb, pci, i)); |
| 1566 | } |
| 1567 | if (bat_type == TYPE_str || bat_type == TYPE_date || |
| 1568 | bat_type == TYPE_daytime || |
| 1569 | bat_type == TYPE_timestamp || bat_type == TYPE_blob) { |
| 1570 | // have to free input data |
| 1571 | void *data = GetTypeData(bat_type, inputs[i]); |
| 1572 | if (data) { |
| 1573 | GDKfree(data); |
| 1574 | } |
| 1575 | } else if (bat_type > TYPE_str) { |
| 1576 | // this type was converted to individually malloced |
| 1577 | // strings |
| 1578 | // we have to free all the individual strings |
| 1579 | char **data = (char **)GetTypeData(bat_type, inputs[i]); |
| 1580 | size_t count = GetTypeCount(bat_type, inputs[i]); |
| 1581 | for (j = 0; j < count; j++) { |
| 1582 | if (data[j]) { |
| 1583 | GDKfree(data[j]); |
| 1584 | } |
| 1585 | } |
| 1586 | if (data) { |
| 1587 | GDKfree(data); |
| 1588 | } |
| 1589 | } |
| 1590 | GDKfree(inputs[i]); |
| 1591 | } |
| 1592 | } |
| 1593 | GDKfree(inputs); |
| 1594 | } |
| 1595 | // output data |
| 1596 | if (outputs) { |
| 1597 | for (i = 0; i < (size_t)output_count; i++) { |
| 1598 | bat_type = isaBatType(getArgType(mb, pci, i)) |
| 1599 | ? getBatType(getArgType(mb, pci, i)) |
| 1600 | : getArgType(mb, pci, i); |
| 1601 | if (outputs[i]) { |
| 1602 | void* b = GetTypeBat(bat_type, outputs[i]); |
| 1603 | if (b) { |
| 1604 | BBPunfix(((BAT*)b)->batCacheid); |
| 1605 | } else { |
| 1606 | void *data = GetTypeData(bat_type, outputs[i]); |
| 1607 | if (data) { |
| 1608 | GDKfree(data); |
| 1609 | } |
| 1610 | } |
| 1611 | GDKfree(outputs[i]); |
| 1612 | } |
| 1613 | } |
| 1614 | GDKfree(outputs); |
| 1615 | } |
| 1616 | if (function_parameters) { |
| 1617 | GDKfree(function_parameters); |
| 1618 | } |
| 1619 | // close the file handle |
| 1620 | if (f) { |
| 1621 | fclose(f); |
| 1622 | } |
| 1623 | // close the dll |
| 1624 | if (handle) { |
| 1625 | dlclose(handle); |
| 1626 | } |
| 1627 | // close the compiler stream |
| 1628 | if (compiler) { |
| 1629 | pclose(compiler); |
| 1630 | } |
| 1631 | if (extra_cflags) { |
| 1632 | GDKfree(extra_cflags); |
| 1633 | } |
| 1634 | if (extra_ldflags) { |
| 1635 | GDKfree(extra_ldflags); |
| 1636 | } |
| 1637 | return msg; |
| 1638 | } |
| 1639 | |
| 1640 | static const char *GetTypeName(int type) |
| 1641 | { |
| 1642 | const char *tpe = NULL; |
| 1643 | if (type == TYPE_bit || type == TYPE_bte) { |
| 1644 | tpe = "bte" ; |
| 1645 | } else if (type == TYPE_sht) { |
| 1646 | tpe = "sht" ; |
| 1647 | } else if (type == TYPE_int) { |
| 1648 | tpe = "int" ; |
| 1649 | } else if (type == TYPE_oid) { |
| 1650 | tpe = "oid" ; |
| 1651 | } else if (type == TYPE_lng) { |
| 1652 | tpe = "lng" ; |
| 1653 | } else if (type == TYPE_flt) { |
| 1654 | tpe = "flt" ; |
| 1655 | } else if (type == TYPE_dbl) { |
| 1656 | tpe = "dbl" ; |
| 1657 | } else if (type == TYPE_str) { |
| 1658 | tpe = "str" ; |
| 1659 | } else if (type == TYPE_date) { |
| 1660 | tpe = "date" ; |
| 1661 | } else if (type == TYPE_daytime) { |
| 1662 | tpe = "time" ; |
| 1663 | } else if (type == TYPE_timestamp) { |
| 1664 | tpe = "timestamp" ; |
| 1665 | } else if (type == TYPE_blob) { |
| 1666 | tpe = "blob" ; |
| 1667 | } else { |
| 1668 | // unsupported type: string |
| 1669 | tpe = "str" ; |
| 1670 | } |
| 1671 | return tpe; |
| 1672 | } |
| 1673 | |
| 1674 | void *GetTypeData(int type, void *struct_ptr) |
| 1675 | { |
| 1676 | void *data = NULL; |
| 1677 | |
| 1678 | if (type == TYPE_bit || type == TYPE_bte) { |
| 1679 | data = ((struct cudf_data_struct_bte *)struct_ptr)->data; |
| 1680 | } else if (type == TYPE_sht) { |
| 1681 | data = ((struct cudf_data_struct_sht *)struct_ptr)->data; |
| 1682 | } else if (type == TYPE_int) { |
| 1683 | data = ((struct cudf_data_struct_int *)struct_ptr)->data; |
| 1684 | } else if (type == TYPE_oid) { |
| 1685 | data = ((struct cudf_data_struct_oid *)struct_ptr)->data; |
| 1686 | } else if (type == TYPE_lng) { |
| 1687 | data = ((struct cudf_data_struct_lng *)struct_ptr)->data; |
| 1688 | } else if (type == TYPE_flt) { |
| 1689 | data = ((struct cudf_data_struct_flt *)struct_ptr)->data; |
| 1690 | } else if (type == TYPE_dbl) { |
| 1691 | data = ((struct cudf_data_struct_dbl *)struct_ptr)->data; |
| 1692 | } else if (type == TYPE_str) { |
| 1693 | data = ((struct cudf_data_struct_str *)struct_ptr)->data; |
| 1694 | } else if (type == TYPE_date) { |
| 1695 | data = ((struct cudf_data_struct_date *)struct_ptr)->data; |
| 1696 | } else if (type == TYPE_daytime) { |
| 1697 | data = ((struct cudf_data_struct_time *)struct_ptr)->data; |
| 1698 | } else if (type == TYPE_timestamp) { |
| 1699 | data = ((struct cudf_data_struct_timestamp *)struct_ptr)->data; |
| 1700 | } else if (type == TYPE_blob) { |
| 1701 | data = ((struct cudf_data_struct_blob *)struct_ptr)->data; |
| 1702 | } else { |
| 1703 | // unsupported type: string |
| 1704 | data = ((struct cudf_data_struct_str *)struct_ptr)->data; |
| 1705 | } |
| 1706 | return data; |
| 1707 | } |
| 1708 | |
| 1709 | void *GetTypeBat(int type, void *struct_ptr) |
| 1710 | { |
| 1711 | void *bat = NULL; |
| 1712 | |
| 1713 | if (type == TYPE_bit || type == TYPE_bte) { |
| 1714 | bat = ((struct cudf_data_struct_bte *)struct_ptr)->bat; |
| 1715 | } else if (type == TYPE_sht) { |
| 1716 | bat = ((struct cudf_data_struct_sht *)struct_ptr)->bat; |
| 1717 | } else if (type == TYPE_int) { |
| 1718 | bat = ((struct cudf_data_struct_int *)struct_ptr)->bat; |
| 1719 | } else if (type == TYPE_oid) { |
| 1720 | bat = ((struct cudf_data_struct_oid *)struct_ptr)->bat; |
| 1721 | } else if (type == TYPE_lng) { |
| 1722 | bat = ((struct cudf_data_struct_lng *)struct_ptr)->bat; |
| 1723 | } else if (type == TYPE_flt) { |
| 1724 | bat = ((struct cudf_data_struct_flt *)struct_ptr)->bat; |
| 1725 | } else if (type == TYPE_dbl) { |
| 1726 | bat = ((struct cudf_data_struct_dbl *)struct_ptr)->bat; |
| 1727 | } else if (type == TYPE_str) { |
| 1728 | bat = ((struct cudf_data_struct_str *)struct_ptr)->bat; |
| 1729 | } else if (type == TYPE_date) { |
| 1730 | bat = ((struct cudf_data_struct_date *)struct_ptr)->bat; |
| 1731 | } else if (type == TYPE_daytime) { |
| 1732 | bat = ((struct cudf_data_struct_time *)struct_ptr)->bat; |
| 1733 | } else if (type == TYPE_timestamp) { |
| 1734 | bat = ((struct cudf_data_struct_timestamp *)struct_ptr)->bat; |
| 1735 | } else if (type == TYPE_blob) { |
| 1736 | bat = ((struct cudf_data_struct_blob *)struct_ptr)->bat; |
| 1737 | } else { |
| 1738 | // unsupported type: string |
| 1739 | bat = ((struct cudf_data_struct_str *)struct_ptr)->bat; |
| 1740 | } |
| 1741 | return bat; |
| 1742 | } |
| 1743 | |
| 1744 | size_t GetTypeCount(int type, void *struct_ptr) |
| 1745 | { |
| 1746 | size_t count = 0; |
| 1747 | if (type == TYPE_bit || type == TYPE_bte) { |
| 1748 | count = ((struct cudf_data_struct_bte *)struct_ptr)->count; |
| 1749 | } else if (type == TYPE_sht) { |
| 1750 | count = ((struct cudf_data_struct_sht *)struct_ptr)->count; |
| 1751 | } else if (type == TYPE_int) { |
| 1752 | count = ((struct cudf_data_struct_int *)struct_ptr)->count; |
| 1753 | } else if (type == TYPE_oid) { |
| 1754 | count = ((struct cudf_data_struct_oid *)struct_ptr)->count; |
| 1755 | } else if (type == TYPE_lng) { |
| 1756 | count = ((struct cudf_data_struct_lng *)struct_ptr)->count; |
| 1757 | } else if (type == TYPE_flt) { |
| 1758 | count = ((struct cudf_data_struct_flt *)struct_ptr)->count; |
| 1759 | } else if (type == TYPE_dbl) { |
| 1760 | count = ((struct cudf_data_struct_dbl *)struct_ptr)->count; |
| 1761 | } else if (type == TYPE_str) { |
| 1762 | count = ((struct cudf_data_struct_str *)struct_ptr)->count; |
| 1763 | } else if (type == TYPE_date) { |
| 1764 | count = ((struct cudf_data_struct_date *)struct_ptr)->count; |
| 1765 | } else if (type == TYPE_daytime) { |
| 1766 | count = ((struct cudf_data_struct_time *)struct_ptr)->count; |
| 1767 | } else if (type == TYPE_timestamp) { |
| 1768 | count = ((struct cudf_data_struct_timestamp *)struct_ptr)->count; |
| 1769 | } else if (type == TYPE_blob) { |
| 1770 | count = ((struct cudf_data_struct_blob *)struct_ptr)->count; |
| 1771 | } else { |
| 1772 | // unsupported type: string |
| 1773 | count = ((struct cudf_data_struct_str *)struct_ptr)->count; |
| 1774 | } |
| 1775 | return count; |
| 1776 | } |
| 1777 | |
| 1778 | void data_from_date(date d, cudf_data_date *ptr) |
| 1779 | { |
| 1780 | ptr->day = date_day(d); |
| 1781 | ptr->month = date_month(d); |
| 1782 | ptr->year = date_year(d); |
| 1783 | } |
| 1784 | |
| 1785 | date date_from_data(cudf_data_date *ptr) |
| 1786 | { |
| 1787 | return date_create(ptr->year, ptr->month, ptr->day); |
| 1788 | } |
| 1789 | |
| 1790 | void data_from_time(daytime d, cudf_data_time *ptr) |
| 1791 | { |
| 1792 | ptr->hours = daytime_hour(d); |
| 1793 | ptr->minutes = daytime_min(d); |
| 1794 | ptr->seconds = daytime_sec(d); |
| 1795 | ptr->ms = daytime_usec(d) / 1000; |
| 1796 | } |
| 1797 | |
| 1798 | daytime time_from_data(cudf_data_time *ptr) |
| 1799 | { |
| 1800 | return daytime_create(ptr->hours, ptr->minutes, ptr->seconds, |
| 1801 | ptr->ms * 1000); |
| 1802 | } |
| 1803 | |
| 1804 | void data_from_timestamp(timestamp d, cudf_data_timestamp *ptr) |
| 1805 | { |
| 1806 | daytime tm = timestamp_daytime(d); |
| 1807 | date dt = timestamp_date(d); |
| 1808 | |
| 1809 | ptr->date.day = date_day(dt); |
| 1810 | ptr->date.month = date_month(dt); |
| 1811 | ptr->date.year = date_year(dt); |
| 1812 | ptr->time.hours = daytime_hour(tm); |
| 1813 | ptr->time.minutes = daytime_min(tm); |
| 1814 | ptr->time.seconds = daytime_sec(tm); |
| 1815 | ptr->time.ms = daytime_usec(tm) / 1000; |
| 1816 | } |
| 1817 | |
| 1818 | timestamp timestamp_from_data(cudf_data_timestamp *ptr) |
| 1819 | { |
| 1820 | return timestamp_create(date_create(ptr->date.year, |
| 1821 | ptr->date.month, |
| 1822 | ptr->date.day), |
| 1823 | daytime_create(ptr->time.hours, |
| 1824 | ptr->time.minutes, |
| 1825 | ptr->time.seconds, |
| 1826 | ptr->time.ms * 1000)); |
| 1827 | } |
| 1828 | |
| 1829 | int date_is_null(cudf_data_date value) |
| 1830 | { |
| 1831 | cudf_data_date null_value; |
| 1832 | data_from_date(date_nil, &null_value); |
| 1833 | return value.year == null_value.year && value.month == null_value.month && |
| 1834 | value.day == null_value.day; |
| 1835 | } |
| 1836 | |
| 1837 | int time_is_null(cudf_data_time value) |
| 1838 | { |
| 1839 | cudf_data_time null_value; |
| 1840 | data_from_time(daytime_nil, &null_value); |
| 1841 | return value.hours == null_value.hours && |
| 1842 | value.minutes == null_value.minutes && |
| 1843 | value.seconds == null_value.seconds && value.ms == null_value.ms; |
| 1844 | } |
| 1845 | |
| 1846 | int timestamp_is_null(cudf_data_timestamp value) |
| 1847 | { |
| 1848 | return is_timestamp_nil(timestamp_from_data(&value)); |
| 1849 | } |
| 1850 | |
/*
 * Return 1 when the string pointer is NULL, 0 otherwise.
 * Only the pointer is inspected; the characters are never read.
 */
int str_is_null(char *value)
{
	if (value != NULL) {
		return 0;
	}
	return 1;
}
| 1852 | |
| 1853 | int blob_is_null(cudf_data_blob value) { return value.size == ~(size_t) 0; } |
| 1854 | |
| 1855 | void blob_initialize(struct cudf_data_struct_blob *self, |
| 1856 | size_t count) { |
| 1857 | self->count = count; |
| 1858 | self->data = jump_GDK_malloc(count * sizeof(self->null_value)); |
| 1859 | memset(self->data, 0, count * sizeof(self->null_value)); |
| 1860 | } |
| 1861 | |