| 1 | #include <stdexcept> |
| 2 | #include <string> |
| 3 | #include <thread> |
| 4 | #include <mutex> |
| 5 | #include "pg_functions.hpp" |
| 6 | #include "parser/parser.hpp" |
| 7 | #include <stdarg.h> |
| 8 | #include <mutex> |
| 9 | #include <cstring> |
| 10 | |
| 11 | |
// minimum size in bytes of one arena chunk (10 KiB); larger requests get a chunk of their own and the chunk table grows on demand
| 13 | #define PG_MALLOC_SIZE 10240 |
| 14 | |
| 15 | namespace duckdb_libpgquery { |
| 16 | |
// Per-thread bookkeeping for the parser: the most recently reported error
// plus the table of memory chunks that palloc hands memory out of.
struct pg_parser_state_str {
	// error details, written by errcode / errposition / errmsg
	int pg_err_code;
	int pg_err_pos;
	char pg_err_msg[BUFSIZ];

	// offset of the next free byte inside the newest chunk
	size_t malloc_pos;
	// number of chunks currently stored in malloc_ptrs
	size_t malloc_ptr_idx;
	// table holding the base pointer of every chunk
	char **malloc_ptrs;
	// capacity of malloc_ptrs, counted in entries
	size_t malloc_ptr_size;
};
using parser_state = pg_parser_state_str;

// every thread works on its own parser state
static __thread parser_state pg_parser_state;

#ifndef __GNUC__
// storage for the holder variable, only defined when not building with a GNU-compatible compiler
__thread PGNode *duckdb_newNodeMacroHolder;
#endif
| 34 | |
| 35 | static void allocate_new(parser_state *state, size_t n) { |
| 36 | if (state->malloc_ptr_idx >= state->malloc_ptr_size) { |
| 37 | size_t new_size = state->malloc_ptr_size * 2; |
| 38 | auto new_malloc_ptrs = (char **) malloc(size: sizeof(char *) * new_size); |
| 39 | memset(s: new_malloc_ptrs, c: 0, n: sizeof(char*) * new_size); |
| 40 | memcpy(dest: new_malloc_ptrs, src: state->malloc_ptrs, n: state->malloc_ptr_size * sizeof(char*)); |
| 41 | free(ptr: state->malloc_ptrs); |
| 42 | state->malloc_ptr_size = new_size; |
| 43 | state->malloc_ptrs = new_malloc_ptrs; |
| 44 | } |
| 45 | if (n < PG_MALLOC_SIZE) { |
| 46 | n = PG_MALLOC_SIZE; |
| 47 | } |
| 48 | char *base_ptr = (char *)malloc(size: n); |
| 49 | if (!base_ptr) { |
| 50 | throw std::runtime_error("Memory allocation failure" ); |
| 51 | } |
| 52 | state->malloc_ptrs[state->malloc_ptr_idx] = base_ptr; |
| 53 | state->malloc_ptr_idx++; |
| 54 | state->malloc_pos = 0; |
| 55 | } |
| 56 | |
| 57 | void *palloc(size_t n) { |
| 58 | // we need to align our pointers for the sanitizer |
| 59 | auto allocate_n = n + sizeof(size_t); |
| 60 | auto aligned_n = ((allocate_n + 7) / 8) * 8; |
| 61 | if (pg_parser_state.malloc_pos + aligned_n > PG_MALLOC_SIZE) { |
| 62 | allocate_new(state: &pg_parser_state, n: aligned_n); |
| 63 | } |
| 64 | |
| 65 | // store the length of the allocation |
| 66 | char *base_ptr = pg_parser_state.malloc_ptrs[pg_parser_state.malloc_ptr_idx - 1] + pg_parser_state.malloc_pos; |
| 67 | memcpy(dest: base_ptr, src: &n, n: sizeof(size_t)); |
| 68 | // store the actual pointer |
| 69 | char *ptr = (char*) base_ptr + sizeof(size_t); |
| 70 | memset(s: ptr, c: 0, n: n); |
| 71 | pg_parser_state.malloc_pos += aligned_n; |
| 72 | return ptr; |
| 73 | } |
| 74 | |
| 75 | void pg_parser_init() { |
| 76 | pg_parser_state.pg_err_code = PGUNDEFINED; |
| 77 | pg_parser_state.pg_err_msg[0] = '\0'; |
| 78 | |
| 79 | pg_parser_state.malloc_ptr_size = 4; |
| 80 | pg_parser_state.malloc_ptrs = (char **) malloc(size: sizeof(char *) * pg_parser_state.malloc_ptr_size); |
| 81 | memset(s: pg_parser_state.malloc_ptrs, c: 0, n: sizeof(char*) * pg_parser_state.malloc_ptr_size); |
| 82 | pg_parser_state.malloc_ptr_idx = 0; |
| 83 | allocate_new(state: &pg_parser_state, n: 1); |
| 84 | } |
| 85 | |
| 86 | void pg_parser_parse(const char *query, parse_result *res) { |
| 87 | res->parse_tree = nullptr; |
| 88 | try { |
| 89 | res->parse_tree = duckdb_libpgquery::raw_parser(str: query); |
| 90 | res->success = pg_parser_state.pg_err_code == PGUNDEFINED; |
| 91 | } catch (std::exception &ex) { |
| 92 | res->success = false; |
| 93 | res->error_message = ex.what(); |
| 94 | } |
| 95 | res->error_message = pg_parser_state.pg_err_msg; |
| 96 | res->error_location = pg_parser_state.pg_err_pos; |
| 97 | } |
| 98 | |
| 99 | void pg_parser_cleanup() { |
| 100 | for (size_t ptr_idx = 0; ptr_idx < pg_parser_state.malloc_ptr_idx; ptr_idx++) { |
| 101 | char *ptr = pg_parser_state.malloc_ptrs[ptr_idx]; |
| 102 | if (ptr) { |
| 103 | free(ptr: ptr); |
| 104 | pg_parser_state.malloc_ptrs[ptr_idx] = nullptr; |
| 105 | } |
| 106 | } |
| 107 | free(ptr: pg_parser_state.malloc_ptrs); |
| 108 | } |
| 109 | |
| 110 | int ereport(int code, ...) { |
| 111 | std::string err = "parser error : " + std::string(pg_parser_state.pg_err_msg); |
| 112 | throw std::runtime_error(err); |
| 113 | } |
// Not supported in this build: every call throws std::runtime_error,
// the arguments are ignored.
void elog(int code, const char *fmt, ...) {
	const char *const reason = "elog NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
| 117 | int errcode(int sqlerrcode) { |
| 118 | pg_parser_state.pg_err_code = sqlerrcode; |
| 119 | return 1; |
| 120 | } |
| 121 | int errmsg(const char *fmt, ...) { |
| 122 | va_list argptr; |
| 123 | va_start(argptr, fmt); |
| 124 | vsnprintf(s: pg_parser_state.pg_err_msg, BUFSIZ, format: fmt, arg: argptr); |
| 125 | va_end(argptr); |
| 126 | return 1; |
| 127 | } |
// Not supported in this build: every call throws std::runtime_error,
// the argument is ignored.
int errhint(const char *msg) {
	const char *const reason = "errhint NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
// Not supported in this build: every call throws std::runtime_error,
// the arguments are ignored.
int errmsg_internal(const char *fmt, ...) {
	const char *const reason = "errmsg_internal NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
// Not supported in this build: every call throws std::runtime_error,
// the arguments are ignored.
int errdetail(const char *fmt, ...) {
	const char *const reason = "errdetail NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
| 137 | int errposition(int cursorpos) { |
| 138 | pg_parser_state.pg_err_pos = cursorpos; |
| 139 | return 1; |
| 140 | } |
| 141 | |
| 142 | char *psprintf(const char *fmt, ...) { |
| 143 | char buf[BUFSIZ]; |
| 144 | va_list args; |
| 145 | size_t newlen; |
| 146 | |
| 147 | // attempt one: use stack buffer and determine length |
| 148 | va_start(args, fmt); |
| 149 | newlen = vsnprintf(s: buf, BUFSIZ, format: fmt, arg: args); |
| 150 | va_end(args); |
| 151 | if (newlen < BUFSIZ) { |
| 152 | return pstrdup(in: buf); |
| 153 | } |
| 154 | |
| 155 | // attempt two, malloc |
| 156 | char *mbuf = (char *)palloc(n: newlen); |
| 157 | va_start(args, fmt); |
| 158 | vsnprintf(s: mbuf, maxlen: newlen, format: fmt, arg: args); |
| 159 | va_end(args); |
| 160 | return mbuf; |
| 161 | } |
| 162 | |
| 163 | char *pstrdup(const char *in) { |
| 164 | char *new_str = (char *)palloc(n: strlen(s: in) + 1); |
| 165 | memcpy(dest: new_str, src: in, n: strlen(s: in)); |
| 166 | return new_str; |
| 167 | } |
| 168 | |
// Intentionally does nothing: single allocations are never returned, the
// whole arena is released at once on parser cleanup.
void pfree(void *ptr) {
	(void)ptr;
}
| 172 | void *palloc0fast(size_t n) { // very fast |
| 173 | return palloc(n); |
| 174 | } |
| 175 | void *repalloc(void *ptr, size_t n) { |
| 176 | // get the length of the allocation |
| 177 | size_t old_len; |
| 178 | char *old_len_ptr = (char *) ptr - sizeof(size_t); |
| 179 | memcpy(dest: (void *) &old_len, src: old_len_ptr, n: sizeof(size_t)); |
| 180 | // re-allocate and copy the data |
| 181 | void *new_buf = palloc(n); |
| 182 | memcpy(dest: new_buf, src: ptr, n: old_len); |
| 183 | return new_buf; |
| 184 | } |
| 185 | char *NameListToString(PGList *names) { |
| 186 | throw std::runtime_error("NameListToString NOT IMPLEMENTED" ); |
| 187 | } |
// Not supported in this build: every call throws std::runtime_error,
// the argument is ignored.
void *copyObject(const void *from) {
	const char *const reason = "copyObject NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
// Not supported in this build: every call throws std::runtime_error,
// the arguments are ignored.
bool equal(const void *a, const void *b) {
	const char *const reason = "equal NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
| 194 | int exprLocation(const PGNode *expr) { |
| 195 | throw std::runtime_error("exprLocation NOT IMPLEMENTED" ); |
| 196 | } |
// Not supported in this build: every call throws std::runtime_error,
// the arguments are ignored.
bool pg_verifymbstr(const char *mbstr, int len, bool noError) {
	const char *const reason = "pg_verifymbstr NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
| 200 | |
// Maximum number of bytes a single character can occupy; the encoding is UTF8.
int pg_database_encoding_max_length(void) {
	const int utf8_max_bytes = 4;
	return utf8_max_bytes;
}
| 204 | |
// Byte length of the UTF-8 sequence introduced by the lead byte *s.
// Bytes that are not a valid lead byte (e.g. continuation bytes) count as 1.
static int pg_utf_mblen(const unsigned char *s) {
	const unsigned char lead = *s;

	if ((lead & 0x80) == 0)
		return 1;
	if ((lead & 0xe0) == 0xc0)
		return 2;
	if ((lead & 0xf0) == 0xe0)
		return 3;
	if ((lead & 0xf8) == 0xf0)
		return 4;
#ifdef NOT_USED
	if ((lead & 0xfc) == 0xf8)
		return 5;
	if ((lead & 0xfe) == 0xfc)
		return 6;
#endif
	return 1;
}

// Counts the characters in the first `limit` bytes of the UTF-8 string
// `mbstr`; counting also stops at the NUL terminator.
//
// A character that starts inside the limit is counted even if it extends
// beyond it. A multi-byte sequence that is cut short by the terminator counts
// as one character and ends the scan.
int pg_mbstrlen_with_len(const char *mbstr, int limit) {
	int len = 0;
	while (limit > 0 && *mbstr) {
		const int l = pg_utf_mblen((const unsigned char *)mbstr);
		// Advance at most `l` bytes, but never step over the terminator or read
		// beyond `limit`. The input is not guaranteed to be valid UTF-8, and a
		// truncated sequence would otherwise move the cursor past the end.
		int consumed = 1;
		while (consumed < l && consumed < limit && mbstr[consumed] != '\0') {
			consumed++;
		}
		limit -= consumed;
		mbstr += consumed;
		len++;
	}
	return len;
}
| 237 | |
// Not supported in this build: every call throws std::runtime_error,
// the arguments are ignored.
int pg_mbcliplen(const char *mbstr, int len, int limit) {
	const char *const reason = "pg_mbcliplen NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
// Not supported in this build: every call throws std::runtime_error,
// the argument is ignored.
int pg_mblen(const char *mbstr) {
	const char *const reason = "pg_mblen NOT IMPLEMENTED";
	throw std::runtime_error(reason);
}
| 244 | PGDefElem *defWithOids(bool value) { |
| 245 | throw std::runtime_error("defWithOids NOT IMPLEMENTED" ); |
| 246 | } |
| 247 | unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string) { |
| 248 | throw std::runtime_error("unicode_to_utf8 NOT IMPLEMENTED" ); |
| 249 | } |
| 250 | |
| 251 | // this replaces a brain damaged macro in nodes.hpp |
| 252 | PGNode *newNode(size_t size, PGNodeTag type) { |
| 253 | auto result = (PGNode *)palloc0fast(n: size); |
| 254 | result->type = type; |
| 255 | return result; |
| 256 | } |
| 257 | } |