1 | #include <stdexcept> |
2 | #include <string> |
3 | #include <thread> |
4 | #include <mutex> |
5 | #include "pg_functions.hpp" |
6 | #include "parser/parser.hpp" |
7 | #include <stdarg.h> |
8 | #include <mutex> |
9 | #include <cstring> |
10 | |
11 | |
// size in bytes of one arena chunk (10 KiB); a palloc request that does not fit gets a new chunk of at least this size
13 | #define PG_MALLOC_SIZE 10240 |
14 | |
15 | namespace duckdb_libpgquery { |
16 | |
17 | typedef struct pg_parser_state_str parser_state; |
18 | struct pg_parser_state_str { |
19 | int pg_err_code; |
20 | int pg_err_pos; |
21 | char pg_err_msg[BUFSIZ]; |
22 | |
23 | size_t malloc_pos; |
24 | size_t malloc_ptr_idx; |
25 | char **malloc_ptrs; |
26 | size_t malloc_ptr_size; |
27 | }; |
28 | |
29 | static __thread parser_state pg_parser_state; |
30 | |
31 | #ifndef __GNUC__ |
32 | __thread PGNode *duckdb_newNodeMacroHolder; |
33 | #endif |
34 | |
35 | static void allocate_new(parser_state *state, size_t n) { |
36 | if (state->malloc_ptr_idx >= state->malloc_ptr_size) { |
37 | size_t new_size = state->malloc_ptr_size * 2; |
38 | auto new_malloc_ptrs = (char **) malloc(size: sizeof(char *) * new_size); |
39 | memset(s: new_malloc_ptrs, c: 0, n: sizeof(char*) * new_size); |
40 | memcpy(dest: new_malloc_ptrs, src: state->malloc_ptrs, n: state->malloc_ptr_size * sizeof(char*)); |
41 | free(ptr: state->malloc_ptrs); |
42 | state->malloc_ptr_size = new_size; |
43 | state->malloc_ptrs = new_malloc_ptrs; |
44 | } |
45 | if (n < PG_MALLOC_SIZE) { |
46 | n = PG_MALLOC_SIZE; |
47 | } |
48 | char *base_ptr = (char *)malloc(size: n); |
49 | if (!base_ptr) { |
50 | throw std::runtime_error("Memory allocation failure" ); |
51 | } |
52 | state->malloc_ptrs[state->malloc_ptr_idx] = base_ptr; |
53 | state->malloc_ptr_idx++; |
54 | state->malloc_pos = 0; |
55 | } |
56 | |
57 | void *palloc(size_t n) { |
58 | // we need to align our pointers for the sanitizer |
59 | auto allocate_n = n + sizeof(size_t); |
60 | auto aligned_n = ((allocate_n + 7) / 8) * 8; |
61 | if (pg_parser_state.malloc_pos + aligned_n > PG_MALLOC_SIZE) { |
62 | allocate_new(state: &pg_parser_state, n: aligned_n); |
63 | } |
64 | |
65 | // store the length of the allocation |
66 | char *base_ptr = pg_parser_state.malloc_ptrs[pg_parser_state.malloc_ptr_idx - 1] + pg_parser_state.malloc_pos; |
67 | memcpy(dest: base_ptr, src: &n, n: sizeof(size_t)); |
68 | // store the actual pointer |
69 | char *ptr = (char*) base_ptr + sizeof(size_t); |
70 | memset(s: ptr, c: 0, n: n); |
71 | pg_parser_state.malloc_pos += aligned_n; |
72 | return ptr; |
73 | } |
74 | |
75 | void pg_parser_init() { |
76 | pg_parser_state.pg_err_code = PGUNDEFINED; |
77 | pg_parser_state.pg_err_msg[0] = '\0'; |
78 | |
79 | pg_parser_state.malloc_ptr_size = 4; |
80 | pg_parser_state.malloc_ptrs = (char **) malloc(size: sizeof(char *) * pg_parser_state.malloc_ptr_size); |
81 | memset(s: pg_parser_state.malloc_ptrs, c: 0, n: sizeof(char*) * pg_parser_state.malloc_ptr_size); |
82 | pg_parser_state.malloc_ptr_idx = 0; |
83 | allocate_new(state: &pg_parser_state, n: 1); |
84 | } |
85 | |
86 | void pg_parser_parse(const char *query, parse_result *res) { |
87 | res->parse_tree = nullptr; |
88 | try { |
89 | res->parse_tree = duckdb_libpgquery::raw_parser(str: query); |
90 | res->success = pg_parser_state.pg_err_code == PGUNDEFINED; |
91 | } catch (std::exception &ex) { |
92 | res->success = false; |
93 | res->error_message = ex.what(); |
94 | } |
95 | res->error_message = pg_parser_state.pg_err_msg; |
96 | res->error_location = pg_parser_state.pg_err_pos; |
97 | } |
98 | |
99 | void pg_parser_cleanup() { |
100 | for (size_t ptr_idx = 0; ptr_idx < pg_parser_state.malloc_ptr_idx; ptr_idx++) { |
101 | char *ptr = pg_parser_state.malloc_ptrs[ptr_idx]; |
102 | if (ptr) { |
103 | free(ptr: ptr); |
104 | pg_parser_state.malloc_ptrs[ptr_idx] = nullptr; |
105 | } |
106 | } |
107 | free(ptr: pg_parser_state.malloc_ptrs); |
108 | } |
109 | |
110 | int ereport(int code, ...) { |
111 | std::string err = "parser error : " + std::string(pg_parser_state.pg_err_msg); |
112 | throw std::runtime_error(err); |
113 | } |
114 | void elog(int code, const char *fmt, ...) { |
115 | throw std::runtime_error("elog NOT IMPLEMENTED" ); |
116 | } |
117 | int errcode(int sqlerrcode) { |
118 | pg_parser_state.pg_err_code = sqlerrcode; |
119 | return 1; |
120 | } |
121 | int errmsg(const char *fmt, ...) { |
122 | va_list argptr; |
123 | va_start(argptr, fmt); |
124 | vsnprintf(s: pg_parser_state.pg_err_msg, BUFSIZ, format: fmt, arg: argptr); |
125 | va_end(argptr); |
126 | return 1; |
127 | } |
128 | int errhint(const char *msg) { |
129 | throw std::runtime_error("errhint NOT IMPLEMENTED" ); |
130 | } |
131 | int errmsg_internal(const char *fmt, ...) { |
132 | throw std::runtime_error("errmsg_internal NOT IMPLEMENTED" ); |
133 | } |
134 | int errdetail(const char *fmt, ...) { |
135 | throw std::runtime_error("errdetail NOT IMPLEMENTED" ); |
136 | } |
137 | int errposition(int cursorpos) { |
138 | pg_parser_state.pg_err_pos = cursorpos; |
139 | return 1; |
140 | } |
141 | |
142 | char *psprintf(const char *fmt, ...) { |
143 | char buf[BUFSIZ]; |
144 | va_list args; |
145 | size_t newlen; |
146 | |
147 | // attempt one: use stack buffer and determine length |
148 | va_start(args, fmt); |
149 | newlen = vsnprintf(s: buf, BUFSIZ, format: fmt, arg: args); |
150 | va_end(args); |
151 | if (newlen < BUFSIZ) { |
152 | return pstrdup(in: buf); |
153 | } |
154 | |
155 | // attempt two, malloc |
156 | char *mbuf = (char *)palloc(n: newlen); |
157 | va_start(args, fmt); |
158 | vsnprintf(s: mbuf, maxlen: newlen, format: fmt, arg: args); |
159 | va_end(args); |
160 | return mbuf; |
161 | } |
162 | |
163 | char *pstrdup(const char *in) { |
164 | char *new_str = (char *)palloc(n: strlen(s: in) + 1); |
165 | memcpy(dest: new_str, src: in, n: strlen(s: in)); |
166 | return new_str; |
167 | } |
168 | |
169 | void pfree(void *ptr) { |
170 | // nop, we free up entire context on parser cleanup |
171 | } |
172 | void *palloc0fast(size_t n) { // very fast |
173 | return palloc(n); |
174 | } |
175 | void *repalloc(void *ptr, size_t n) { |
176 | // get the length of the allocation |
177 | size_t old_len; |
178 | char *old_len_ptr = (char *) ptr - sizeof(size_t); |
179 | memcpy(dest: (void *) &old_len, src: old_len_ptr, n: sizeof(size_t)); |
180 | // re-allocate and copy the data |
181 | void *new_buf = palloc(n); |
182 | memcpy(dest: new_buf, src: ptr, n: old_len); |
183 | return new_buf; |
184 | } |
185 | char *NameListToString(PGList *names) { |
186 | throw std::runtime_error("NameListToString NOT IMPLEMENTED" ); |
187 | } |
188 | void *copyObject(const void *from) { |
189 | throw std::runtime_error("copyObject NOT IMPLEMENTED" ); |
190 | } |
191 | bool equal(const void *a, const void *b) { |
192 | throw std::runtime_error("equal NOT IMPLEMENTED" ); |
193 | } |
194 | int exprLocation(const PGNode *expr) { |
195 | throw std::runtime_error("exprLocation NOT IMPLEMENTED" ); |
196 | } |
197 | bool pg_verifymbstr(const char *mbstr, int len, bool noError) { |
198 | throw std::runtime_error("pg_verifymbstr NOT IMPLEMENTED" ); |
199 | } |
200 | |
201 | int pg_database_encoding_max_length(void) { |
202 | return 4; // UTF8 |
203 | } |
204 | |
205 | static int pg_utf_mblen(const unsigned char *s) { |
206 | int len; |
207 | |
208 | if ((*s & 0x80) == 0) |
209 | len = 1; |
210 | else if ((*s & 0xe0) == 0xc0) |
211 | len = 2; |
212 | else if ((*s & 0xf0) == 0xe0) |
213 | len = 3; |
214 | else if ((*s & 0xf8) == 0xf0) |
215 | len = 4; |
216 | #ifdef NOT_USED |
217 | else if ((*s & 0xfc) == 0xf8) |
218 | len = 5; |
219 | else if ((*s & 0xfe) == 0xfc) |
220 | len = 6; |
221 | #endif |
222 | else |
223 | len = 1; |
224 | return len; |
225 | } |
226 | |
227 | int pg_mbstrlen_with_len(const char *mbstr, int limit) { |
228 | int len = 0; |
229 | while (limit > 0 && *mbstr) { |
230 | int l = pg_utf_mblen(s: (const unsigned char *)mbstr); |
231 | limit -= l; |
232 | mbstr += l; |
233 | len++; |
234 | } |
235 | return len; |
236 | } |
237 | |
238 | int pg_mbcliplen(const char *mbstr, int len, int limit) { |
239 | throw std::runtime_error("pg_mbcliplen NOT IMPLEMENTED" ); |
240 | } |
241 | int pg_mblen(const char *mbstr) { |
242 | throw std::runtime_error("pg_mblen NOT IMPLEMENTED" ); |
243 | } |
244 | PGDefElem *defWithOids(bool value) { |
245 | throw std::runtime_error("defWithOids NOT IMPLEMENTED" ); |
246 | } |
247 | unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string) { |
248 | throw std::runtime_error("unicode_to_utf8 NOT IMPLEMENTED" ); |
249 | } |
250 | |
251 | // this replaces a brain damaged macro in nodes.hpp |
252 | PGNode *newNode(size_t size, PGNodeTag type) { |
253 | auto result = (PGNode *)palloc0fast(n: size); |
254 | result->type = type; |
255 | return result; |
256 | } |
257 | } |