1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * M.L. Kersten |
11 | * XML multiplexes |
12 | * SQL/XML requires a handful of instructions. |
13 | * The collection of routines provided here are map operations |
14 | * for the atom xml primitives. |
15 | * |
16 | * In line with the batcalc module, we assume that if two bat operands |
17 | * are provided that they are aligned. |
18 | * |
 * The implementation is focused on functionality. At a later stage
 * we may postpone string construction until it is really needed.
21 | */ |
22 | |
23 | |
24 | |
25 | #include "monetdb_config.h" |
26 | #include "gdk.h" |
27 | #include <ctype.h> |
28 | #include <string.h> |
29 | #ifdef HAVE_LIBXML |
30 | #include <libxml/parser.h> |
31 | #endif |
32 | #include "mal_interpreter.h" |
33 | #include "mal_function.h" |
34 | #include "xml.h" |
35 | |
/*
 * Exported entry points of the bulk XML module.  Every function returns
 * a str: MAL_SUCCEED on success, otherwise a MAL exception.
 * Note that a few parameter names used here differ from the ones used by
 * the definitions further down (tgt/target, ns/nspace, p/pci); this is
 * harmless in C but worth knowing when searching the file.
 */
mal_export str BATXMLxml2str(bat *ret, const bat *bid);
mal_export str BATXMLxmltext(bat *ret, const bat *bid);
mal_export str BATXMLstr2xml(bat *ret, const bat *bid);
mal_export str BATXMLdocument(bat *ret, const bat *bid);
mal_export str BATXMLcontent(bat *ret, const bat *bid);
mal_export str BATXMLisdocument(bat *ret, const bat *bid);
mal_export str BATXMLelementSmall(bat *ret, const char * const *name, const bat *bid);
mal_export str BATXMLoptions(bat *ret, const char * const *name, const char * const *options, const bat *bid);
mal_export str BATXMLcomment(bat *ret, const bat *bid);
mal_export str BATXMLparse(bat *ret, const char * const *doccont, const bat *bid, const char * const *option);
mal_export str BATXMLxquery(bat *ret, const bat *bid, const char * const *expr);
mal_export str BATXMLpi(bat *ret, const char * const *tgt, const bat *bid);
mal_export str BATXMLroot(bat *ret, const bat *bid, const char * const *version, const char * const *standalone);
mal_export str BATXMLattribute(bat *ret, const char * const *name, const bat *bid);
mal_export str BATXMLelement(bat *ret, const char * const *name, xml *ns, xml *attr, const bat *bid);
mal_export str BATXMLconcat(bat *ret, const bat *bid, const bat *rid);
mal_export str BATXMLforest(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p);
mal_export str BATXMLgroup(xml *ret, const bat *bid);
mal_export str AGGRsubxmlcand(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bat *sid, const bit *skip_nils);
mal_export str AGGRsubxml(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bit *skip_nils);
56 | |
57 | #ifdef HAVE_LIBXML |
58 | |
/*
 * Create the result BAT RES with tail type TPE, using the head sequence
 * base of SRC and BATcount(SRC) as the requested size.
 * If COLnew() fails, SRC is unfixed, the caller supplied CLEANUP
 * statement(s) are executed and an out-of-memory exception tagged
 * "xml." FNAME is thrown (FNAME must be a string literal).
 * On success both sortedness flags are cleared and the result starts out
 * flagged as nil-free; callers reset tnonil when they append a nil.
 */
#define prepareResult(RES,SRC,TPE,FNAME,CLEANUP)			\
	do {								\
		(RES) = COLnew((SRC)->hseqbase, (TPE), BATcount(SRC), TRANSIENT); \
		if ((RES) == NULL) {					\
			BBPunfix((SRC)->batCacheid);			\
			CLEANUP;					\
			throw(MAL, "xml." FNAME, SQLSTATE(HY001) MAL_MALLOC_FAIL); \
		}							\
		(RES)->tsorted = false;					\
		(RES)->trevsorted = false;				\
		(RES)->tnonil = true;					\
	} while (0)
71 | |
/*
 * Hand the filled result BAT RES back to the caller: re-set its count,
 * mark the tail heap dirty when it holds at least one value, store its
 * id in *RET and keep that reference, then unfix the input BAT SRC.
 */
#define finalizeResult(RET,RES,SRC)					\
	do {								\
		BATsetcount((RES), (RES)->batCount);			\
		(RES)->theap.dirty |= (RES)->batCount > 0;		\
		*(RET) = (RES)->batCacheid;				\
		BBPkeepref(*(RET));					\
		BBPunfix((SRC)->batCacheid);				\
	} while (0)
80 | |
81 | str |
82 | BATXMLxml2str(bat *ret, const bat *bid) |
83 | { |
84 | BAT *b, *bn; |
85 | BUN p, q; |
86 | BATiter bi; |
87 | |
88 | if ((b = BATdescriptor(*bid)) == NULL) |
89 | throw(MAL, "xml.str" , INTERNAL_BAT_ACCESS); |
90 | prepareResult(bn, b, TYPE_str, "str" , (void) 0); |
91 | bi = bat_iterator(b); |
92 | BATloop(b, p, q) { |
93 | const char *t = (const char *) BUNtvar(bi, p); |
94 | |
95 | if (strNil(t)) { |
96 | bunfastappVAR(bn, t); |
97 | bn->tnonil = false; |
98 | } else { |
99 | assert(*t == 'A' || *t == 'C' || *t == 'D'); |
100 | bunfastappVAR(bn, t + 1); |
101 | } |
102 | } |
103 | finalizeResult(ret, bn, b); |
104 | return MAL_SUCCEED; |
105 | bunins_failed: |
106 | BBPunfix(b->batCacheid); |
107 | BBPunfix(bn->batCacheid); |
108 | throw(MAL, "xml.str" , OPERATION_FAILED " during bulk coercion" ); |
109 | } |
110 | |
111 | str |
112 | BATXMLxmltext(bat *ret, const bat *bid) |
113 | { |
114 | BAT *b, *bn; |
115 | BUN p, q; |
116 | BATiter bi; |
117 | size_t size = 0; |
118 | str buf = NULL; |
119 | xmlDocPtr doc = NULL; |
120 | xmlNodePtr elem; |
121 | str content = NULL; |
122 | const char *err = OPERATION_FAILED; |
123 | |
124 | if ((b = BATdescriptor(*bid)) == NULL) |
125 | throw(MAL, "xml.text" , INTERNAL_BAT_ACCESS); |
126 | prepareResult(bn, b, TYPE_str, "text" , (void) 0); |
127 | bi = bat_iterator(b); |
128 | BATloop(b, p, q) { |
129 | const char *t = (const char *) BUNtvar(bi, p); |
130 | size_t len; |
131 | |
132 | if (strNil(t)) { |
133 | bunfastappVAR(bn, t); |
134 | bn->tnonil = false; |
135 | continue; |
136 | } |
137 | len = strlen(t); |
138 | switch (*t) { |
139 | case 'D': { |
140 | xmlDocPtr d = xmlParseMemory(t + 1, (int) (len - 1)); |
141 | elem = xmlDocGetRootElement(d); |
142 | content = (str) xmlNodeGetContent(elem); |
143 | xmlFreeDoc(d); |
144 | if (content == NULL) { |
145 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
146 | goto bunins_failed; |
147 | } |
148 | break; |
149 | } |
150 | case 'C': |
151 | if (doc == NULL) |
152 | doc = xmlParseMemory("<doc/>" , 6); |
153 | xmlParseInNodeContext(xmlDocGetRootElement(doc), t + 1, (int) (len - 1), 0, &elem); |
154 | content = (str) xmlNodeGetContent(elem); |
155 | xmlFreeNodeList(elem); |
156 | if (content == NULL) { |
157 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
158 | goto bunins_failed; |
159 | } |
160 | break; |
161 | case 'A': { |
162 | str s; |
163 | |
164 | if (buf == NULL || size < len) { |
165 | size = len + 128; |
166 | if (buf != NULL) |
167 | GDKfree(buf); |
168 | buf = GDKmalloc(size); |
169 | if (buf == NULL) { |
170 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
171 | goto bunins_failed; |
172 | } |
173 | } |
174 | s = buf; |
175 | t++; |
176 | while (*t) { |
177 | if (*t == '"' || *t == '\'') { |
178 | char q = *t++; |
179 | |
180 | s += XMLunquotestring(&t, q, s); |
181 | } |
182 | t++; |
183 | } |
184 | *s = 0; |
185 | break; |
186 | } |
187 | default: |
188 | assert(*t == 'A' || *t == 'C' || *t == 'D'); |
189 | bunfastappVAR(bn, str_nil); |
190 | bn->tnonil = false; |
191 | continue; |
192 | } |
193 | assert(content != NULL || buf != NULL); |
194 | bunfastappVAR(bn, content != NULL ? content : buf); |
195 | if (content != NULL) |
196 | GDKfree(content); |
197 | content = NULL; |
198 | } |
199 | finalizeResult(ret, bn, b); |
200 | if (buf != NULL) |
201 | GDKfree(buf); |
202 | if (doc != NULL) |
203 | xmlFreeDoc(doc); |
204 | return MAL_SUCCEED; |
205 | bunins_failed: |
206 | BBPunfix(b->batCacheid); |
207 | BBPunfix(bn->batCacheid); |
208 | if (buf != NULL) |
209 | GDKfree(buf); |
210 | if (doc != NULL) |
211 | xmlFreeDoc(doc); |
212 | if (content != NULL) |
213 | GDKfree(content); |
214 | throw(MAL, "xml.text" , "%s" , err); |
215 | } |
216 | |
217 | /* |
218 | * The core of the activity is str2xml, where the actual strings |
219 | * are constructed. |
220 | * To avoid repetitive copying we make sure that the garbage |
221 | * collector does not remove the xml intermediates. |
 * This way, we know that as long as the xml-variables are not
 * reused, the complete structure of the xml document(s) is available.
224 | * We merely have to collect the pieces. |
225 | * [FOR LATER, FIRST GO FOR THE EASY IMPLEMENTATION] |
226 | * XML values are represented by strings already. |
227 | */ |
228 | str |
229 | BATXMLstr2xml(bat *ret, const bat *bid) |
230 | { |
231 | BAT *b, *bn; |
232 | BUN p, q; |
233 | size_t size = BUFSIZ; |
234 | str buf; |
235 | const char *err= OPERATION_FAILED; |
236 | BATiter bi; |
237 | |
238 | buf = GDKmalloc(size); |
239 | if (buf == NULL) |
240 | throw(MAL,"xml.str2xml" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
241 | if ((b = BATdescriptor(*bid)) == NULL) { |
242 | GDKfree(buf); |
243 | throw(MAL, "xml.xml" , INTERNAL_BAT_ACCESS); |
244 | } |
245 | prepareResult(bn, b, TYPE_xml, "xml" , GDKfree(buf)); |
246 | bi = bat_iterator(b); |
247 | BATloop(b, p, q) { |
248 | const char *t = (const char *) BUNtvar(bi, p); |
249 | size_t len; |
250 | |
251 | if (strNil(t)) { |
252 | bunfastappVAR(bn, str_nil); |
253 | bn->tnonil = false; |
254 | continue; |
255 | } |
256 | |
257 | len = strlen(t) * 6 + 1; |
258 | if (size < len) { |
259 | size = len + 128; |
260 | GDKfree(buf); |
261 | buf = GDKmalloc(size); |
262 | if (buf == NULL) { |
263 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
264 | goto bunins_failed; |
265 | } |
266 | } |
267 | buf[0] = 'C'; |
268 | XMLquotestring(t, buf + 1, size - 1); |
269 | bunfastappVAR(bn, buf); |
270 | } |
271 | GDKfree(buf); |
272 | finalizeResult(ret, bn, b); |
273 | return MAL_SUCCEED; |
274 | bunins_failed: |
275 | BBPunfix(b->batCacheid); |
276 | BBPunfix(bn->batCacheid); |
277 | if (buf != NULL) |
278 | GDKfree(buf); |
279 | throw(MAL, "xml.xml" , "%s" , err); |
280 | } |
281 | |
282 | str |
283 | BATXMLdocument(bat *ret, const bat *bid) |
284 | { |
285 | BAT *b, *bn; |
286 | BUN p, q; |
287 | BATiter bi; |
288 | size_t size = BUFSIZ; |
289 | str buf = GDKmalloc(size); |
290 | const char *err = OPERATION_FAILED; |
291 | |
292 | if (buf == NULL) |
293 | throw(MAL,"xml.document" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
294 | if ((b = BATdescriptor(*bid)) == NULL) { |
295 | GDKfree(buf); |
296 | throw(MAL, "xml.document" , INTERNAL_BAT_ACCESS); |
297 | } |
298 | prepareResult(bn, b, TYPE_xml, "document" , GDKfree(buf)); |
299 | bi = bat_iterator(b); |
300 | BATloop(b, p, q) { |
301 | const char *t = (const char *) BUNtvar(bi, p); |
302 | xmlDocPtr doc; |
303 | int len; |
304 | xmlChar *s; |
305 | |
306 | if (strNil(t)) { |
307 | bunfastappVAR(bn, str_nil); |
308 | bn->tnonil = false; |
309 | continue; |
310 | } |
311 | len = (int) strlen(t); |
312 | doc = xmlParseMemory(t, len); |
313 | if (doc == NULL) { |
314 | err = OPERATION_FAILED XML_PARSE_ERROR; |
315 | goto bunins_failed; |
316 | } |
317 | xmlDocDumpMemory(doc, &s, &len); |
318 | xmlFreeDoc(doc); |
319 | if ((size_t) len + 2 >= size) { |
320 | GDKfree(buf); |
321 | size = (size_t) len + 128; |
322 | buf = GDKmalloc(size); |
323 | if (buf == NULL) { |
324 | err= MAL_MALLOC_FAIL; |
325 | goto bunins_failed; |
326 | } |
327 | } |
328 | buf[0] = 'D'; |
329 | strcpy(buf + 1, (char *) s); |
330 | bunfastappVAR(bn, buf); |
331 | } |
332 | GDKfree(buf); |
333 | finalizeResult(ret, bn, b); |
334 | return MAL_SUCCEED; |
335 | bunins_failed: |
336 | GDKfree(buf); |
337 | BBPunfix(b->batCacheid); |
338 | BBPunfix(bn->batCacheid); |
339 | throw(MAL, "xml.document" , "%s" , err); |
340 | } |
341 | |
342 | str |
343 | BATXMLcontent(bat *ret, const bat *bid) |
344 | { |
345 | BAT *b, *bn; |
346 | BUN p, q; |
347 | BATiter bi; |
348 | xmlDocPtr doc; |
349 | xmlNodePtr root; |
350 | size_t size = BUFSIZ; |
351 | str buf = GDKmalloc(size); |
352 | const char *err = OPERATION_FAILED; |
353 | xmlBufferPtr xbuf; |
354 | |
355 | if (buf == NULL) |
356 | throw(MAL,"xml.content" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
357 | if ((b = BATdescriptor(*bid)) == NULL) { |
358 | GDKfree(buf); |
359 | throw(MAL, "xml.content" , INTERNAL_BAT_ACCESS); |
360 | } |
361 | doc = xmlParseMemory("<doc/>" , 6); |
362 | root = xmlDocGetRootElement(doc); |
363 | prepareResult(bn, b, TYPE_xml, "content" , GDKfree(buf)); |
364 | bi = bat_iterator(b); |
365 | xbuf = xmlBufferCreate(); |
366 | BATloop(b, p, q) { |
367 | const char *t = (const char *) BUNtvar(bi, p); |
368 | size_t len; |
369 | xmlNodePtr elem; |
370 | xmlParserErrors xerr; |
371 | const xmlChar *s; |
372 | |
373 | if (strNil(t)) { |
374 | bunfastappVAR(bn, str_nil); |
375 | bn->tnonil = false; |
376 | continue; |
377 | } |
378 | len = strlen(t); |
379 | xerr = xmlParseInNodeContext(root, t, (int) len, 0, &elem); |
380 | if (xerr != XML_ERR_OK) { |
381 | err = XML_PARSE_ERROR; |
382 | goto bunins_failed; |
383 | } |
384 | xmlNodeDump(xbuf, doc, elem, 0, 0); |
385 | s = xmlBufferContent(xbuf); |
386 | len = strlen((const char *) s); |
387 | if (len + 2 >= size) { |
388 | GDKfree(buf); |
389 | size = len + 128; |
390 | buf = GDKmalloc(size); |
391 | if (buf == NULL) { |
392 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
393 | goto bunins_failed; |
394 | } |
395 | } |
396 | buf[0] = 'C'; |
397 | strcpy(buf + 1, (const char *) s); |
398 | bunfastappVAR(bn, buf); |
399 | xmlBufferEmpty(xbuf); |
400 | xmlFreeNodeList(elem); |
401 | } |
402 | xmlBufferFree(xbuf); |
403 | xmlFreeDoc(doc); |
404 | GDKfree(buf); |
405 | finalizeResult(ret, bn, b); |
406 | return MAL_SUCCEED; |
407 | bunins_failed: |
408 | xmlBufferFree(xbuf); |
409 | xmlFreeDoc(doc); |
410 | if (buf != NULL) |
411 | GDKfree(buf); |
412 | BBPunfix(b->batCacheid); |
413 | BBPunfix(bn->batCacheid); |
414 | throw(MAL, "xml.document" , "%s" , err); |
415 | } |
416 | |
417 | str |
418 | BATXMLisdocument(bat *ret, const bat *bid) |
419 | { |
420 | BAT *b, *bn; |
421 | BUN p, q; |
422 | BATiter bi; |
423 | |
424 | if ((b = BATdescriptor(*bid)) == NULL) |
425 | throw(MAL, "xml.isdocument" , INTERNAL_BAT_ACCESS); |
426 | prepareResult(bn, b, TYPE_bit, "isdocument" , (void) 0); |
427 | bi = bat_iterator(b); |
428 | BATloop(b, p, q) { |
429 | const char *t = (const char *) BUNtvar(bi, p); |
430 | xmlDocPtr doc; |
431 | bit val; |
432 | |
433 | if (strNil(t)) { |
434 | val = bit_nil; |
435 | bn->tnonil = false; |
436 | } else { |
437 | doc = xmlParseMemory(t, (int) strlen(t)); |
438 | if (doc == NULL) { |
439 | val = 0; |
440 | } else { |
441 | xmlFreeDoc(doc); |
442 | val = 1; |
443 | } |
444 | } |
445 | bunfastappTYPE(bit, bn, &val); |
446 | } |
447 | finalizeResult(ret, bn, b); |
448 | return MAL_SUCCEED; |
449 | bunins_failed: |
450 | BBPunfix(b->batCacheid); |
451 | BBPunfix(bn->batCacheid); |
452 | throw(MAL, "xml.isdocument" , OPERATION_FAILED " During bulk processing" ); |
453 | } |
454 | |
455 | /* |
 * The standard supports specific mappings for
 * NULL values, i.e. {null, absent, empty, nil, niloncontent},
 * in the context of an element and forest construction.
 * The standard should be studied in more detail, because
 * the syntax (rules) seems ambiguous.
 * It applies to all components of an element or their
 * concatenation.
 *
 * For the time being, the variation on XMLtag seems the
 * most reasonable interpretation.
466 | */ |
467 | str |
468 | BATXMLoptions(bat *ret, const char * const *name, const char * const *options, const bat *bid) |
469 | { |
470 | BAT *b, *bn; |
471 | BUN p, q; |
472 | str buf = GDKmalloc(BUFSIZ); |
473 | str val = GDKmalloc(BUFSIZ); |
474 | size_t size = BUFSIZ, len = strlen(*name); |
475 | BATiter bi; |
476 | const char *err = OPERATION_FAILED " During bulk options analysis" ; |
477 | |
478 | if (val == NULL || buf == NULL) { |
479 | if (val != NULL) |
480 | GDKfree(val); |
481 | if (buf != NULL) |
482 | GDKfree(buf); |
483 | throw(MAL, "batxml.options" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
484 | } |
485 | if ((b = BATdescriptor(*bid)) == NULL) { |
486 | GDKfree(val); |
487 | GDKfree(buf); |
488 | throw(MAL, "xml.options" , INTERNAL_BAT_ACCESS); |
489 | } |
490 | prepareResult(bn, b, TYPE_xml, "options" , GDKfree(val); GDKfree(buf)); |
491 | |
492 | if (strcmp(*options, "absent" ) == 0) |
493 | buf[0] = 0; |
494 | else if (strcmp(*options, "empty" ) == 0) |
495 | snprintf(buf, size, "<%s></%s>" , *name, *name); |
496 | else if (strcmp(*options, "null" ) == 0) |
497 | snprintf(buf, size, "null" ); |
498 | else if (strcmp(*options, "nil" ) == 0) |
499 | snprintf(buf, size, "nil" ); |
500 | else { |
501 | /*if(strcmp(*options,"niloncontent")==0) */ |
502 | err = SQLSTATE(0A000) PROGRAM_NYI; |
503 | goto bunins_failed; |
504 | } |
505 | |
506 | snprintf(val, size, "<%s>" , *name); |
507 | bi = bat_iterator(b); |
508 | BATloop(b, p, q) { |
509 | const char *t = (const char *) BUNtvar(bi, p); |
510 | |
511 | if (strNil(t)) { |
512 | bunfastappVAR(bn, buf); |
513 | } else { |
514 | if (strlen(t) > size - 2 * len - 6) { |
515 | char *tmp; |
516 | size += strlen(t); |
517 | tmp = (char *) GDKrealloc(val, size + strlen(t)); |
518 | if (tmp == NULL) { |
519 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
520 | goto bunins_failed; |
521 | } |
522 | val = tmp; |
523 | } |
524 | snprintf(val + len + 2, size - len, "%s</%s>" , t, *name); |
525 | bunfastappVAR(bn, val); |
526 | } |
527 | } |
528 | GDKfree(val); |
529 | GDKfree(buf); |
530 | finalizeResult(ret, bn, b); |
531 | return MAL_SUCCEED; |
532 | bunins_failed: |
533 | BBPunfix(b->batCacheid); |
534 | BBPunfix(bn->batCacheid); |
535 | if (buf != NULL) |
536 | GDKfree(buf); |
537 | if (val != NULL) |
538 | GDKfree(val); |
539 | throw(MAL, "batxml.options" , "%s" , err); |
540 | } |
541 | |
542 | str |
543 | (bat *ret, const bat *bid) |
544 | { |
545 | BAT *b, *bn; |
546 | BUN p, q; |
547 | size_t size = BUFSIZ; |
548 | str buf = GDKmalloc(size); |
549 | BATiter bi; |
550 | const char *err= OPERATION_FAILED; |
551 | |
552 | if (buf == NULL) |
553 | throw(MAL, "xml.comment" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
554 | if ((b = BATdescriptor(*bid)) == NULL) { |
555 | GDKfree(buf); |
556 | throw(MAL, "xml.comment" , INTERNAL_BAT_ACCESS); |
557 | } |
558 | prepareResult(bn, b, TYPE_xml, "comment" , GDKfree(buf)); |
559 | bi = bat_iterator(b); |
560 | BATloop(b, p, q) { |
561 | const char *t = (const char *) BUNtvar(bi, p); |
562 | size_t len; |
563 | |
564 | if (strNil(t)) { |
565 | bunfastappVAR(bn, str_nil); |
566 | bn->tnonil = false; |
567 | continue; |
568 | } |
569 | if (strstr(t, "--" ) != NULL) { |
570 | err = XML_COMMENT_ERROR; |
571 | goto bunins_failed; |
572 | } |
573 | len = strlen(t); |
574 | if (len + 9 >= size) { |
575 | /* make sure there is enough space */ |
576 | size = len + 128; |
577 | /* free/malloc so we don't copy */ |
578 | GDKfree(buf); |
579 | buf = GDKmalloc(size); |
580 | if (buf == NULL) { |
581 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
582 | goto bunins_failed; |
583 | } |
584 | } |
585 | snprintf(buf, size, "C<!--%s-->" , t); |
586 | bunfastappVAR(bn, buf); |
587 | } |
588 | GDKfree(buf); |
589 | finalizeResult(ret, bn, b); |
590 | return MAL_SUCCEED; |
591 | bunins_failed: |
592 | BBPunfix(b->batCacheid); |
593 | BBPunfix(bn->batCacheid); |
594 | if (buf != NULL) |
595 | GDKfree(buf); |
596 | throw(MAL, "xml.comment" , "%s" , err); |
597 | } |
598 | |
599 | str |
600 | BATXMLparse(bat *ret, const char * const *doccont, const bat *bid, const char * const *option) |
601 | { |
602 | (void) option; |
603 | if (strcmp(*doccont, "content" ) == 0) |
604 | return BATXMLcontent(ret, bid); |
605 | if (strcmp(*doccont, "document" ) == 0) |
606 | return BATXMLdocument(ret, bid); |
607 | throw(MAL, "xml.parse" , ILLEGAL_ARGUMENT " <document> or <content> expected" ); |
608 | } |
609 | |
610 | str |
611 | BATXMLpi(bat *ret, const char * const *target, const bat *bid) |
612 | { |
613 | BAT *b, *bn; |
614 | BUN p, q; |
615 | size_t size = BUFSIZ; |
616 | str buf; |
617 | BATiter bi; |
618 | size_t tgtlen; |
619 | const char *err = OPERATION_FAILED; |
620 | |
621 | if (strNil(*target)) |
622 | throw(MAL, "xml.pi" , XML_PI_ERROR); |
623 | buf = GDKmalloc(size); |
624 | if (buf == NULL) |
625 | throw(MAL, "xml.pi" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
626 | |
627 | tgtlen = strlen(*target) + 6; |
628 | if ((b = BATdescriptor(*bid)) == NULL) { |
629 | GDKfree(buf); |
630 | throw(MAL, "xml.pi" , INTERNAL_BAT_ACCESS); |
631 | } |
632 | prepareResult(bn, b, TYPE_xml, "pi" , GDKfree(buf)); |
633 | bi = bat_iterator(b); |
634 | BATloop(b, p, q) { |
635 | const char *t = (const char *) BUNtvar(bi, p); |
636 | size_t len; |
637 | |
638 | len = tgtlen; |
639 | if (!strNil(t)) |
640 | len += strlen(t) * 6 + 1; |
641 | if (len >= size) { |
642 | /* make sure there is enough space */ |
643 | size = len + 128; |
644 | /* free/malloc so we don't copy */ |
645 | GDKfree(buf); |
646 | buf = GDKmalloc(size); |
647 | if (buf == NULL) { |
648 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
649 | goto bunins_failed; |
650 | } |
651 | } |
652 | if (strNil(t)) |
653 | snprintf(buf, size, "C<?%s?>" , *target); |
654 | else { |
655 | int n = snprintf(buf, size, "C<?%s " , *target); |
656 | size_t m = XMLquotestring(t, buf + n, size - n); |
657 | strcpy(buf + n + m, "?>" ); |
658 | } |
659 | bunfastappVAR(bn, buf); |
660 | } |
661 | GDKfree(buf); |
662 | finalizeResult(ret, bn, b); |
663 | return MAL_SUCCEED; |
664 | bunins_failed: |
665 | BBPunfix(b->batCacheid); |
666 | BBPunfix(bn->batCacheid); |
667 | if (buf != NULL) |
668 | GDKfree(buf); |
669 | throw(MAL, "xml.pi" , "%s" , err); |
670 | } |
671 | |
672 | str |
673 | BATXMLroot(bat *ret, const bat *bid, const char * const *version, const char * const *standalone) |
674 | { |
675 | BAT *b, *bn; |
676 | BUN p, q; |
677 | size_t size = BUFSIZ; |
678 | str buf; |
679 | BATiter bi; |
680 | size_t hdrlen; |
681 | const char *err = OPERATION_FAILED; |
682 | |
683 | hdrlen = 8; |
684 | if (!strNil(*version) && **version) { |
685 | if (strcmp(*version, "1.0" ) != 0 && strcmp(*version, "1.1" ) != 0) |
686 | throw(MAL, "xml.root" , XML_VERSION_ERROR); |
687 | hdrlen += 11 + strlen(*version); /* strlen(" version=\"\"") */ |
688 | } |
689 | if (!strNil(*standalone) && **standalone) { |
690 | if (strcmp(*standalone, "yes" ) != 0 && strcmp(*standalone, "no" ) != 0) |
691 | throw(MAL, "xml.root" , XML_STANDALONE_ERROR "illegal XML standalone value" ); |
692 | hdrlen += 14 + strlen(*standalone); /* strlen(" standalone=\"\"") */ |
693 | } |
694 | buf = GDKmalloc(size); |
695 | if (buf == NULL) |
696 | throw(MAL, "xml.root" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
697 | if ((b = BATdescriptor(*bid)) == NULL) { |
698 | GDKfree(buf); |
699 | throw(MAL, "xml.pi" , INTERNAL_BAT_ACCESS); |
700 | } |
701 | prepareResult(bn, b, TYPE_xml, "pi" , GDKfree(buf)); |
702 | bi = bat_iterator(b); |
703 | BATloop(b, p, q) { |
704 | const char *t = (const char *) BUNtvar(bi, p); |
705 | size_t len, i; |
706 | bit isdoc; |
707 | |
708 | len = hdrlen; |
709 | if (!strNil(t)) |
710 | len += strlen(t); |
711 | if (len >= size) { |
712 | /* make sure there is enough space */ |
713 | size = len + 128; |
714 | /* free/malloc so we don't copy */ |
715 | GDKfree(buf); |
716 | buf = GDKmalloc(size); |
717 | if (buf == NULL) { |
718 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
719 | goto bunins_failed; |
720 | } |
721 | } |
722 | if (strNil(t)) { |
723 | strcpy(buf, str_nil); |
724 | bn->tnonil = false; |
725 | } else { |
726 | strcpy(buf, "D<?xml" ); |
727 | i = strlen(buf); |
728 | if (!strNil(*version) && **version) |
729 | i += snprintf(buf + i, len - i, " version=\"%s\"" , *version); |
730 | if (!strNil(*standalone) && **standalone) |
731 | i += snprintf(buf + i, len - i, " standalone=\"%s\"" , *standalone); |
732 | snprintf(buf + i, len - i, "?>%s" , t + 1); |
733 | buf++; |
734 | XMLisdocument(&isdoc, &buf); /* check well-formedness */ |
735 | buf--; |
736 | if (!isdoc) { |
737 | err = XML_NOT_WELL_FORMED; |
738 | goto bunins_failed; |
739 | } |
740 | } |
741 | bunfastappVAR(bn, buf); |
742 | } |
743 | GDKfree(buf); |
744 | finalizeResult(ret, bn, b); |
745 | return MAL_SUCCEED; |
746 | bunins_failed: |
747 | BBPunfix(b->batCacheid); |
748 | BBPunfix(bn->batCacheid); |
749 | if (buf != NULL) |
750 | GDKfree(buf); |
751 | throw(MAL, "xml.root" , "%s" , err); |
752 | } |
753 | |
754 | str |
755 | BATXMLattribute(bat *ret, const char * const *name, const bat *bid) |
756 | { |
757 | BAT *b, *bn; |
758 | BUN p, q; |
759 | size_t size = BUFSIZ; |
760 | str buf; |
761 | BATiter bi; |
762 | size_t attrlen; |
763 | const char *err = OPERATION_FAILED; |
764 | |
765 | if (strNil(*name)) |
766 | throw(MAL, "xml.attribute" , XML_ATTRIBUTE_ERROR); |
767 | if (xmlValidateName((xmlChar *) *name, 0) != 0) |
768 | throw(MAL, "xml.attribute" , XML_ATTRIBUTE_INVALID); |
769 | attrlen = strlen(*name) + 5; |
770 | buf = GDKmalloc(size); |
771 | if (buf == NULL) |
772 | throw(MAL, "xml.attribute" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
773 | if ((b = BATdescriptor(*bid)) == NULL) { |
774 | GDKfree(buf); |
775 | throw(MAL, "xml.attribute" , INTERNAL_BAT_ACCESS); |
776 | } |
777 | prepareResult(bn, b, TYPE_xml, "attribute" , GDKfree(buf)); |
778 | bi = bat_iterator(b); |
779 | BATloop(b, p, q) { |
780 | const char *t = (const char *) BUNtvar(bi, p); |
781 | size_t len; |
782 | |
783 | len = attrlen; |
784 | if (!strNil(t)) |
785 | len += strlen(t) * 6 + 1; |
786 | if (len >= size) { |
787 | /* make sure there is enough space */ |
788 | size = len + 128; |
789 | /* free/malloc so we don't copy */ |
790 | GDKfree(buf); |
791 | buf = GDKmalloc(size); |
792 | if (buf == NULL) { |
793 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
794 | goto bunins_failed; |
795 | } |
796 | } |
797 | if (strNil(t)) { |
798 | strcpy(buf, str_nil); |
799 | bn->tnonil = false; |
800 | } else { |
801 | int n = snprintf(buf, size, "A%s = \"" , *name); |
802 | size_t m = XMLquotestring(t, buf + n, size - n); |
803 | strcpy(buf + n + m, "\"" ); |
804 | } |
805 | bunfastappVAR(bn, buf); |
806 | } |
807 | GDKfree(buf); |
808 | finalizeResult(ret, bn, b); |
809 | return MAL_SUCCEED; |
810 | bunins_failed: |
811 | BBPunfix(b->batCacheid); |
812 | BBPunfix(bn->batCacheid); |
813 | if (buf != NULL) |
814 | GDKfree(buf); |
815 | throw(MAL, "xml.attribute" , "%s" , err); |
816 | } |
817 | |
818 | str |
819 | BATXMLelement(bat *ret, const char * const *name, xml *nspace, xml *attr, const bat *bid) |
820 | { |
821 | BAT *b, *bn; |
822 | BUN p, q; |
823 | size_t size = BUFSIZ; |
824 | str buf; |
825 | BATiter bi; |
826 | size_t elemlen, namelen; |
827 | const char *err = OPERATION_FAILED; |
828 | |
829 | if (strNil(*name)) |
830 | throw(MAL, "xml.element" , XML_NO_ELEMENT); |
831 | if (xmlValidateName((xmlChar *) *name, 0) != 0) |
832 | throw(MAL, "xml.element" , XML_ATTRIBUTE_INVALID); |
833 | if (nspace && !strNil(*nspace) && **nspace) |
834 | throw(MAL, "xml.element" , XML_NO_NAMESPACE); |
835 | namelen = strlen(*name); |
836 | elemlen = namelen + 5; |
837 | if (nspace && !strNil(*nspace)) { |
838 | if (**nspace != 'A') |
839 | throw(MAL, "xml.element" , XML_ILLEGAL_NAMESPACE); |
840 | elemlen += strlen(*nspace); /* " " + nspace (nspace contains initial 'A' which is replaced by space) */ |
841 | } |
842 | if (attr && !strNil(*attr)) { |
843 | if (**attr != 'A') |
844 | throw(MAL, "xml.element" , XML_ILLEGAL_ATTRIBUTE); |
845 | elemlen += strlen(*attr); /* " " + attr (attr contains initial 'A' which is replaced by space) */ |
846 | } |
847 | buf = GDKmalloc(size); |
848 | if (buf == NULL) |
849 | throw(MAL, "xml.attribute" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
850 | if ((b = BATdescriptor(*bid)) == NULL) { |
851 | GDKfree(buf); |
852 | throw(MAL, "xml.element" , INTERNAL_BAT_ACCESS); |
853 | } |
854 | prepareResult(bn, b, TYPE_xml, "element" , GDKfree(buf)); |
855 | bi = bat_iterator(b); |
856 | BATloop(b, p, q) { |
857 | const char *t = (const char *) BUNtvar(bi, p); |
858 | size_t len; |
859 | |
860 | len = elemlen; |
861 | if (!strNil(t)) { |
862 | if (*t != 'C') { |
863 | err = XML_ILLEGAL_CONTENT; |
864 | goto bunins_failed; |
865 | } |
866 | len += strlen(t + 1) + namelen + 2; /* extra "<", ">", and name ("/" already counted) */ |
867 | } |
868 | if (len >= size) { |
869 | /* make sure there is enough space */ |
870 | size = len + 128; |
871 | /* free/malloc so we don't copy */ |
872 | GDKfree(buf); |
873 | buf = GDKmalloc(size); |
874 | if (buf == NULL) { |
875 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
876 | goto bunins_failed; |
877 | } |
878 | } |
879 | if (strNil(t) && (!attr || strNil(*attr))) { |
880 | strcpy(buf, str_nil); |
881 | bn->tnonil = false; |
882 | } else { |
883 | int i = snprintf(buf, size, "C<%s" , *name); |
884 | if (nspace && !strNil(*nspace)) |
885 | i += snprintf(buf + i, size - i, " %s" , *nspace + 1); |
886 | if (attr && !strNil(*attr)) |
887 | i += snprintf(buf + i, size - i, " %s" , *attr + 1); |
888 | if (!strNil(t)) |
889 | i += snprintf(buf + i, size - i, ">%s</%s>" , t + 1, *name); |
890 | else |
891 | i += snprintf(buf + i, size - i, "/>" ); |
892 | } |
893 | bunfastappVAR(bn, buf); |
894 | } |
895 | GDKfree(buf); |
896 | finalizeResult(ret, bn, b); |
897 | return MAL_SUCCEED; |
898 | bunins_failed: |
899 | BBPunfix(b->batCacheid); |
900 | BBPunfix(bn->batCacheid); |
901 | if (buf != NULL) |
902 | GDKfree(buf); |
903 | throw(MAL, "xml.element" , "%s" , err); |
904 | } |
905 | |
906 | str |
907 | BATXMLelementSmall(bat *ret, const char * const *name, const bat *bid) |
908 | { |
909 | return BATXMLelement(ret, name, NULL, NULL, bid); |
910 | } |
911 | |
912 | str |
913 | BATXMLforest(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
914 | { |
915 | bat *ret = getArgReference_bat(stk, pci, 0); |
916 | BAT *bn; |
917 | BATiter *bi; |
918 | BUN *p, *q; |
919 | str buf; |
920 | int i; |
921 | size_t offset, len, size = BUFSIZ; |
922 | const char *err = OPERATION_FAILED; |
923 | |
924 | (void) mb; |
925 | (void) cntxt; |
926 | buf = GDKmalloc(size); |
927 | bi = GDKmalloc(sizeof(BATiter) * pci->argc); |
928 | p = GDKmalloc(sizeof(BUN) * pci->argc); |
929 | q = GDKmalloc(sizeof(BUN) * pci->argc); |
930 | if (buf == NULL || bi == NULL || p == NULL || q == NULL) { |
931 | if (buf) |
932 | GDKfree(buf); |
933 | if (bi) |
934 | GDKfree(bi); |
935 | if (p) |
936 | GDKfree(p); |
937 | if (q) |
938 | GDKfree(q); |
939 | throw(MAL, "xml.forest" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
940 | } |
941 | |
942 | /* collect the admin for the xml elements */ |
943 | for (i = pci->retc; i < pci->argc; i++) { |
944 | if ((bi[i].b = BATdescriptor(*getArgReference_bat(stk, pci, i))) == NULL) |
945 | break; |
946 | p[i] = 0; |
947 | q[i] = BUNlast(bi[i].b); |
948 | } |
949 | /* check for errors */ |
950 | if (i != pci->argc) { |
951 | for (i--; i >= pci->retc; i--) |
952 | if (bi[i].b) |
953 | BBPunfix(bi[i].b->batCacheid); |
954 | GDKfree(bi); |
955 | GDKfree(p); |
956 | GDKfree(q); |
957 | GDKfree(buf); |
958 | throw(MAL, "xml.forest" , INTERNAL_BAT_ACCESS); |
959 | } |
960 | |
961 | prepareResult(bn, bi[pci->retc].b, TYPE_xml, "forest" , |
962 | for (i = pci->retc; i < pci->argc; i++) BBPunfix(bi[i].b->batCacheid); |
963 | GDKfree(bi); GDKfree(p); GDKfree(q); GDKfree(buf)); |
964 | |
965 | while (p[pci->retc] < q[pci->retc]) { |
966 | const char *t; |
967 | |
968 | /* fetch the elements */ |
969 | offset = 0; |
970 | strcpy(buf, str_nil); |
971 | for (i = pci->retc; i < pci->argc; i++) { |
972 | int n; |
973 | |
974 | t = (const char *) BUNtvar(bi[i], p[i]); |
975 | if (strNil(t)) |
976 | continue; |
977 | |
978 | if ((len = strlen(t)) >= size - offset) { |
979 | char *tmp; |
980 | size += len + 128; |
981 | tmp = GDKrealloc(buf, size); |
982 | if (tmp == NULL) { |
983 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
984 | goto bunins_failed; |
985 | } |
986 | buf = tmp; |
987 | } |
988 | if (offset == 0) |
989 | n = snprintf(buf, size, "%s" , t); |
990 | else if (buf[0] != *t) { |
991 | err = "incompatible values in forest" ; |
992 | goto bunins_failed; |
993 | } else if (buf[0] == 'A') |
994 | n = snprintf(buf + offset, size - offset, " %s" , t + 1); |
995 | else if (buf[0] == 'C') |
996 | n = snprintf(buf + offset, size - offset, "%s" , t + 1); |
997 | else { |
998 | err = "can only combine attributes and element content" ; |
999 | goto bunins_failed; |
1000 | } |
1001 | offset += n; |
1002 | } |
1003 | bunfastappVAR(bn, buf); |
1004 | if (offset == 0) |
1005 | bn->tnonil = false; |
1006 | |
1007 | for (i = pci->retc; i < pci->argc; i++) |
1008 | if (bi[i].b) |
1009 | p[i]++; |
1010 | } |
1011 | GDKfree(buf); |
1012 | finalizeResult(ret, bn, bi[pci->retc].b); |
1013 | GDKfree(bi); |
1014 | GDKfree(p); |
1015 | GDKfree(q); |
1016 | return MAL_SUCCEED; |
1017 | bunins_failed: |
1018 | for (i = pci->retc; i < pci->argc; i++) |
1019 | if (bi[i].b) |
1020 | BBPunfix(bi[i].b->batCacheid); |
1021 | BBPunfix(bn->batCacheid); |
1022 | if (buf != NULL) |
1023 | GDKfree(buf); |
1024 | GDKfree(bi); |
1025 | GDKfree(p); |
1026 | GDKfree(q); |
1027 | throw(MAL, "xml.forest" , "%s" , err); |
1028 | } |
1029 | |
1030 | str |
1031 | BATXMLconcat(bat *ret, const bat *bid, const bat *rid) |
1032 | { |
1033 | BAT *b, *r = 0, *bn; |
1034 | BUN p, q, rp = 0; |
1035 | size_t len, size = BUFSIZ; |
1036 | str buf = GDKmalloc(size); |
1037 | BATiter bi, ri; |
1038 | const char *err = OPERATION_FAILED; |
1039 | |
1040 | if (buf == NULL) |
1041 | throw(MAL, "xml.concat" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
1042 | b = BATdescriptor(*bid); |
1043 | r = BATdescriptor(*rid); |
1044 | if (b == NULL || r == NULL) { |
1045 | GDKfree(buf); |
1046 | if (b) |
1047 | BBPunfix(b->batCacheid); |
1048 | if (r) |
1049 | BBPunfix(r->batCacheid); |
1050 | throw(MAL, "xml.concat" , INTERNAL_BAT_ACCESS); |
1051 | } |
1052 | p = 0; |
1053 | q = BUNlast(b); |
1054 | rp = 0; |
1055 | |
1056 | prepareResult(bn, b, TYPE_xml, "concat" , |
1057 | GDKfree(buf); BBPunfix(r->batCacheid)); |
1058 | |
1059 | bi = bat_iterator(b); |
1060 | ri = bat_iterator(r); |
1061 | while (p < q) { |
1062 | const char *t = (const char *) BUNtvar(bi, p); |
1063 | const char *v = (const char *) BUNtvar(ri, rp); |
1064 | |
1065 | len = strlen(t) + strlen(v) + 1; |
1066 | |
1067 | if (len >= size) { |
1068 | GDKfree(buf); |
1069 | size = len + 128; |
1070 | buf = GDKmalloc(size); |
1071 | if (buf == NULL) { |
1072 | err= MAL_MALLOC_FAIL; |
1073 | goto bunins_failed; |
1074 | } |
1075 | } |
1076 | if (strNil(t)) { |
1077 | if (strNil(v)) { |
1078 | strcpy(buf, str_nil); |
1079 | bn->tnonil = false; |
1080 | } else |
1081 | strcpy(buf, v); |
1082 | } else { |
1083 | if (strNil(v)) |
1084 | strcpy(buf, t); |
1085 | else if (*t != *v) { |
1086 | err = "arguments not compatible" ; |
1087 | goto bunins_failed; |
1088 | } else if (*t == 'A') |
1089 | snprintf(buf, size, "A%s %s" , t + 1, v + 1); |
1090 | else if (*t == 'C') |
1091 | snprintf(buf, size, "C%s%s" , t + 1, v + 1); |
1092 | else { |
1093 | err = "can only concatenate attributes and element content" ; |
1094 | goto bunins_failed; |
1095 | } |
1096 | } |
1097 | bunfastappVAR(bn, buf); |
1098 | rp++; |
1099 | p++; |
1100 | } |
1101 | GDKfree(buf); |
1102 | finalizeResult(ret, bn, b); |
1103 | return MAL_SUCCEED; |
1104 | bunins_failed: |
1105 | BBPunfix(r->batCacheid); |
1106 | BBPunfix(b->batCacheid); |
1107 | BBPunfix(bn->batCacheid); |
1108 | if (buf != NULL) |
1109 | GDKfree(buf); |
1110 | throw(MAL, "xml.concat" , "%s" , err); |
1111 | } |
1112 | |
1113 | str |
1114 | BATXMLgroup(xml *ret, const bat *bid) |
1115 | { |
1116 | BAT *b; |
1117 | BUN p, q; |
1118 | const char *t; |
1119 | size_t len, size = BUFSIZ, offset; |
1120 | str buf = GDKmalloc(size); |
1121 | BATiter bi; |
1122 | const char *err = NULL; |
1123 | |
1124 | if (buf == NULL) |
1125 | throw(MAL, "xml.aggr" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
1126 | if ((b = BATdescriptor(*bid)) == NULL) { |
1127 | GDKfree(buf); |
1128 | throw(MAL, "xml.aggr" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
1129 | } |
1130 | |
1131 | strcpy(buf, str_nil); |
1132 | offset = 0; |
1133 | bi = bat_iterator(b); |
1134 | BATloop(b, p, q) { |
1135 | int n; |
1136 | |
1137 | t = (const char *) BUNtvar(bi, p); |
1138 | |
1139 | if (strNil(t)) |
1140 | continue; |
1141 | len = strlen(t) + 1; |
1142 | if (len >= size - offset) { |
1143 | char *tmp; |
1144 | size += len + 128; |
1145 | tmp = GDKrealloc(buf, size); |
1146 | if (tmp == NULL) { |
1147 | err= MAL_MALLOC_FAIL; |
1148 | goto failed; |
1149 | } |
1150 | buf = tmp; |
1151 | } |
1152 | if (offset == 0) |
1153 | n = snprintf(buf, size, "%s" , t); |
1154 | else if (buf[0] != *t) { |
1155 | err = "incompatible values in group" ; |
1156 | goto failed; |
1157 | } else if (buf[0] == 'A') |
1158 | n = snprintf(buf + offset, size - offset, " %s" , t + 1); |
1159 | else if (buf[0] == 'C') |
1160 | n = snprintf(buf + offset, size - offset, "%s" , t + 1); |
1161 | else { |
1162 | err = "can only group attributes and element content" ; |
1163 | goto failed; |
1164 | } |
1165 | offset += n; |
1166 | } |
1167 | BBPunfix(b->batCacheid); |
1168 | *ret = buf; |
1169 | return MAL_SUCCEED; |
1170 | failed: |
1171 | BBPunfix(b->batCacheid); |
1172 | if (buf != NULL) |
1173 | GDKfree(buf); |
1174 | throw(MAL, "xml.aggr" , "%s" , err); |
1175 | } |
1176 | |
1177 | static const char * |
1178 | BATxmlaggr(BAT **bnp, BAT *b, BAT *g, BAT *e, BAT *s, int skip_nils) |
1179 | { |
1180 | BAT *bn = NULL, *t1, *t2 = NULL; |
1181 | BATiter bi; |
1182 | oid min, max; |
1183 | BUN ngrp; |
1184 | BUN nils = 0; |
1185 | BUN ncand; |
1186 | struct canditer ci; |
1187 | int isnil; |
1188 | const char *v; |
1189 | const oid *grps, *map; |
1190 | oid mapoff = 0; |
1191 | oid prev; |
1192 | BUN p, q; |
1193 | int freeb = 0, freeg = 0; |
1194 | char *buf = NULL; |
1195 | size_t buflen, maxlen, len; |
1196 | const char *err; |
1197 | char *tmp; |
1198 | |
1199 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
1200 | return err; |
1201 | } |
1202 | assert(b->ttype == TYPE_xml); |
1203 | if (BATcount(b) == 0 || ngrp == 0) { |
1204 | bn = BATconstant(ngrp == 0 ? 0 : min, TYPE_xml, ATOMnilptr(TYPE_xml), ngrp, TRANSIENT); |
1205 | if (bn == NULL) |
1206 | return MAL_MALLOC_FAIL; |
1207 | *bnp = bn; |
1208 | return NULL; |
1209 | } |
1210 | if (s) { |
1211 | b = BATproject(s, b); |
1212 | if (b == NULL) { |
1213 | err = "internal project failed" ; |
1214 | goto out; |
1215 | } |
1216 | freeb = 1; |
1217 | if (g) { |
1218 | g = BATproject(s, g); |
1219 | if (g == NULL) { |
1220 | err = "internal project failed" ; |
1221 | goto out; |
1222 | } |
1223 | freeg = 1; |
1224 | } |
1225 | } |
1226 | if (g && BATtdense(g)) { |
1227 | /* singleton groups: return group ID's (g's tail) and original |
1228 | * values from b */ |
1229 | bn = VIEWcreate(g->tseqbase, b); |
1230 | goto out; |
1231 | } |
1232 | |
1233 | maxlen = BUFSIZ; |
1234 | if ((buf = GDKmalloc(maxlen)) == NULL) { |
1235 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
1236 | goto out; |
1237 | } |
1238 | buflen = 0; |
1239 | bn = COLnew(min, TYPE_xml, ngrp, TRANSIENT); |
1240 | if (bn == NULL) { |
1241 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
1242 | goto out; |
1243 | } |
1244 | bi = bat_iterator(b); |
1245 | if (g) { |
1246 | /* stable sort g */ |
1247 | if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false, true) != GDK_SUCCEED) { |
1248 | BBPreclaim(bn); |
1249 | bn = NULL; |
1250 | err = "internal sort failed" ; |
1251 | goto out; |
1252 | } |
1253 | if (freeg) |
1254 | BBPunfix(g->batCacheid); |
1255 | g = t1; |
1256 | freeg = 1; |
1257 | if (t2->ttype == TYPE_void) { |
1258 | map = NULL; |
1259 | mapoff = b->tseqbase; |
1260 | } else { |
1261 | map = (const oid *) Tloc(t2, 0); |
1262 | } |
1263 | grps = (const oid *) Tloc(g, 0); |
1264 | prev = grps[0]; |
1265 | isnil = 0; |
1266 | for (p = 0, q = BATcount(g); p <= q; p++) { |
1267 | if (p == q || grps[p] != prev) { |
1268 | while (BATcount(bn) < prev - min) { |
1269 | bunfastapp_nocheckVAR(bn, BUNlast(bn), str_nil, Tsize(bn)); |
1270 | nils++; |
1271 | } |
1272 | bunfastapp_nocheckVAR(bn, BUNlast(bn), buf, Tsize(bn)); |
1273 | nils += strNil(buf); |
1274 | strncpy(buf, str_nil, maxlen); |
1275 | buflen = 0; |
1276 | if (p == q) |
1277 | break; |
1278 | prev = grps[p]; |
1279 | isnil = 0; |
1280 | } |
1281 | if (isnil) |
1282 | continue; |
1283 | v = (const char *) BUNtvar(bi, (map ? (BUN) map[p] : p + mapoff)); |
1284 | if (strNil(v)) { |
1285 | if (skip_nils) |
1286 | continue; |
1287 | strncpy(buf, str_nil, buflen); |
1288 | isnil = 1; |
1289 | } else { |
1290 | len = strlen(v); |
1291 | if (len >= maxlen - buflen) { |
1292 | maxlen += len + BUFSIZ; |
1293 | tmp = GDKrealloc(buf, maxlen); |
1294 | if (tmp == NULL) { |
1295 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
1296 | goto bunins_failed; |
1297 | } |
1298 | buf = tmp; |
1299 | } |
1300 | if (buflen == 0) { |
1301 | strncpy(buf, v, maxlen); |
1302 | buflen += len; |
1303 | } else if (buf[0] != v[0]) { |
1304 | err = "incompatible values in group" ; |
1305 | goto bunins_failed; |
1306 | } else if (buf[0] == 'A') { |
1307 | snprintf(buf + buflen, maxlen - buflen, " %s" , v + 1); |
1308 | buflen += len; |
1309 | } else if (buf[0] == 'C') { |
1310 | snprintf(buf + buflen, maxlen - buflen, "%s" , v + 1); |
1311 | buflen += len - 1; |
1312 | } else { |
1313 | err = "can only group attributes and element content" ; |
1314 | goto bunins_failed; |
1315 | } |
1316 | } |
1317 | } |
1318 | BBPunfix(t2->batCacheid); |
1319 | t2 = NULL; |
1320 | } else { |
1321 | for (p = 0, q = p + BATcount(b); p < q; p++) { |
1322 | v = (const char *) BUNtvar(bi, p); |
1323 | if (strNil(v)) { |
1324 | if (skip_nils) |
1325 | continue; |
1326 | strncpy(buf, str_nil, buflen); |
1327 | nils++; |
1328 | break; |
1329 | } |
1330 | len = strlen(v); |
1331 | if (len >= maxlen - buflen) { |
1332 | maxlen += len + BUFSIZ; |
1333 | tmp = GDKrealloc(buf, maxlen); |
1334 | if (tmp == NULL) { |
1335 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; |
1336 | goto bunins_failed; |
1337 | } |
1338 | buf = tmp; |
1339 | } |
1340 | if (buflen == 0) { |
1341 | strncpy(buf, v, maxlen); |
1342 | buflen += len; |
1343 | } else if (buf[0] != v[0]) { |
1344 | err = "incompatible values in group" ; |
1345 | goto bunins_failed; |
1346 | } else if (buf[0] == 'A') { |
1347 | snprintf(buf + buflen, maxlen - buflen, " %s" , v + 1); |
1348 | buflen += len; |
1349 | } else if (buf[0] == 'C') { |
1350 | snprintf(buf + buflen, maxlen - buflen, "%s" , v + 1); |
1351 | buflen += len - 1; |
1352 | } else { |
1353 | err = "can only group attributes and element content" ; |
1354 | goto bunins_failed; |
1355 | } |
1356 | } |
1357 | bunfastapp_nocheckVAR(bn, BUNlast(bn), buf, Tsize(bn)); |
1358 | } |
1359 | bn->theap.dirty = true; |
1360 | bn->tnil = nils != 0; |
1361 | bn->tnonil = nils == 0; |
1362 | bn->tsorted = BATcount(bn) <= 1; |
1363 | bn->trevsorted = BATcount(bn) <= 1; |
1364 | bn->tkey = BATcount(bn) <= 1; |
1365 | |
1366 | out: |
1367 | if (t2) |
1368 | BBPunfix(t2->batCacheid); |
1369 | if (freeb && b) |
1370 | BBPunfix(b->batCacheid); |
1371 | if (freeg && g) |
1372 | BBPunfix(g->batCacheid); |
1373 | if (buf) |
1374 | GDKfree(buf); |
1375 | *bnp = bn; |
1376 | return err; |
1377 | |
1378 | bunins_failed: |
1379 | BBPreclaim(bn); |
1380 | bn = NULL; |
1381 | if (err == NULL) |
1382 | err = SQLSTATE(HY001) MAL_MALLOC_FAIL; /* insertion into result BAT failed */ |
1383 | goto out; |
1384 | } |
1385 | |
1386 | str |
1387 | AGGRsubxmlcand(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bat *sid, const bit *skip_nils) |
1388 | { |
1389 | BAT *b, *g, *e, *s, *bn = NULL; |
1390 | const char *err; |
1391 | |
1392 | b = BATdescriptor(*bid); |
1393 | g = gid ? BATdescriptor(*gid) : NULL; |
1394 | e = eid ? BATdescriptor(*eid) : NULL; |
1395 | if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == NULL)) { |
1396 | if (b) |
1397 | BBPunfix(b->batCacheid); |
1398 | if (g) |
1399 | BBPunfix(g->batCacheid); |
1400 | if (e) |
1401 | BBPunfix(e->batCacheid); |
1402 | throw(MAL, "aggr.subxml" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
1403 | } |
1404 | if (sid) { |
1405 | s = BATdescriptor(*sid); |
1406 | if (s == NULL) { |
1407 | BBPunfix(b->batCacheid); |
1408 | if (g) |
1409 | BBPunfix(g->batCacheid); |
1410 | if (e) |
1411 | BBPunfix(e->batCacheid); |
1412 | throw(MAL, "aggr.subxml" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
1413 | } |
1414 | } else { |
1415 | s = NULL; |
1416 | } |
1417 | err = BATxmlaggr(&bn, b, g, e, s, *skip_nils); |
1418 | BBPunfix(b->batCacheid); |
1419 | if (g) |
1420 | BBPunfix(g->batCacheid); |
1421 | if (e) |
1422 | BBPunfix(e->batCacheid); |
1423 | if (s) |
1424 | BBPunfix(s->batCacheid); |
1425 | if (err != NULL) |
1426 | throw(MAL, "aggr.subxml" , "%s" , err); |
1427 | |
1428 | *retval = bn->batCacheid; |
1429 | BBPkeepref(bn->batCacheid); |
1430 | return MAL_SUCCEED; |
1431 | } |
1432 | |
1433 | str |
1434 | AGGRsubxml(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bit *skip_nils) |
1435 | { |
1436 | return AGGRsubxmlcand(retval, bid, gid, eid, NULL, skip_nils); |
1437 | } |
1438 | |
1439 | str |
1440 | BATXMLxquery(bat *ret, const bat *bid, const char * const *expr) |
1441 | { |
1442 | (void) ret; |
1443 | (void) bid; |
1444 | (void) expr; |
1445 | /* use external library to solve this */ |
1446 | throw(MAL, "xml.xquery" , SQLSTATE(0A000) PROGRAM_NYI); |
1447 | } |
1448 | |
1449 | #else |
1450 | |
1451 | #define NO_LIBXML_FATAL "batxml: MonetDB was built without libxml, but what you are trying to do requires it." |
1452 | |
1453 | str BATXMLxml2str(bat *ret, const bat *bid) { |
1454 | (void) ret; |
1455 | (void) bid; |
1456 | return createException(MAL, "batxml.xml2str" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1457 | } |
1458 | str BATXMLxmltext(bat *ret, const bat *bid) { |
1459 | (void) ret; |
1460 | (void) bid; |
1461 | return createException(MAL, "batxml.xmltext" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1462 | } |
1463 | str BATXMLstr2xml(bat *ret, const bat *bid) { |
1464 | (void) ret; |
1465 | (void) bid; |
1466 | return createException(MAL, "batxml.str2xml" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1467 | } |
1468 | str BATXMLdocument(bat *ret, const bat *bid) { |
1469 | (void) ret; |
1470 | (void) bid; |
1471 | return createException(MAL, "batxml.document" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1472 | } |
1473 | str BATXMLcontent(bat *ret, const bat *bid) { |
1474 | (void) ret; |
1475 | (void) bid; |
1476 | return createException(MAL, "batxml.content" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1477 | } |
1478 | str BATXMLisdocument(bat *ret, const bat *bid) { |
1479 | (void) ret; |
1480 | (void) bid; |
1481 | return createException(MAL, "batxml.isdocument" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1482 | } |
1483 | str BATXMLelementSmall(bat *ret, const char * const *name, const bat *bid) { |
1484 | (void) ret; |
1485 | (void) name; |
1486 | (void) bid; |
1487 | return createException(MAL, "batxml.elementSmall" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1488 | } |
1489 | str BATXMLoptions(bat *ret, const char * const *name, const char * const *options, const bat *bid) { |
1490 | (void) ret; |
1491 | (void) name; |
1492 | (void) options; |
1493 | (void) bid; |
1494 | return createException(MAL, "batxml.options" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1495 | } |
1496 | str BATXMLcomment(bat *ret, const bat *bid) { |
1497 | (void) ret; |
1498 | (void) bid; |
1499 | return createException(MAL, "batxml.comment" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1500 | } |
1501 | str BATXMLparse(bat *ret, const char * const *doccont, const bat *bid, const char * const *option) { |
1502 | (void) ret; |
1503 | (void) doccont; |
1504 | (void) bid; |
1505 | (void) option; |
1506 | return createException(MAL, "batxml.parse" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1507 | } |
1508 | str BATXMLxquery(bat *ret, const bat *bid, const char * const *expr) { |
1509 | (void) ret; |
1510 | (void) bid; |
1511 | (void) expr; |
1512 | return createException(MAL, "batxml.xquery" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1513 | } |
1514 | str BATXMLpi(bat *ret, const char * const *tgt, const bat *bid) { |
1515 | (void) ret; |
1516 | (void) tgt; |
1517 | (void) bid; |
1518 | return createException(MAL, "batxml.pi" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1519 | } |
1520 | str BATXMLroot(bat *ret, const bat *bid, const char * const *version, const char * const *standalone) { |
1521 | (void) ret; |
1522 | (void) bid; |
1523 | (void) version; |
1524 | (void) standalone; |
1525 | return createException(MAL, "batxml.root" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1526 | } |
1527 | str BATXMLattribute(bat *ret, const char * const *name, const bat *bid) { |
1528 | (void) ret; |
1529 | (void) name; |
1530 | (void) bid; |
1531 | return createException(MAL, "batxml.attribute" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1532 | } |
1533 | str BATXMLelement(bat *ret, const char * const *name, xml *ns, xml *attr, const bat *bid) { |
1534 | (void) ret; |
1535 | (void) name; |
1536 | (void) ns; |
1537 | (void) attr; |
1538 | (void) bid; |
1539 | return createException(MAL, "batxml.element" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1540 | } |
1541 | str BATXMLconcat(bat *ret, const bat *bid, const bat *rid) { |
1542 | (void) ret; |
1543 | (void) bid; |
1544 | (void) rid; |
1545 | return createException(MAL, "batxml.concat" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1546 | } |
1547 | str BATXMLforest(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) { |
1548 | (void) cntxt; |
1549 | (void) mb; |
1550 | (void) stk; |
1551 | (void) p; |
1552 | return createException(MAL, "batxml.forest" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1553 | } |
1554 | str BATXMLgroup(xml *ret, const bat *bid) { |
1555 | (void) ret; |
1556 | (void) bid; |
1557 | return createException(MAL, "batxml.group" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1558 | } |
1559 | str AGGRsubxmlcand(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bat *sid, const bit *skip_nils) { |
1560 | (void) retval; |
1561 | (void) bid; |
1562 | (void) gid; |
1563 | (void) eid; |
1564 | (void) sid; |
1565 | (void) skip_nils; |
1566 | return createException(MAL, "batxml.subxmlcand" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1567 | } |
1568 | str AGGRsubxml(bat *retval, const bat *bid, const bat *gid, const bat *eid, const bit *skip_nils) { |
1569 | (void) retval; |
1570 | (void) bid; |
1571 | (void) gid; |
1572 | (void) eid; |
1573 | (void) skip_nils; |
1574 | return createException(MAL, "batxml.subxml" , SQLSTATE(HY005) NO_LIBXML_FATAL); |
1575 | } |
1576 | |
1577 | #endif |
1578 | |