1/*-------------------------------------------------------------------------
2 *
3 * xml.c
4 * XML data type support.
5 *
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/backend/utils/adt/xml.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15/*
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
23 * else does.
24 */
25
26/*
27 * Notes on memory management:
28 *
29 * Sometimes libxml allocates global structures in the hope that it can reuse
30 * them later on. This makes it impractical to change the xmlMemSetup
31 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 * allocated with malloc() or vice versa. Since libxml might be used by
33 * loadable modules, eg libperl, our only safe choices are to change the
34 * functions at postmaster/backend launch or not at all. Since we'd rather
35 * not activate libxml in sessions that might never use it, the latter choice
36 * is the preferred one. However, for debugging purposes it can be awfully
37 * handy to constrain libxml's allocations to be done in a specific palloc
38 * context, where they're easy to track. Therefore there is code here that
39 * can be enabled in debug builds to redirect libxml's allocations into a
40 * special context LibxmlContext. It's not recommended to turn this on in
41 * a production build because of the possibility of bad interactions with
42 * external modules.
43 */
44/* #define USE_LIBXMLCONTEXT */
45
46#include "postgres.h"
47
48#ifdef USE_LIBXML
49#include <libxml/chvalid.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/tree.h>
53#include <libxml/uri.h>
54#include <libxml/xmlerror.h>
55#include <libxml/xmlversion.h>
56#include <libxml/xmlwriter.h>
57#include <libxml/xpath.h>
58#include <libxml/xpathInternals.h>
59
60/*
61 * We used to check for xmlStructuredErrorContext via a configure test; but
62 * that doesn't work on Windows, so instead use this grottier method of
63 * testing the library version number.
64 */
65#if LIBXML_VERSION >= 20704
66#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67#endif
68#endif /* USE_LIBXML */
69
70#include "access/htup_details.h"
71#include "access/table.h"
72#include "catalog/namespace.h"
73#include "catalog/pg_class.h"
74#include "catalog/pg_type.h"
75#include "commands/dbcommands.h"
76#include "executor/spi.h"
77#include "executor/tablefunc.h"
78#include "fmgr.h"
79#include "lib/stringinfo.h"
80#include "libpq/pqformat.h"
81#include "mb/pg_wchar.h"
82#include "miscadmin.h"
83#include "nodes/execnodes.h"
84#include "nodes/nodeFuncs.h"
85#include "utils/array.h"
86#include "utils/builtins.h"
87#include "utils/date.h"
88#include "utils/datetime.h"
89#include "utils/lsyscache.h"
90#include "utils/memutils.h"
91#include "utils/rel.h"
92#include "utils/syscache.h"
93#include "utils/xml.h"
94
95
/*
 * GUC variables
 *
 * xmloption is the XmlOptionType handed to xml_parse()/xmlparse() by the
 * entry points that have no explicit option of their own (xml_in, xml_recv,
 * texttoxml).
 *
 * xmlbinary is not referenced in this part of the file; presumably it selects
 * how binary values are rendered in XML output -- TODO confirm against its
 * users further down.
 */
int			xmlbinary;
int			xmloption;
99
100#ifdef USE_LIBXML
101
102/* random number to identify PgXmlErrorContext */
103#define ERRCXT_MAGIC 68275028
104
/*
 * Error-handling state for one use of libxml.
 *
 * pg_xml_init() allocates and fills one of these and registers it with
 * libxml as the structured-error context; pg_xml_done() restores the saved
 * libxml settings from it and frees it.
 */
struct PgXmlErrorContext
{
	/* ERRCXT_MAGIC while the struct is live; pg_xml_done() resets it to 0 */
	int			magic;
	/* strictness argument passed to pg_xml_init */
	PgXmlStrictness strictness;
	/* current error status and accumulated message, if any */
	bool		err_occurred;
	StringInfoData err_buf;
	/* previous libxml error handling state (saved by pg_xml_init) */
	xmlStructuredErrorFunc saved_errfunc;
	void	   *saved_errcxt;
	/* previous libxml entity handler (saved by pg_xml_init) */
	xmlExternalEntityLoader saved_entityfunc;
};
119
120static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121 xmlParserCtxtPtr ctxt);
122static void xml_errorHandler(void *data, xmlErrorPtr error);
123static void xml_ereport_by_code(int level, int sqlcode,
124 const char *msg, int errcode);
125static void chopStringInfoNewlines(StringInfo str);
126static void appendStringInfoLineSeparator(StringInfo str);
127
128#ifdef USE_LIBXMLCONTEXT
129
130static MemoryContext LibxmlContext = NULL;
131
132static void xml_memory_init(void);
133static void *xml_palloc(size_t size);
134static void *xml_repalloc(void *ptr, size_t size);
135static void xml_pfree(void *ptr);
136static char *xml_pstrdup(const char *string);
137#endif /* USE_LIBXMLCONTEXT */
138
139static xmlChar *xml_text2xmlChar(text *in);
140static int parse_xml_decl(const xmlChar *str, size_t *lenp,
141 xmlChar **version, xmlChar **encoding, int *standalone);
142static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143 pg_enc encoding, int standalone);
144static bool xml_doctype_in_content(const xmlChar *str);
145static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
146 bool preserve_whitespace, int encoding);
147static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
148static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
149 ArrayBuildState *astate,
150 PgXmlErrorContext *xmlerrcxt);
151static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
152#endif /* USE_LIBXML */
153
154static void xmldata_root_element_start(StringInfo result, const char *eltname,
155 const char *xmlschema, const char *targetns,
156 bool top_level);
157static void xmldata_root_element_end(StringInfo result, const char *eltname);
158static StringInfo query_to_xml_internal(const char *query, char *tablename,
159 const char *xmlschema, bool nulls, bool tableforest,
160 const char *targetns, bool top_level);
161static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
162 bool nulls, bool tableforest, const char *targetns);
163static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
164 List *relid_list, bool nulls,
165 bool tableforest, const char *targetns);
166static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
167 bool nulls, bool tableforest,
168 const char *targetns);
169static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
170static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
171static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
172static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
173 char *tablename, bool nulls, bool tableforest,
174 const char *targetns, bool top_level);
175
176/* XMLTABLE support */
177#ifdef USE_LIBXML
/* random number to identify XmlTableContext */
#define XMLTABLE_CONTEXT_MAGIC	46922182

/*
 * Per-scan working state for the XMLTABLE callbacks declared below.
 *
 * NOTE(review): the code that fills and reads these fields lies outside the
 * visible part of the file, so the field notes are inferred from the field
 * types and names only -- confirm against the XmlTable* implementations.
 */
typedef struct XmlTableBuilderData
{
	int			magic;			/* presumably XMLTABLE_CONTEXT_MAGIC when valid */
	int			natts;			/* presumably the number of output columns */
	long int	row_count;		/* presumably rows produced so far */
	PgXmlErrorContext *xmlerrcxt;	/* error context, as returned by
									 * pg_xml_init() */
	xmlParserCtxtPtr ctxt;		/* libxml parser context */
	xmlDocPtr	doc;			/* libxml document tree */
	xmlXPathContextPtr xpathcxt;	/* libxml XPath evaluation context */
	xmlXPathCompExprPtr xpathcomp;	/* one compiled XPath expression */
	xmlXPathObjectPtr xpathobj; /* one XPath evaluation result */
	xmlXPathCompExprPtr *xpathscomp;	/* array of compiled XPath
										 * expressions */
} XmlTableBuilderData;
193#endif
194
195static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
196static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
197static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
198 const char *uri);
199static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
200static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
201 const char *path, int colnum);
202static bool XmlTableFetchRow(struct TableFuncScanState *state);
203static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
204 Oid typid, int32 typmod, bool *isnull);
205static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
206
/*
 * Callback table through which the XMLTABLE implementation in this file is
 * reached.
 *
 * The initializer is positional: each entry fills the next member of
 * TableFuncRoutine in declaration order, so the list below must be kept in
 * the same order as that struct's members.
 */
const TableFuncRoutine XmlTableRoutine =
{
	XmlTableInitOpaque,
	XmlTableSetDocument,
	XmlTableSetNamespace,
	XmlTableSetRowFilter,
	XmlTableSetColumnFilter,
	XmlTableFetchRow,
	XmlTableGetValue,
	XmlTableDestroyOpaque
};
218
/*
 * Report that the requested feature needs libxml.
 *
 * Used in the "#else" arm of functions whose real body is compiled only when
 * USE_LIBXML is defined.  The ereport is at ERROR level; callers still follow
 * it with a dummy return statement.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
225
226
227/* from SQL/XML:2008 section 4.9 */
228#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
229#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
230#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
231
232
233#ifdef USE_LIBXML
234
235static int
236xmlChar_to_encoding(const xmlChar *encoding_name)
237{
238 int encoding = pg_char_to_encoding((const char *) encoding_name);
239
240 if (encoding < 0)
241 ereport(ERROR,
242 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
243 errmsg("invalid encoding name \"%s\"",
244 (const char *) encoding_name)));
245 return encoding;
246}
247#endif
248
249
250/*
251 * xml_in uses a plain C string to VARDATA conversion, so for the time being
252 * we use the conversion function for the text datatype.
253 *
254 * This is only acceptable so long as xmltype and text use the same
255 * representation.
256 */
257Datum
258xml_in(PG_FUNCTION_ARGS)
259{
260#ifdef USE_LIBXML
261 char *s = PG_GETARG_CSTRING(0);
262 xmltype *vardata;
263 xmlDocPtr doc;
264
265 vardata = (xmltype *) cstring_to_text(s);
266
267 /*
268 * Parse the data to check if it is well-formed XML data. Assume that
269 * ERROR occurred if parsing failed.
270 */
271 doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
272 xmlFreeDoc(doc);
273
274 PG_RETURN_XML_P(vardata);
275#else
276 NO_XML_SUPPORT();
277 return 0;
278#endif
279}
280
281
282#define PG_XML_DEFAULT_VERSION "1.0"
283
284
285/*
286 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
287 * time being we use the conversion function for the text datatype.
288 *
289 * This is only acceptable so long as xmltype and text use the same
290 * representation.
291 */
292static char *
293xml_out_internal(xmltype *x, pg_enc target_encoding)
294{
295 char *str = text_to_cstring((text *) x);
296
297#ifdef USE_LIBXML
298 size_t len = strlen(str);
299 xmlChar *version;
300 int standalone;
301 int res_code;
302
303 if ((res_code = parse_xml_decl((xmlChar *) str,
304 &len, &version, NULL, &standalone)) == 0)
305 {
306 StringInfoData buf;
307
308 initStringInfo(&buf);
309
310 if (!print_xml_decl(&buf, version, target_encoding, standalone))
311 {
312 /*
313 * If we are not going to produce an XML declaration, eat a single
314 * newline in the original string to prevent empty first lines in
315 * the output.
316 */
317 if (*(str + len) == '\n')
318 len += 1;
319 }
320 appendStringInfoString(&buf, str + len);
321
322 pfree(str);
323
324 return buf.data;
325 }
326
327 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
328 "could not parse XML declaration in stored value",
329 res_code);
330#endif
331 return str;
332}
333
334
335Datum
336xml_out(PG_FUNCTION_ARGS)
337{
338 xmltype *x = PG_GETARG_XML_P(0);
339
340 /*
341 * xml_out removes the encoding property in all cases. This is because we
342 * cannot control from here whether the datum will be converted to a
343 * different client encoding, so we'd do more harm than good by including
344 * it.
345 */
346 PG_RETURN_CSTRING(xml_out_internal(x, 0));
347}
348
349
350Datum
351xml_recv(PG_FUNCTION_ARGS)
352{
353#ifdef USE_LIBXML
354 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
355 xmltype *result;
356 char *str;
357 char *newstr;
358 int nbytes;
359 xmlDocPtr doc;
360 xmlChar *encodingStr = NULL;
361 int encoding;
362
363 /*
364 * Read the data in raw format. We don't know yet what the encoding is, as
365 * that information is embedded in the xml declaration; so we have to
366 * parse that before converting to server encoding.
367 */
368 nbytes = buf->len - buf->cursor;
369 str = (char *) pq_getmsgbytes(buf, nbytes);
370
371 /*
372 * We need a null-terminated string to pass to parse_xml_decl(). Rather
373 * than make a separate copy, make the temporary result one byte bigger
374 * than it needs to be.
375 */
376 result = palloc(nbytes + 1 + VARHDRSZ);
377 SET_VARSIZE(result, nbytes + VARHDRSZ);
378 memcpy(VARDATA(result), str, nbytes);
379 str = VARDATA(result);
380 str[nbytes] = '\0';
381
382 parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
383
384 /*
385 * If encoding wasn't explicitly specified in the XML header, treat it as
386 * UTF-8, as that's the default in XML. This is different from xml_in(),
387 * where the input has to go through the normal client to server encoding
388 * conversion.
389 */
390 encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
391
392 /*
393 * Parse the data to check if it is well-formed XML data. Assume that
394 * xml_parse will throw ERROR if not.
395 */
396 doc = xml_parse(result, xmloption, true, encoding);
397 xmlFreeDoc(doc);
398
399 /* Now that we know what we're dealing with, convert to server encoding */
400 newstr = pg_any_to_server(str, nbytes, encoding);
401
402 if (newstr != str)
403 {
404 pfree(result);
405 result = (xmltype *) cstring_to_text(newstr);
406 pfree(newstr);
407 }
408
409 PG_RETURN_XML_P(result);
410#else
411 NO_XML_SUPPORT();
412 return 0;
413#endif
414}
415
416
417Datum
418xml_send(PG_FUNCTION_ARGS)
419{
420 xmltype *x = PG_GETARG_XML_P(0);
421 char *outval;
422 StringInfoData buf;
423
424 /*
425 * xml_out_internal doesn't convert the encoding, it just prints the right
426 * declaration. pq_sendtext will do the conversion.
427 */
428 outval = xml_out_internal(x, pg_get_client_encoding());
429
430 pq_begintypsend(&buf);
431 pq_sendtext(&buf, outval, strlen(outval));
432 pfree(outval);
433 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
434}
435
436
437#ifdef USE_LIBXML
438static void
439appendStringInfoText(StringInfo str, const text *t)
440{
441 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
442}
443#endif
444
445
446static xmltype *
447stringinfo_to_xmltype(StringInfo buf)
448{
449 return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
450}
451
452
453static xmltype *
454cstring_to_xmltype(const char *string)
455{
456 return (xmltype *) cstring_to_text(string);
457}
458
459
460#ifdef USE_LIBXML
461static xmltype *
462xmlBuffer_to_xmltype(xmlBufferPtr buf)
463{
464 return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
465 xmlBufferLength(buf));
466}
467#endif
468
469
470Datum
471xmlcomment(PG_FUNCTION_ARGS)
472{
473#ifdef USE_LIBXML
474 text *arg = PG_GETARG_TEXT_PP(0);
475 char *argdata = VARDATA_ANY(arg);
476 int len = VARSIZE_ANY_EXHDR(arg);
477 StringInfoData buf;
478 int i;
479
480 /* check for "--" in string or "-" at the end */
481 for (i = 1; i < len; i++)
482 {
483 if (argdata[i] == '-' && argdata[i - 1] == '-')
484 ereport(ERROR,
485 (errcode(ERRCODE_INVALID_XML_COMMENT),
486 errmsg("invalid XML comment")));
487 }
488 if (len > 0 && argdata[len - 1] == '-')
489 ereport(ERROR,
490 (errcode(ERRCODE_INVALID_XML_COMMENT),
491 errmsg("invalid XML comment")));
492
493 initStringInfo(&buf);
494 appendStringInfoString(&buf, "<!--");
495 appendStringInfoText(&buf, arg);
496 appendStringInfoString(&buf, "-->");
497
498 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
499#else
500 NO_XML_SUPPORT();
501 return 0;
502#endif
503}
504
505
506
507/*
508 * TODO: xmlconcat needs to merge the notations and unparsed entities
509 * of the argument values. Not very important in practice, though.
510 */
511xmltype *
512xmlconcat(List *args)
513{
514#ifdef USE_LIBXML
515 int global_standalone = 1;
516 xmlChar *global_version = NULL;
517 bool global_version_no_value = false;
518 StringInfoData buf;
519 ListCell *v;
520
521 initStringInfo(&buf);
522 foreach(v, args)
523 {
524 xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
525 size_t len;
526 xmlChar *version;
527 int standalone;
528 char *str;
529
530 len = VARSIZE(x) - VARHDRSZ;
531 str = text_to_cstring((text *) x);
532
533 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
534
535 if (standalone == 0 && global_standalone == 1)
536 global_standalone = 0;
537 if (standalone < 0)
538 global_standalone = -1;
539
540 if (!version)
541 global_version_no_value = true;
542 else if (!global_version)
543 global_version = version;
544 else if (xmlStrcmp(version, global_version) != 0)
545 global_version_no_value = true;
546
547 appendStringInfoString(&buf, str + len);
548 pfree(str);
549 }
550
551 if (!global_version_no_value || global_standalone >= 0)
552 {
553 StringInfoData buf2;
554
555 initStringInfo(&buf2);
556
557 print_xml_decl(&buf2,
558 (!global_version_no_value) ? global_version : NULL,
559 0,
560 global_standalone);
561
562 appendStringInfoString(&buf2, buf.data);
563 buf = buf2;
564 }
565
566 return stringinfo_to_xmltype(&buf);
567#else
568 NO_XML_SUPPORT();
569 return NULL;
570#endif
571}
572
573
574/*
575 * XMLAGG support
576 */
577Datum
578xmlconcat2(PG_FUNCTION_ARGS)
579{
580 if (PG_ARGISNULL(0))
581 {
582 if (PG_ARGISNULL(1))
583 PG_RETURN_NULL();
584 else
585 PG_RETURN_XML_P(PG_GETARG_XML_P(1));
586 }
587 else if (PG_ARGISNULL(1))
588 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
589 else
590 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
591 PG_GETARG_XML_P(1))));
592}
593
594
595Datum
596texttoxml(PG_FUNCTION_ARGS)
597{
598 text *data = PG_GETARG_TEXT_PP(0);
599
600 PG_RETURN_XML_P(xmlparse(data, xmloption, true));
601}
602
603
604Datum
605xmltotext(PG_FUNCTION_ARGS)
606{
607 xmltype *data = PG_GETARG_XML_P(0);
608
609 /* It's actually binary compatible. */
610 PG_RETURN_TEXT_P((text *) data);
611}
612
613
614text *
615xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
616{
617 if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
618 ereport(ERROR,
619 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
620 errmsg("not an XML document")));
621
622 /* It's actually binary compatible, save for the above check. */
623 return (text *) data;
624}
625
626
627xmltype *
628xmlelement(XmlExpr *xexpr,
629 Datum *named_argvalue, bool *named_argnull,
630 Datum *argvalue, bool *argnull)
631{
632#ifdef USE_LIBXML
633 xmltype *result;
634 List *named_arg_strings;
635 List *arg_strings;
636 int i;
637 ListCell *arg;
638 ListCell *narg;
639 PgXmlErrorContext *xmlerrcxt;
640 volatile xmlBufferPtr buf = NULL;
641 volatile xmlTextWriterPtr writer = NULL;
642
643 /*
644 * All arguments are already evaluated, and their values are passed in the
645 * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
646 * issues if one of the arguments involves a call to some other function
647 * or subsystem that wants to use libxml on its own terms. We examine the
648 * original XmlExpr to identify the numbers and types of the arguments.
649 */
650 named_arg_strings = NIL;
651 i = 0;
652 foreach(arg, xexpr->named_args)
653 {
654 Expr *e = (Expr *) lfirst(arg);
655 char *str;
656
657 if (named_argnull[i])
658 str = NULL;
659 else
660 str = map_sql_value_to_xml_value(named_argvalue[i],
661 exprType((Node *) e),
662 false);
663 named_arg_strings = lappend(named_arg_strings, str);
664 i++;
665 }
666
667 arg_strings = NIL;
668 i = 0;
669 foreach(arg, xexpr->args)
670 {
671 Expr *e = (Expr *) lfirst(arg);
672 char *str;
673
674 /* here we can just forget NULL elements immediately */
675 if (!argnull[i])
676 {
677 str = map_sql_value_to_xml_value(argvalue[i],
678 exprType((Node *) e),
679 true);
680 arg_strings = lappend(arg_strings, str);
681 }
682 i++;
683 }
684
685 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
686
687 PG_TRY();
688 {
689 buf = xmlBufferCreate();
690 if (buf == NULL || xmlerrcxt->err_occurred)
691 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
692 "could not allocate xmlBuffer");
693 writer = xmlNewTextWriterMemory(buf, 0);
694 if (writer == NULL || xmlerrcxt->err_occurred)
695 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
696 "could not allocate xmlTextWriter");
697
698 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
699
700 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
701 {
702 char *str = (char *) lfirst(arg);
703 char *argname = strVal(lfirst(narg));
704
705 if (str)
706 xmlTextWriterWriteAttribute(writer,
707 (xmlChar *) argname,
708 (xmlChar *) str);
709 }
710
711 foreach(arg, arg_strings)
712 {
713 char *str = (char *) lfirst(arg);
714
715 xmlTextWriterWriteRaw(writer, (xmlChar *) str);
716 }
717
718 xmlTextWriterEndElement(writer);
719
720 /* we MUST do this now to flush data out to the buffer ... */
721 xmlFreeTextWriter(writer);
722 writer = NULL;
723
724 result = xmlBuffer_to_xmltype(buf);
725 }
726 PG_CATCH();
727 {
728 if (writer)
729 xmlFreeTextWriter(writer);
730 if (buf)
731 xmlBufferFree(buf);
732
733 pg_xml_done(xmlerrcxt, true);
734
735 PG_RE_THROW();
736 }
737 PG_END_TRY();
738
739 xmlBufferFree(buf);
740
741 pg_xml_done(xmlerrcxt, false);
742
743 return result;
744#else
745 NO_XML_SUPPORT();
746 return NULL;
747#endif
748}
749
750
751xmltype *
752xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
753{
754#ifdef USE_LIBXML
755 xmlDocPtr doc;
756
757 doc = xml_parse(data, xmloption_arg, preserve_whitespace,
758 GetDatabaseEncoding());
759 xmlFreeDoc(doc);
760
761 return (xmltype *) data;
762#else
763 NO_XML_SUPPORT();
764 return NULL;
765#endif
766}
767
768
769xmltype *
770xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
771{
772#ifdef USE_LIBXML
773 xmltype *result;
774 StringInfoData buf;
775
776 if (pg_strcasecmp(target, "xml") == 0)
777 ereport(ERROR,
778 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
779 errmsg("invalid XML processing instruction"),
780 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
781
782 /*
783 * Following the SQL standard, the null check comes after the syntax check
784 * above.
785 */
786 *result_is_null = arg_is_null;
787 if (*result_is_null)
788 return NULL;
789
790 initStringInfo(&buf);
791
792 appendStringInfo(&buf, "<?%s", target);
793
794 if (arg != NULL)
795 {
796 char *string;
797
798 string = text_to_cstring(arg);
799 if (strstr(string, "?>") != NULL)
800 ereport(ERROR,
801 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
802 errmsg("invalid XML processing instruction"),
803 errdetail("XML processing instruction cannot contain \"?>\".")));
804
805 appendStringInfoChar(&buf, ' ');
806 appendStringInfoString(&buf, string + strspn(string, " "));
807 pfree(string);
808 }
809 appendStringInfoString(&buf, "?>");
810
811 result = stringinfo_to_xmltype(&buf);
812 pfree(buf.data);
813 return result;
814#else
815 NO_XML_SUPPORT();
816 return NULL;
817#endif
818}
819
820
821xmltype *
822xmlroot(xmltype *data, text *version, int standalone)
823{
824#ifdef USE_LIBXML
825 char *str;
826 size_t len;
827 xmlChar *orig_version;
828 int orig_standalone;
829 StringInfoData buf;
830
831 len = VARSIZE(data) - VARHDRSZ;
832 str = text_to_cstring((text *) data);
833
834 parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
835
836 if (version)
837 orig_version = xml_text2xmlChar(version);
838 else
839 orig_version = NULL;
840
841 switch (standalone)
842 {
843 case XML_STANDALONE_YES:
844 orig_standalone = 1;
845 break;
846 case XML_STANDALONE_NO:
847 orig_standalone = 0;
848 break;
849 case XML_STANDALONE_NO_VALUE:
850 orig_standalone = -1;
851 break;
852 case XML_STANDALONE_OMITTED:
853 /* leave original value */
854 break;
855 }
856
857 initStringInfo(&buf);
858 print_xml_decl(&buf, orig_version, 0, orig_standalone);
859 appendStringInfoString(&buf, str + len);
860
861 return stringinfo_to_xmltype(&buf);
862#else
863 NO_XML_SUPPORT();
864 return NULL;
865#endif
866}
867
868
869/*
870 * Validate document (given as string) against DTD (given as external link)
871 *
872 * This has been removed because it is a security hole: unprivileged users
873 * should not be able to use Postgres to fetch arbitrary external files,
874 * which unfortunately is exactly what libxml is willing to do with the DTD
875 * parameter.
876 */
877Datum
878xmlvalidate(PG_FUNCTION_ARGS)
879{
880 ereport(ERROR,
881 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
882 errmsg("xmlvalidate is not implemented")));
883 return 0;
884}
885
886
887bool
888xml_is_document(xmltype *arg)
889{
890#ifdef USE_LIBXML
891 bool result;
892 volatile xmlDocPtr doc = NULL;
893 MemoryContext ccxt = CurrentMemoryContext;
894
895 /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
896 PG_TRY();
897 {
898 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
899 GetDatabaseEncoding());
900 result = true;
901 }
902 PG_CATCH();
903 {
904 ErrorData *errdata;
905 MemoryContext ecxt;
906
907 ecxt = MemoryContextSwitchTo(ccxt);
908 errdata = CopyErrorData();
909 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
910 {
911 FlushErrorState();
912 result = false;
913 }
914 else
915 {
916 MemoryContextSwitchTo(ecxt);
917 PG_RE_THROW();
918 }
919 }
920 PG_END_TRY();
921
922 if (doc)
923 xmlFreeDoc(doc);
924
925 return result;
926#else /* not USE_LIBXML */
927 NO_XML_SUPPORT();
928 return false;
929#endif /* not USE_LIBXML */
930}
931
932
933#ifdef USE_LIBXML
934
935/*
936 * pg_xml_init_library --- set up for use of libxml
937 *
938 * This should be called by each function that is about to use libxml
939 * facilities but doesn't require error handling. It initializes libxml
940 * and verifies compatibility with the loaded libxml version. These are
941 * once-per-session activities.
942 *
943 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
944 * check)
945 */
946void
947pg_xml_init_library(void)
948{
949 static bool first_time = true;
950
951 if (first_time)
952 {
953 /* Stuff we need do only once per session */
954
955 /*
956 * Currently, we have no pure UTF-8 support for internals -- check if
957 * we can work.
958 */
959 if (sizeof(char) != sizeof(xmlChar))
960 ereport(ERROR,
961 (errmsg("could not initialize XML library"),
962 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
963 (int) sizeof(char), (int) sizeof(xmlChar))));
964
965#ifdef USE_LIBXMLCONTEXT
966 /* Set up libxml's memory allocation our way */
967 xml_memory_init();
968#endif
969
970 /* Check library compatibility */
971 LIBXML_TEST_VERSION;
972
973 first_time = false;
974 }
975}
976
977/*
978 * pg_xml_init --- set up for use of libxml and register an error handler
979 *
980 * This should be called by each function that is about to use libxml
981 * facilities and requires error handling. It initializes libxml with
982 * pg_xml_init_library() and establishes our libxml error handler.
983 *
984 * strictness determines which errors are reported and which are ignored.
985 *
986 * Calls to this function MUST be followed by a PG_TRY block that guarantees
987 * that pg_xml_done() is called during either normal or error exit.
988 *
989 * This is exported for use by contrib/xml2, as well as other code that might
990 * wish to share use of this module's libxml error handler.
991 */
992PgXmlErrorContext *
993pg_xml_init(PgXmlStrictness strictness)
994{
995 PgXmlErrorContext *errcxt;
996 void *new_errcxt;
997
998 /* Do one-time setup if needed */
999 pg_xml_init_library();
1000
1001 /* Create error handling context structure */
1002 errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1003 errcxt->magic = ERRCXT_MAGIC;
1004 errcxt->strictness = strictness;
1005 errcxt->err_occurred = false;
1006 initStringInfo(&errcxt->err_buf);
1007
1008 /*
1009 * Save original error handler and install ours. libxml originally didn't
1010 * distinguish between the contexts for generic and for structured error
1011 * handlers. If we're using an old libxml version, we must thus save the
1012 * generic error context, even though we're using a structured error
1013 * handler.
1014 */
1015 errcxt->saved_errfunc = xmlStructuredError;
1016
1017#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1018 errcxt->saved_errcxt = xmlStructuredErrorContext;
1019#else
1020 errcxt->saved_errcxt = xmlGenericErrorContext;
1021#endif
1022
1023 xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1024
1025 /*
1026 * Verify that xmlSetStructuredErrorFunc set the context variable we
1027 * expected it to. If not, the error context pointer we just saved is not
1028 * the correct thing to restore, and since that leaves us without a way to
1029 * restore the context in pg_xml_done, we must fail.
1030 *
1031 * The only known situation in which this test fails is if we compile with
1032 * headers from a libxml2 that doesn't track the structured error context
1033 * separately (< 2.7.4), but at runtime use a version that does, or vice
1034 * versa. The libxml2 authors did not treat that change as constituting
1035 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1036 * fails to protect us from this.
1037 */
1038
1039#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1040 new_errcxt = xmlStructuredErrorContext;
1041#else
1042 new_errcxt = xmlGenericErrorContext;
1043#endif
1044
1045 if (new_errcxt != (void *) errcxt)
1046 ereport(ERROR,
1047 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1048 errmsg("could not set up XML error handler"),
1049 errhint("This probably indicates that the version of libxml2"
1050 " being used is not compatible with the libxml2"
1051 " header files that PostgreSQL was built with.")));
1052
1053 /*
1054 * Also, install an entity loader to prevent unwanted fetches of external
1055 * files and URLs.
1056 */
1057 errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1058 xmlSetExternalEntityLoader(xmlPgEntityLoader);
1059
1060 return errcxt;
1061}
1062
1063
1064/*
1065 * pg_xml_done --- restore previous libxml error handling
1066 *
1067 * Resets libxml's global error-handling state to what it was before
1068 * pg_xml_init() was called.
1069 *
1070 * This routine verifies that all pending errors have been dealt with
1071 * (in assert-enabled builds, anyway).
1072 */
1073void
1074pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1075{
1076 void *cur_errcxt;
1077
1078 /* An assert seems like enough protection here */
1079 Assert(errcxt->magic == ERRCXT_MAGIC);
1080
1081 /*
1082 * In a normal exit, there should be no un-handled libxml errors. But we
1083 * shouldn't try to enforce this during error recovery, since the longjmp
1084 * could have been thrown before xml_ereport had a chance to run.
1085 */
1086 Assert(!errcxt->err_occurred || isError);
1087
1088 /*
1089 * Check that libxml's global state is correct, warn if not. This is a
1090 * real test and not an Assert because it has a higher probability of
1091 * happening.
1092 */
1093#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1094 cur_errcxt = xmlStructuredErrorContext;
1095#else
1096 cur_errcxt = xmlGenericErrorContext;
1097#endif
1098
1099 if (cur_errcxt != (void *) errcxt)
1100 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1101
1102 /* Restore the saved handlers */
1103 xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1104 xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1105
1106 /*
1107 * Mark the struct as invalid, just in case somebody somehow manages to
1108 * call xml_errorHandler or xml_ereport with it.
1109 */
1110 errcxt->magic = 0;
1111
1112 /* Release memory */
1113 pfree(errcxt->err_buf.data);
1114 pfree(errcxt);
1115}
1116
1117
1118/*
1119 * pg_xml_error_occurred() --- test the error flag
1120 */
1121bool
1122pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1123{
1124 return errcxt->err_occurred;
1125}
1126
1127
1128/*
1129 * SQL/XML allows storing "XML documents" or "XML content". "XML
1130 * documents" are specified by the XML specification and are parsed
1131 * easily by libxml. "XML content" is specified by SQL/XML as the
1132 * production "XMLDecl? content". But libxml can only parse the
1133 * "content" part, so we have to parse the XML declaration ourselves
1134 * to complete this.
1135 */
1136
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is a
 * blank character according to xmlIsBlank_ch().  Because this expands to a
 * "return" statement, it is only usable inside a function whose result is
 * a libxml error code.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p over any run of characters accepted by xmlIsBlank_ch() */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!
 *
 * The argument is parenthesized wherever it is compared directly, so that
 * passing an expression containing low-precedence operators (for example a
 * conditional expression) still yields the intended test.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1154
1155/* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1156static xmlChar *
1157xml_pnstrdup(const xmlChar *str, size_t len)
1158{
1159 xmlChar *result;
1160
1161 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1162 memcpy(result, str, len * sizeof(xmlChar));
1163 result[len] = 0;
1164 return result;
1165}
1166
1167/* Ditto, except input is char* */
1168static xmlChar *
1169pg_xmlCharStrndup(const char *str, size_t len)
1170{
1171 xmlChar *result;
1172
1173 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1174 memcpy(result, str, len);
1175 result[len] = '\0';
1176
1177 return result;
1178}
1179
1180/*
1181 * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1182 *
1183 * The input xmlChar is freed regardless of success of the copy.
1184 */
1185static char *
1186xml_pstrdup_and_free(xmlChar *str)
1187{
1188 char *result;
1189
1190 if (str)
1191 {
1192 PG_TRY();
1193 {
1194 result = pstrdup((char *) str);
1195 }
1196 PG_CATCH();
1197 {
1198 xmlFree(str);
1199 PG_RE_THROW();
1200 }
1201 PG_END_TRY();
1202 xmlFree(str);
1203 }
1204 else
1205 result = NULL;
1206
1207 return result;
1208}
1209
1210/*
1211 * str is the null-terminated input string. Remaining arguments are
1212 * output arguments; each can be NULL if value is not wanted.
1213 * version and encoding are returned as locally-palloc'd strings.
1214 * Result is 0 if OK, an error code if not.
1215 */
1216static int
1217parse_xml_decl(const xmlChar *str, size_t *lenp,
1218 xmlChar **version, xmlChar **encoding, int *standalone)
1219{
1220 const xmlChar *p;
1221 const xmlChar *save_p;
1222 size_t len;
1223 int utf8char;
1224 int utf8len;
1225
1226 /*
1227 * Only initialize libxml. We don't need error handling here, but we do
1228 * need to make sure libxml is initialized before calling any of its
1229 * functions. Note that this is safe (and a no-op) if caller has already
1230 * done pg_xml_init().
1231 */
1232 pg_xml_init_library();
1233
1234 /* Initialize output arguments to "not present" */
1235 if (version)
1236 *version = NULL;
1237 if (encoding)
1238 *encoding = NULL;
1239 if (standalone)
1240 *standalone = -1;
1241
1242 p = str;
1243
1244 if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1245 goto finished;
1246
1247 /*
1248 * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1249 * rather than an XMLDecl, so we have done what we came to do and found no
1250 * XMLDecl.
1251 *
1252 * We need an input length value for xmlGetUTF8Char, but there's no need
1253 * to count the whole document size, so use strnlen not strlen.
1254 */
1255 utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1256 utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1257 if (PG_XMLISNAMECHAR(utf8char))
1258 goto finished;
1259
1260 p += 5;
1261
1262 /* version */
1263 CHECK_XML_SPACE(p);
1264 SKIP_XML_SPACE(p);
1265 if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1266 return XML_ERR_VERSION_MISSING;
1267 p += 7;
1268 SKIP_XML_SPACE(p);
1269 if (*p != '=')
1270 return XML_ERR_VERSION_MISSING;
1271 p += 1;
1272 SKIP_XML_SPACE(p);
1273
1274 if (*p == '\'' || *p == '"')
1275 {
1276 const xmlChar *q;
1277
1278 q = xmlStrchr(p + 1, *p);
1279 if (!q)
1280 return XML_ERR_VERSION_MISSING;
1281
1282 if (version)
1283 *version = xml_pnstrdup(p + 1, q - p - 1);
1284 p = q + 1;
1285 }
1286 else
1287 return XML_ERR_VERSION_MISSING;
1288
1289 /* encoding */
1290 save_p = p;
1291 SKIP_XML_SPACE(p);
1292 if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1293 {
1294 CHECK_XML_SPACE(save_p);
1295 p += 8;
1296 SKIP_XML_SPACE(p);
1297 if (*p != '=')
1298 return XML_ERR_MISSING_ENCODING;
1299 p += 1;
1300 SKIP_XML_SPACE(p);
1301
1302 if (*p == '\'' || *p == '"')
1303 {
1304 const xmlChar *q;
1305
1306 q = xmlStrchr(p + 1, *p);
1307 if (!q)
1308 return XML_ERR_MISSING_ENCODING;
1309
1310 if (encoding)
1311 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1312 p = q + 1;
1313 }
1314 else
1315 return XML_ERR_MISSING_ENCODING;
1316 }
1317 else
1318 {
1319 p = save_p;
1320 }
1321
1322 /* standalone */
1323 save_p = p;
1324 SKIP_XML_SPACE(p);
1325 if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1326 {
1327 CHECK_XML_SPACE(save_p);
1328 p += 10;
1329 SKIP_XML_SPACE(p);
1330 if (*p != '=')
1331 return XML_ERR_STANDALONE_VALUE;
1332 p += 1;
1333 SKIP_XML_SPACE(p);
1334 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1335 xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1336 {
1337 if (standalone)
1338 *standalone = 1;
1339 p += 5;
1340 }
1341 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1342 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1343 {
1344 if (standalone)
1345 *standalone = 0;
1346 p += 4;
1347 }
1348 else
1349 return XML_ERR_STANDALONE_VALUE;
1350 }
1351 else
1352 {
1353 p = save_p;
1354 }
1355
1356 SKIP_XML_SPACE(p);
1357 if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1358 return XML_ERR_XMLDECL_NOT_FINISHED;
1359 p += 2;
1360
1361finished:
1362 len = p - str;
1363
1364 for (p = str; p < str + len; p++)
1365 if (*p > 127)
1366 return XML_ERR_INVALID_CHAR;
1367
1368 if (lenp)
1369 *lenp = len;
1370
1371 return XML_ERR_OK;
1372}
1373
1374
1375/*
1376 * Write an XML declaration. On output, we adjust the XML declaration
1377 * as follows. (These rules are the moral equivalent of the clause
1378 * "Serialization of an XML value" in the SQL standard.)
1379 *
1380 * We try to avoid generating an XML declaration if possible. This is
1381 * so that you don't get trivial things like xml '<foo/>' resulting in
1382 * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1383 * must provide a declaration if the standalone property is specified
1384 * or if we include an encoding declaration. If we have a
1385 * declaration, we must specify a version (XML requires this).
1386 * Otherwise we only make a declaration if the version is not "1.0",
1387 * which is the default version specified in SQL:2003.
1388 */
1389static bool
1390print_xml_decl(StringInfo buf, const xmlChar *version,
1391 pg_enc encoding, int standalone)
1392{
1393 if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1394 || (encoding && encoding != PG_UTF8)
1395 || standalone != -1)
1396 {
1397 appendStringInfoString(buf, "<?xml");
1398
1399 if (version)
1400 appendStringInfo(buf, " version=\"%s\"", version);
1401 else
1402 appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1403
1404 if (encoding && encoding != PG_UTF8)
1405 {
1406 /*
1407 * XXX might be useful to convert this to IANA names (ISO-8859-1
1408 * instead of LATIN1 etc.); needs field experience
1409 */
1410 appendStringInfo(buf, " encoding=\"%s\"",
1411 pg_encoding_to_char(encoding));
1412 }
1413
1414 if (standalone == 1)
1415 appendStringInfoString(buf, " standalone=\"yes\"");
1416 else if (standalone == 0)
1417 appendStringInfoString(buf, " standalone=\"no\"");
1418 appendStringInfoString(buf, "?>");
1419
1420 return true;
1421 }
1422 else
1423 return false;
1424}
1425
1426/*
1427 * Test whether an input that is to be parsed as CONTENT contains a DTD.
1428 *
1429 * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1430 * satisfied by a document with a DTD, which is a bit of a wart, as it means
1431 * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1432 * later fix that, by redefining content with reference to the "more
1433 * permissive" Document Node of the XQuery/XPath Data Model, such that any
1434 * DOCUMENT value is indeed also a CONTENT value. That definition is more
1435 * useful, as CONTENT becomes usable for parsing input of unknown form (think
1436 * pg_restore).
1437 *
1438 * As used below in parse_xml when parsing for CONTENT, libxml does not give
1439 * us the 2006+ behavior, but only the 2003; it will choke if the input has
1440 * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1441 * by detecting this case first and simply doing the parse as DOCUMENT.
1442 *
1443 * A DTD can be found arbitrarily far in, but that would be a contrived case;
1444 * it will ordinarily start within a few dozen characters. The only things
1445 * that can precede it are an XMLDecl (here, the caller will have called
1446 * parse_xml_decl already), whitespace, comments, and processing instructions.
1447 * This function need only return true if it sees a valid sequence of such
1448 * things leading to <!DOCTYPE. It can simply return false in any other
1449 * cases, including malformed input; that will mean the input gets parsed as
1450 * CONTENT as originally planned, with libxml reporting any errors.
1451 *
1452 * This is only to be called from xml_parse, when pg_xml_init has already
1453 * been called. The input is already in UTF8 encoding.
1454 */
1455static bool
1456xml_doctype_in_content(const xmlChar *str)
1457{
1458 const xmlChar *p = str;
1459
1460 for (;;)
1461 {
1462 const xmlChar *e;
1463
1464 SKIP_XML_SPACE(p);
1465 if (*p != '<')
1466 return false;
1467 p++;
1468
1469 if (*p == '!')
1470 {
1471 p++;
1472
1473 /* if we see <!DOCTYPE, we can return true */
1474 if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1475 return true;
1476
1477 /* otherwise, if it's not a comment, fail */
1478 if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1479 return false;
1480 /* find end of comment: find -- and a > must follow */
1481 p = xmlStrstr(p + 2, (xmlChar *) "--");
1482 if (!p || p[2] != '>')
1483 return false;
1484 /* advance over comment, and keep scanning */
1485 p += 3;
1486 continue;
1487 }
1488
1489 /* otherwise, if it's not a PI <?target something?>, fail */
1490 if (*p != '?')
1491 return false;
1492 p++;
1493
1494 /* find end of PI (the string ?> is forbidden within a PI) */
1495 e = xmlStrstr(p, (xmlChar *) "?>");
1496 if (!e)
1497 return false;
1498
1499 /* advance over PI, keep scanning */
1500 p = e + 2;
1501 }
1502}
1503
1504
1505/*
1506 * Convert a C string to XML internal representation
1507 *
1508 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1509 * else a permanent memory leak will ensue!
1510 *
1511 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1512 * yet do not use SAX - see xmlreader.c)
1513 */
1514static xmlDocPtr
1515xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1516 int encoding)
1517{
1518 int32 len;
1519 xmlChar *string;
1520 xmlChar *utf8string;
1521 PgXmlErrorContext *xmlerrcxt;
1522 volatile xmlParserCtxtPtr ctxt = NULL;
1523 volatile xmlDocPtr doc = NULL;
1524
1525 len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1526 string = xml_text2xmlChar(data);
1527
1528 utf8string = pg_do_encoding_conversion(string,
1529 len,
1530 encoding,
1531 PG_UTF8);
1532
1533 /* Start up libxml and its parser */
1534 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1535
1536 /* Use a TRY block to ensure we clean up correctly */
1537 PG_TRY();
1538 {
1539 bool parse_as_document = false;
1540 int res_code;
1541 size_t count = 0;
1542 xmlChar *version = NULL;
1543 int standalone = 0;
1544
1545 xmlInitParser();
1546
1547 ctxt = xmlNewParserCtxt();
1548 if (ctxt == NULL || xmlerrcxt->err_occurred)
1549 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1550 "could not allocate parser context");
1551
1552 /* Decide whether to parse as document or content */
1553 if (xmloption_arg == XMLOPTION_DOCUMENT)
1554 parse_as_document = true;
1555 else
1556 {
1557 /* Parse and skip over the XML declaration, if any */
1558 res_code = parse_xml_decl(utf8string,
1559 &count, &version, NULL, &standalone);
1560 if (res_code != 0)
1561 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1562 "invalid XML content: invalid XML declaration",
1563 res_code);
1564
1565 /* Is there a DOCTYPE element? */
1566 if (xml_doctype_in_content(utf8string + count))
1567 parse_as_document = true;
1568 }
1569
1570 if (parse_as_document)
1571 {
1572 /*
1573 * Note, that here we try to apply DTD defaults
1574 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1575 * 'Default values defined by internal DTD are applied'. As for
1576 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1577 * 10.16.7.e)
1578 */
1579 doc = xmlCtxtReadDoc(ctxt, utf8string,
1580 NULL,
1581 "UTF-8",
1582 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1583 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1584 if (doc == NULL || xmlerrcxt->err_occurred)
1585 {
1586 /* Use original option to decide which error code to throw */
1587 if (xmloption_arg == XMLOPTION_DOCUMENT)
1588 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1589 "invalid XML document");
1590 else
1591 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1592 "invalid XML content");
1593 }
1594 }
1595 else
1596 {
1597 doc = xmlNewDoc(version);
1598 Assert(doc->encoding == NULL);
1599 doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1600 doc->standalone = standalone;
1601
1602 /* allow empty content */
1603 if (*(utf8string + count))
1604 {
1605 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1606 utf8string + count, NULL);
1607 if (res_code != 0 || xmlerrcxt->err_occurred)
1608 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1609 "invalid XML content");
1610 }
1611 }
1612 }
1613 PG_CATCH();
1614 {
1615 if (doc != NULL)
1616 xmlFreeDoc(doc);
1617 if (ctxt != NULL)
1618 xmlFreeParserCtxt(ctxt);
1619
1620 pg_xml_done(xmlerrcxt, true);
1621
1622 PG_RE_THROW();
1623 }
1624 PG_END_TRY();
1625
1626 xmlFreeParserCtxt(ctxt);
1627
1628 pg_xml_done(xmlerrcxt, false);
1629
1630 return doc;
1631}
1632
1633
1634/*
1635 * xmlChar<->text conversions
1636 */
1637static xmlChar *
1638xml_text2xmlChar(text *in)
1639{
1640 return (xmlChar *) text_to_cstring(in);
1641}
1642
1643
1644#ifdef USE_LIBXMLCONTEXT
1645
1646/*
1647 * Manage the special context used for all libxml allocations (but only
1648 * in special debug builds; see notes at top of file)
1649 */
1650static void
1651xml_memory_init(void)
1652{
1653 /* Create memory context if not there already */
1654 if (LibxmlContext == NULL)
1655 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1656 "Libxml context",
1657 ALLOCSET_DEFAULT_SIZES);
1658
1659 /* Re-establish the callbacks even if already set */
1660 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1661}
1662
1663/*
1664 * Wrappers for memory management functions
1665 */
1666static void *
1667xml_palloc(size_t size)
1668{
1669 return MemoryContextAlloc(LibxmlContext, size);
1670}
1671
1672
1673static void *
1674xml_repalloc(void *ptr, size_t size)
1675{
1676 return repalloc(ptr, size);
1677}
1678
1679
1680static void
1681xml_pfree(void *ptr)
1682{
1683 /* At least some parts of libxml assume xmlFree(NULL) is allowed */
1684 if (ptr)
1685 pfree(ptr);
1686}
1687
1688
1689static char *
1690xml_pstrdup(const char *string)
1691{
1692 return MemoryContextStrdup(LibxmlContext, string);
1693}
1694#endif /* USE_LIBXMLCONTEXT */
1695
1696
1697/*
1698 * xmlPgEntityLoader --- entity loader callback function
1699 *
1700 * Silently prevent any external entity URL from being loaded. We don't want
1701 * to throw an error, so instead make the entity appear to expand to an empty
1702 * string.
1703 *
1704 * We would prefer to allow loading entities that exist in the system's
1705 * global XML catalog; but the available libxml2 APIs make that a complex
1706 * and fragile task. For now, just shut down all external access.
1707 */
1708static xmlParserInputPtr
1709xmlPgEntityLoader(const char *URL, const char *ID,
1710 xmlParserCtxtPtr ctxt)
1711{
1712 return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1713}
1714
1715
1716/*
1717 * xml_ereport --- report an XML-related error
1718 *
1719 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1720 * standard. This function adds libxml's native error message, if any, as
1721 * detail.
1722 *
1723 * This is exported for modules that want to share the core libxml error
1724 * handler. Note that pg_xml_init() *must* have been called previously.
1725 */
1726void
1727xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1728{
1729 char *detail;
1730
1731 /* Defend against someone passing us a bogus context struct */
1732 if (errcxt->magic != ERRCXT_MAGIC)
1733 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1734
1735 /* Flag that the current libxml error has been reported */
1736 errcxt->err_occurred = false;
1737
1738 /* Include detail only if we have some text from libxml */
1739 if (errcxt->err_buf.len > 0)
1740 detail = errcxt->err_buf.data;
1741 else
1742 detail = NULL;
1743
1744 ereport(level,
1745 (errcode(sqlcode),
1746 errmsg_internal("%s", msg),
1747 detail ? errdetail_internal("%s", detail) : 0));
1748}
1749
1750
1751/*
1752 * Error handler for libxml errors and warnings
1753 */
1754static void
1755xml_errorHandler(void *data, xmlErrorPtr error)
1756{
1757 PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1758 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1759 xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1760 xmlNodePtr node = error->node;
1761 const xmlChar *name = (node != NULL &&
1762 node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1763 int domain = error->domain;
1764 int level = error->level;
1765 StringInfo errorBuf;
1766
1767 /*
1768 * Defend against someone passing us a bogus context struct.
1769 *
1770 * We force a backend exit if this check fails because longjmp'ing out of
1771 * libxml would likely render it unsafe to use further.
1772 */
1773 if (xmlerrcxt->magic != ERRCXT_MAGIC)
1774 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1775
1776 /*----------
1777 * Older libxml versions report some errors differently.
1778 * First, some errors were previously reported as coming from the parser
1779 * domain but are now reported as coming from the namespace domain.
1780 * Second, some warnings were upgraded to errors.
1781 * We attempt to compensate for that here.
1782 *----------
1783 */
1784 switch (error->code)
1785 {
1786 case XML_WAR_NS_URI:
1787 level = XML_ERR_ERROR;
1788 domain = XML_FROM_NAMESPACE;
1789 break;
1790
1791 case XML_ERR_NS_DECL_ERROR:
1792 case XML_WAR_NS_URI_RELATIVE:
1793 case XML_WAR_NS_COLUMN:
1794 case XML_NS_ERR_XML_NAMESPACE:
1795 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1796 case XML_NS_ERR_QNAME:
1797 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1798 case XML_NS_ERR_EMPTY:
1799 domain = XML_FROM_NAMESPACE;
1800 break;
1801 }
1802
1803 /* Decide whether to act on the error or not */
1804 switch (domain)
1805 {
1806 case XML_FROM_PARSER:
1807 case XML_FROM_NONE:
1808 case XML_FROM_MEMORY:
1809 case XML_FROM_IO:
1810
1811 /*
1812 * Suppress warnings about undeclared entities. We need to do
1813 * this to avoid problems due to not loading DTD definitions.
1814 */
1815 if (error->code == XML_WAR_UNDECLARED_ENTITY)
1816 return;
1817
1818 /* Otherwise, accept error regardless of the parsing purpose */
1819 break;
1820
1821 default:
1822 /* Ignore error if only doing well-formedness check */
1823 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1824 return;
1825 break;
1826 }
1827
1828 /* Prepare error message in errorBuf */
1829 errorBuf = makeStringInfo();
1830
1831 if (error->line > 0)
1832 appendStringInfo(errorBuf, "line %d: ", error->line);
1833 if (name != NULL)
1834 appendStringInfo(errorBuf, "element %s: ", name);
1835 if (error->message != NULL)
1836 appendStringInfoString(errorBuf, error->message);
1837 else
1838 appendStringInfoString(errorBuf, "(no message provided)");
1839
1840 /*
1841 * Append context information to errorBuf.
1842 *
1843 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1844 * write the context. Since we don't want to duplicate libxml
1845 * functionality here, we set up a generic error handler temporarily.
1846 *
1847 * We use appendStringInfo() directly as libxml's generic error handler.
1848 * This should work because it has essentially the same signature as
1849 * libxml expects, namely (void *ptr, const char *msg, ...).
1850 */
1851 if (input != NULL)
1852 {
1853 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1854 void *errCtxSaved = xmlGenericErrorContext;
1855
1856 xmlSetGenericErrorFunc((void *) errorBuf,
1857 (xmlGenericErrorFunc) appendStringInfo);
1858
1859 /* Add context information to errorBuf */
1860 appendStringInfoLineSeparator(errorBuf);
1861
1862 xmlParserPrintFileContext(input);
1863
1864 /* Restore generic error func */
1865 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1866 }
1867
1868 /* Get rid of any trailing newlines in errorBuf */
1869 chopStringInfoNewlines(errorBuf);
1870
1871 /*
1872 * Legacy error handling mode. err_occurred is never set, we just add the
1873 * message to err_buf. This mode exists because the xml2 contrib module
1874 * uses our error-handling infrastructure, but we don't want to change its
1875 * behaviour since it's deprecated anyway. This is also why we don't
1876 * distinguish between notices, warnings and errors here --- the old-style
1877 * generic error handler wouldn't have done that either.
1878 */
1879 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1880 {
1881 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1882 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1883
1884 pfree(errorBuf->data);
1885 pfree(errorBuf);
1886 return;
1887 }
1888
1889 /*
1890 * We don't want to ereport() here because that'd probably leave libxml in
1891 * an inconsistent state. Instead, we remember the error and ereport()
1892 * from xml_ereport().
1893 *
1894 * Warnings and notices can be reported immediately since they won't cause
1895 * a longjmp() out of libxml.
1896 */
1897 if (level >= XML_ERR_ERROR)
1898 {
1899 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1900 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1901
1902 xmlerrcxt->err_occurred = true;
1903 }
1904 else if (level >= XML_ERR_WARNING)
1905 {
1906 ereport(WARNING,
1907 (errmsg_internal("%s", errorBuf->data)));
1908 }
1909 else
1910 {
1911 ereport(NOTICE,
1912 (errmsg_internal("%s", errorBuf->data)));
1913 }
1914
1915 pfree(errorBuf->data);
1916 pfree(errorBuf);
1917}
1918
1919
1920/*
1921 * Wrapper for "ereport" function for XML-related errors. The "msg"
1922 * is the SQL-level message; some can be adopted from the SQL/XML
1923 * standard. This function uses "code" to create a textual detail
1924 * message. At the moment, we only need to cover those codes that we
1925 * may raise in this file.
1926 */
1927static void
1928xml_ereport_by_code(int level, int sqlcode,
1929 const char *msg, int code)
1930{
1931 const char *det;
1932
1933 switch (code)
1934 {
1935 case XML_ERR_INVALID_CHAR:
1936 det = gettext_noop("Invalid character value.");
1937 break;
1938 case XML_ERR_SPACE_REQUIRED:
1939 det = gettext_noop("Space required.");
1940 break;
1941 case XML_ERR_STANDALONE_VALUE:
1942 det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1943 break;
1944 case XML_ERR_VERSION_MISSING:
1945 det = gettext_noop("Malformed declaration: missing version.");
1946 break;
1947 case XML_ERR_MISSING_ENCODING:
1948 det = gettext_noop("Missing encoding in text declaration.");
1949 break;
1950 case XML_ERR_XMLDECL_NOT_FINISHED:
1951 det = gettext_noop("Parsing XML declaration: '?>' expected.");
1952 break;
1953 default:
1954 det = gettext_noop("Unrecognized libxml error code: %d.");
1955 break;
1956 }
1957
1958 ereport(level,
1959 (errcode(sqlcode),
1960 errmsg_internal("%s", msg),
1961 errdetail(det, code)));
1962}
1963
1964
1965/*
1966 * Remove all trailing newlines from a StringInfo string
1967 */
1968static void
1969chopStringInfoNewlines(StringInfo str)
1970{
1971 while (str->len > 0 && str->data[str->len - 1] == '\n')
1972 str->data[--str->len] = '\0';
1973}
1974
1975
1976/*
1977 * Append a newline after removing any existing trailing newlines
1978 */
1979static void
1980appendStringInfoLineSeparator(StringInfo str)
1981{
1982 chopStringInfoNewlines(str);
1983 if (str->len > 0)
1984 appendStringInfoChar(str, '\n');
1985}
1986
1987
1988/*
1989 * Convert one char in the current server encoding to a Unicode codepoint.
1990 */
1991static pg_wchar
1992sqlchar_to_unicode(const char *s)
1993{
1994 char *utf8string;
1995 pg_wchar ret[2]; /* need space for trailing zero */
1996
1997 /* note we're not assuming s is null-terminated */
1998 utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1999
2000 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2001 pg_encoding_mblen(PG_UTF8, utf8string));
2002
2003 if (utf8string != s)
2004 pfree(utf8string);
2005
2006 return ret[0];
2007}
2008
2009
2010static bool
2011is_valid_xml_namefirst(pg_wchar c)
2012{
2013 /* (Letter | '_' | ':') */
2014 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2015 || c == '_' || c == ':');
2016}
2017
2018
2019static bool
2020is_valid_xml_namechar(pg_wchar c)
2021{
2022 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2023 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2024 || xmlIsDigitQ(c)
2025 || c == '.' || c == '-' || c == '_' || c == ':'
2026 || xmlIsCombiningQ(c)
2027 || xmlIsExtenderQ(c));
2028}
2029#endif /* USE_LIBXML */
2030
2031
2032/*
2033 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2034 */
2035char *
2036map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
2037 bool escape_period)
2038{
2039#ifdef USE_LIBXML
2040 StringInfoData buf;
2041 const char *p;
2042
2043 /*
2044 * SQL/XML doesn't make use of this case anywhere, so it's probably a
2045 * mistake.
2046 */
2047 Assert(fully_escaped || !escape_period);
2048
2049 initStringInfo(&buf);
2050
2051 for (p = ident; *p; p += pg_mblen(p))
2052 {
2053 if (*p == ':' && (p == ident || fully_escaped))
2054 appendStringInfoString(&buf, "_x003A_");
2055 else if (*p == '_' && *(p + 1) == 'x')
2056 appendStringInfoString(&buf, "_x005F_");
2057 else if (fully_escaped && p == ident &&
2058 pg_strncasecmp(p, "xml", 3) == 0)
2059 {
2060 if (*p == 'x')
2061 appendStringInfoString(&buf, "_x0078_");
2062 else
2063 appendStringInfoString(&buf, "_x0058_");
2064 }
2065 else if (escape_period && *p == '.')
2066 appendStringInfoString(&buf, "_x002E_");
2067 else
2068 {
2069 pg_wchar u = sqlchar_to_unicode(p);
2070
2071 if ((p == ident)
2072 ? !is_valid_xml_namefirst(u)
2073 : !is_valid_xml_namechar(u))
2074 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
2075 else
2076 appendBinaryStringInfo(&buf, p, pg_mblen(p));
2077 }
2078 }
2079
2080 return buf.data;
2081#else /* not USE_LIBXML */
2082 NO_XML_SUPPORT();
2083 return NULL;
2084#endif /* not USE_LIBXML */
2085}
2086
2087
2088/*
2089 * Map a Unicode codepoint into the current server encoding.
2090 */
2091static char *
2092unicode_to_sqlchar(pg_wchar c)
2093{
2094 char utf8string[8]; /* need room for trailing zero */
2095 char *result;
2096
2097 memset(utf8string, 0, sizeof(utf8string));
2098 unicode_to_utf8(c, (unsigned char *) utf8string);
2099
2100 result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
2101 /* if pg_any_to_server didn't strdup, we must */
2102 if (result == utf8string)
2103 result = pstrdup(result);
2104 return result;
2105}
2106
2107
2108/*
2109 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2110 */
2111char *
2112map_xml_name_to_sql_identifier(const char *name)
2113{
2114 StringInfoData buf;
2115 const char *p;
2116
2117 initStringInfo(&buf);
2118
2119 for (p = name; *p; p += pg_mblen(p))
2120 {
2121 if (*p == '_' && *(p + 1) == 'x'
2122 && isxdigit((unsigned char) *(p + 2))
2123 && isxdigit((unsigned char) *(p + 3))
2124 && isxdigit((unsigned char) *(p + 4))
2125 && isxdigit((unsigned char) *(p + 5))
2126 && *(p + 6) == '_')
2127 {
2128 unsigned int u;
2129
2130 sscanf(p + 2, "%X", &u);
2131 appendStringInfoString(&buf, unicode_to_sqlchar(u));
2132 p += 6;
2133 }
2134 else
2135 appendBinaryStringInfo(&buf, p, pg_mblen(p));
2136 }
2137
2138 return buf.data;
2139}
2140
2141/*
2142 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2143 *
2144 * When xml_escape_strings is true, then certain characters in string
2145 * values are replaced by entity references (&lt; etc.), as specified
2146 * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2147 * wanted. The false case is mainly useful when the resulting value
2148 * is used with xmlTextWriterWriteAttribute() to write out an
2149 * attribute, because that function does the escaping itself.
2150 */
2151char *
2152map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2153{
2154 if (type_is_array_domain(type))
2155 {
2156 ArrayType *array;
2157 Oid elmtype;
2158 int16 elmlen;
2159 bool elmbyval;
2160 char elmalign;
2161 int num_elems;
2162 Datum *elem_values;
2163 bool *elem_nulls;
2164 StringInfoData buf;
2165 int i;
2166
2167 array = DatumGetArrayTypeP(value);
2168 elmtype = ARR_ELEMTYPE(array);
2169 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2170
2171 deconstruct_array(array, elmtype,
2172 elmlen, elmbyval, elmalign,
2173 &elem_values, &elem_nulls,
2174 &num_elems);
2175
2176 initStringInfo(&buf);
2177
2178 for (i = 0; i < num_elems; i++)
2179 {
2180 if (elem_nulls[i])
2181 continue;
2182 appendStringInfoString(&buf, "<element>");
2183 appendStringInfoString(&buf,
2184 map_sql_value_to_xml_value(elem_values[i],
2185 elmtype, true));
2186 appendStringInfoString(&buf, "</element>");
2187 }
2188
2189 pfree(elem_values);
2190 pfree(elem_nulls);
2191
2192 return buf.data;
2193 }
2194 else
2195 {
2196 Oid typeOut;
2197 bool isvarlena;
2198 char *str;
2199
2200 /*
2201 * Flatten domains; the special-case treatments below should apply to,
2202 * eg, domains over boolean not just boolean.
2203 */
2204 type = getBaseType(type);
2205
2206 /*
2207 * Special XSD formatting for some data types
2208 */
2209 switch (type)
2210 {
2211 case BOOLOID:
2212 if (DatumGetBool(value))
2213 return "true";
2214 else
2215 return "false";
2216
2217 case DATEOID:
2218 {
2219 DateADT date;
2220 struct pg_tm tm;
2221 char buf[MAXDATELEN + 1];
2222
2223 date = DatumGetDateADT(value);
2224 /* XSD doesn't support infinite values */
2225 if (DATE_NOT_FINITE(date))
2226 ereport(ERROR,
2227 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2228 errmsg("date out of range"),
2229 errdetail("XML does not support infinite date values.")));
2230 j2date(date + POSTGRES_EPOCH_JDATE,
2231 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2232 EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2233
2234 return pstrdup(buf);
2235 }
2236
2237 case TIMESTAMPOID:
2238 {
2239 Timestamp timestamp;
2240 struct pg_tm tm;
2241 fsec_t fsec;
2242 char buf[MAXDATELEN + 1];
2243
2244 timestamp = DatumGetTimestamp(value);
2245
2246 /* XSD doesn't support infinite values */
2247 if (TIMESTAMP_NOT_FINITE(timestamp))
2248 ereport(ERROR,
2249 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2250 errmsg("timestamp out of range"),
2251 errdetail("XML does not support infinite timestamp values.")));
2252 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2253 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2254 else
2255 ereport(ERROR,
2256 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2257 errmsg("timestamp out of range")));
2258
2259 return pstrdup(buf);
2260 }
2261
2262 case TIMESTAMPTZOID:
2263 {
2264 TimestampTz timestamp;
2265 struct pg_tm tm;
2266 int tz;
2267 fsec_t fsec;
2268 const char *tzn = NULL;
2269 char buf[MAXDATELEN + 1];
2270
2271 timestamp = DatumGetTimestamp(value);
2272
2273 /* XSD doesn't support infinite values */
2274 if (TIMESTAMP_NOT_FINITE(timestamp))
2275 ereport(ERROR,
2276 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2277 errmsg("timestamp out of range"),
2278 errdetail("XML does not support infinite timestamp values.")));
2279 else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2280 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2281 else
2282 ereport(ERROR,
2283 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2284 errmsg("timestamp out of range")));
2285
2286 return pstrdup(buf);
2287 }
2288
2289#ifdef USE_LIBXML
2290 case BYTEAOID:
2291 {
2292 bytea *bstr = DatumGetByteaPP(value);
2293 PgXmlErrorContext *xmlerrcxt;
2294 volatile xmlBufferPtr buf = NULL;
2295 volatile xmlTextWriterPtr writer = NULL;
2296 char *result;
2297
2298 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2299
2300 PG_TRY();
2301 {
2302 buf = xmlBufferCreate();
2303 if (buf == NULL || xmlerrcxt->err_occurred)
2304 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2305 "could not allocate xmlBuffer");
2306 writer = xmlNewTextWriterMemory(buf, 0);
2307 if (writer == NULL || xmlerrcxt->err_occurred)
2308 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2309 "could not allocate xmlTextWriter");
2310
2311 if (xmlbinary == XMLBINARY_BASE64)
2312 xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2313 0, VARSIZE_ANY_EXHDR(bstr));
2314 else
2315 xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2316 0, VARSIZE_ANY_EXHDR(bstr));
2317
2318 /* we MUST do this now to flush data out to the buffer */
2319 xmlFreeTextWriter(writer);
2320 writer = NULL;
2321
2322 result = pstrdup((const char *) xmlBufferContent(buf));
2323 }
2324 PG_CATCH();
2325 {
2326 if (writer)
2327 xmlFreeTextWriter(writer);
2328 if (buf)
2329 xmlBufferFree(buf);
2330
2331 pg_xml_done(xmlerrcxt, true);
2332
2333 PG_RE_THROW();
2334 }
2335 PG_END_TRY();
2336
2337 xmlBufferFree(buf);
2338
2339 pg_xml_done(xmlerrcxt, false);
2340
2341 return result;
2342 }
2343#endif /* USE_LIBXML */
2344
2345 }
2346
2347 /*
2348 * otherwise, just use the type's native text representation
2349 */
2350 getTypeOutputInfo(type, &typeOut, &isvarlena);
2351 str = OidOutputFunctionCall(typeOut, value);
2352
2353 /* ... exactly as-is for XML, and when escaping is not wanted */
2354 if (type == XMLOID || !xml_escape_strings)
2355 return str;
2356
2357 /* otherwise, translate special characters as needed */
2358 return escape_xml(str);
2359 }
2360}
2361
2362
2363/*
2364 * Escape characters in text that have special meanings in XML.
2365 *
2366 * Returns a palloc'd string.
2367 *
2368 * NB: this is intentionally not dependent on libxml.
2369 */
2370char *
2371escape_xml(const char *str)
2372{
2373 StringInfoData buf;
2374 const char *p;
2375
2376 initStringInfo(&buf);
2377 for (p = str; *p; p++)
2378 {
2379 switch (*p)
2380 {
2381 case '&':
2382 appendStringInfoString(&buf, "&amp;");
2383 break;
2384 case '<':
2385 appendStringInfoString(&buf, "&lt;");
2386 break;
2387 case '>':
2388 appendStringInfoString(&buf, "&gt;");
2389 break;
2390 case '\r':
2391 appendStringInfoString(&buf, "&#x0d;");
2392 break;
2393 default:
2394 appendStringInfoCharMacro(&buf, *p);
2395 break;
2396 }
2397 }
2398 return buf.data;
2399}
2400
2401
/*
 * _SPI_strdup
 *		Copy a NUL-terminated string into memory obtained from SPI_palloc().
 *
 * Callers in this file use it to hand a string built while connected to
 * SPI to code that runs after SPI_finish().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the terminator */
	char	   *copy;

	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
2411
2412
2413/*
2414 * SQL to XML mapping functions
2415 *
2416 * What follows below was at one point intentionally organized so that
2417 * you can read along in the SQL/XML standard. The functions are
2418 * mostly split up the way the clauses lay out in the standards
2419 * document, and the identifiers are also aligned with the standard
2420 * text. Unfortunately, SQL/XML:2006 reordered the clauses
2421 * differently than SQL/XML:2003, so the order below doesn't make much
2422 * sense anymore.
2423 *
2424 * There are many things going on there:
2425 *
2426 * There are two kinds of mappings: Mapping SQL data (table contents)
2427 * to XML documents, and mapping SQL structure (the "schema") to XML
2428 * Schema. And there are functions that do both at the same time.
2429 *
2430 * Then you can map a database, a schema, or a table, each in both
2431 * ways. This breaks down recursively: Mapping a database invokes
2432 * mapping schemas, which invokes mapping tables, which invokes
2433 * mapping rows, which invokes mapping columns, although you can't
2434 * call the last two from the outside. Because of this, there are a
2435 * number of xyz_internal() functions which are to be called both from
2436 * the function manager wrapper and from some upper layer in a
2437 * recursive call.
2438 *
2439 * See the documentation about what the common function arguments
2440 * nulls, tableforest, and targetns mean.
2441 *
2442 * Some style guidelines for XML output: Use double quotes for quoting
2443 * XML attributes. Indent XML elements by two spaces, but remember
2444 * that a lot of code is called recursively at different levels, so
2445 * it's better not to indent rather than create output that indents
2446 * and outdents weirdly. Add newlines to make the output look nice.
2447 */
2448
2449
2450/*
2451 * Visibility of objects for XML mappings; see SQL/XML:2008 section
2452 * 4.10.8.
2453 */
2454
2455/*
2456 * Given a query, which must return type oid as first column, produce
2457 * a list of Oids with the query results.
2458 */
2459static List *
2460query_to_oid_list(const char *query)
2461{
2462 uint64 i;
2463 List *list = NIL;
2464
2465 SPI_execute(query, true, 0);
2466
2467 for (i = 0; i < SPI_processed; i++)
2468 {
2469 Datum oid;
2470 bool isnull;
2471
2472 oid = SPI_getbinval(SPI_tuptable->vals[i],
2473 SPI_tuptable->tupdesc,
2474 1,
2475 &isnull);
2476 if (!isnull)
2477 list = lappend_oid(list, DatumGetObjectId(oid));
2478 }
2479
2480 return list;
2481}
2482
2483
2484static List *
2485schema_get_xml_visible_tables(Oid nspid)
2486{
2487 StringInfoData query;
2488
2489 initStringInfo(&query);
2490 appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2491 " WHERE relnamespace = %u AND relkind IN ("
2492 CppAsString2(RELKIND_RELATION) ","
2493 CppAsString2(RELKIND_MATVIEW) ","
2494 CppAsString2(RELKIND_VIEW) ")"
2495 " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2496 " ORDER BY relname;", nspid);
2497
2498 return query_to_oid_list(query.data);
2499}
2500
2501
2502/*
2503 * Including the system schemas is probably not useful for a database
2504 * mapping.
2505 */
2506#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
2507
2508#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2509
2510
2511static List *
2512database_get_xml_visible_schemas(void)
2513{
2514 return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2515}
2516
2517
2518static List *
2519database_get_xml_visible_tables(void)
2520{
2521 /* At the moment there is no order required here. */
2522 return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2523 " WHERE relkind IN ("
2524 CppAsString2(RELKIND_RELATION) ","
2525 CppAsString2(RELKIND_MATVIEW) ","
2526 CppAsString2(RELKIND_VIEW) ")"
2527 " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2528 " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2529}
2530
2531
2532/*
2533 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2534 * section 9.11.
2535 */
2536
2537static StringInfo
2538table_to_xml_internal(Oid relid,
2539 const char *xmlschema, bool nulls, bool tableforest,
2540 const char *targetns, bool top_level)
2541{
2542 StringInfoData query;
2543
2544 initStringInfo(&query);
2545 appendStringInfo(&query, "SELECT * FROM %s",
2546 DatumGetCString(DirectFunctionCall1(regclassout,
2547 ObjectIdGetDatum(relid))));
2548 return query_to_xml_internal(query.data, get_rel_name(relid),
2549 xmlschema, nulls, tableforest,
2550 targetns, top_level);
2551}
2552
2553
2554Datum
2555table_to_xml(PG_FUNCTION_ARGS)
2556{
2557 Oid relid = PG_GETARG_OID(0);
2558 bool nulls = PG_GETARG_BOOL(1);
2559 bool tableforest = PG_GETARG_BOOL(2);
2560 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2561
2562 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2563 nulls, tableforest,
2564 targetns, true)));
2565}
2566
2567
2568Datum
2569query_to_xml(PG_FUNCTION_ARGS)
2570{
2571 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2572 bool nulls = PG_GETARG_BOOL(1);
2573 bool tableforest = PG_GETARG_BOOL(2);
2574 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2575
2576 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2577 NULL, nulls, tableforest,
2578 targetns, true)));
2579}
2580
2581
2582Datum
2583cursor_to_xml(PG_FUNCTION_ARGS)
2584{
2585 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2586 int32 count = PG_GETARG_INT32(1);
2587 bool nulls = PG_GETARG_BOOL(2);
2588 bool tableforest = PG_GETARG_BOOL(3);
2589 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2590
2591 StringInfoData result;
2592 Portal portal;
2593 uint64 i;
2594
2595 initStringInfo(&result);
2596
2597 if (!tableforest)
2598 {
2599 xmldata_root_element_start(&result, "table", NULL, targetns, true);
2600 appendStringInfoChar(&result, '\n');
2601 }
2602
2603 SPI_connect();
2604 portal = SPI_cursor_find(name);
2605 if (portal == NULL)
2606 ereport(ERROR,
2607 (errcode(ERRCODE_UNDEFINED_CURSOR),
2608 errmsg("cursor \"%s\" does not exist", name)));
2609
2610 SPI_cursor_fetch(portal, true, count);
2611 for (i = 0; i < SPI_processed; i++)
2612 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2613 tableforest, targetns, true);
2614
2615 SPI_finish();
2616
2617 if (!tableforest)
2618 xmldata_root_element_end(&result, "table");
2619
2620 PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2621}
2622
2623
2624/*
2625 * Write the start tag of the root element of a data mapping.
2626 *
2627 * top_level means that this is the very top level of the eventual
2628 * output. For example, when the user calls table_to_xml, then a call
2629 * with a table name to this function is the top level. When the user
2630 * calls database_to_xml, then a call with a schema name to this
2631 * function is not the top level. If top_level is false, then the XML
2632 * namespace declarations are omitted, because they supposedly already
2633 * appeared earlier in the output. Repeating them is not wrong, but
2634 * it looks ugly.
2635 */
2636static void
2637xmldata_root_element_start(StringInfo result, const char *eltname,
2638 const char *xmlschema, const char *targetns,
2639 bool top_level)
2640{
2641 /* This isn't really wrong but currently makes no sense. */
2642 Assert(top_level || !xmlschema);
2643
2644 appendStringInfo(result, "<%s", eltname);
2645 if (top_level)
2646 {
2647 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2648 if (strlen(targetns) > 0)
2649 appendStringInfo(result, " xmlns=\"%s\"", targetns);
2650 }
2651 if (xmlschema)
2652 {
2653 /* FIXME: better targets */
2654 if (strlen(targetns) > 0)
2655 appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2656 else
2657 appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2658 }
2659 appendStringInfoString(result, ">\n");
2660}
2661
2662
2663static void
2664xmldata_root_element_end(StringInfo result, const char *eltname)
2665{
2666 appendStringInfo(result, "</%s>\n", eltname);
2667}
2668
2669
2670static StringInfo
2671query_to_xml_internal(const char *query, char *tablename,
2672 const char *xmlschema, bool nulls, bool tableforest,
2673 const char *targetns, bool top_level)
2674{
2675 StringInfo result;
2676 char *xmltn;
2677 uint64 i;
2678
2679 if (tablename)
2680 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2681 else
2682 xmltn = "table";
2683
2684 result = makeStringInfo();
2685
2686 SPI_connect();
2687 if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2688 ereport(ERROR,
2689 (errcode(ERRCODE_DATA_EXCEPTION),
2690 errmsg("invalid query")));
2691
2692 if (!tableforest)
2693 {
2694 xmldata_root_element_start(result, xmltn, xmlschema,
2695 targetns, top_level);
2696 appendStringInfoChar(result, '\n');
2697 }
2698
2699 if (xmlschema)
2700 appendStringInfo(result, "%s\n\n", xmlschema);
2701
2702 for (i = 0; i < SPI_processed; i++)
2703 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2704 tableforest, targetns, top_level);
2705
2706 if (!tableforest)
2707 xmldata_root_element_end(result, xmltn);
2708
2709 SPI_finish();
2710
2711 return result;
2712}
2713
2714
2715Datum
2716table_to_xmlschema(PG_FUNCTION_ARGS)
2717{
2718 Oid relid = PG_GETARG_OID(0);
2719 bool nulls = PG_GETARG_BOOL(1);
2720 bool tableforest = PG_GETARG_BOOL(2);
2721 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2722 const char *result;
2723 Relation rel;
2724
2725 rel = table_open(relid, AccessShareLock);
2726 result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2727 tableforest, targetns);
2728 table_close(rel, NoLock);
2729
2730 PG_RETURN_XML_P(cstring_to_xmltype(result));
2731}
2732
2733
2734Datum
2735query_to_xmlschema(PG_FUNCTION_ARGS)
2736{
2737 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2738 bool nulls = PG_GETARG_BOOL(1);
2739 bool tableforest = PG_GETARG_BOOL(2);
2740 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2741 const char *result;
2742 SPIPlanPtr plan;
2743 Portal portal;
2744
2745 SPI_connect();
2746
2747 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2748 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2749
2750 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2751 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2752
2753 result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2754 InvalidOid, nulls,
2755 tableforest, targetns));
2756 SPI_cursor_close(portal);
2757 SPI_finish();
2758
2759 PG_RETURN_XML_P(cstring_to_xmltype(result));
2760}
2761
2762
2763Datum
2764cursor_to_xmlschema(PG_FUNCTION_ARGS)
2765{
2766 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2767 bool nulls = PG_GETARG_BOOL(1);
2768 bool tableforest = PG_GETARG_BOOL(2);
2769 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2770 const char *xmlschema;
2771 Portal portal;
2772
2773 SPI_connect();
2774 portal = SPI_cursor_find(name);
2775 if (portal == NULL)
2776 ereport(ERROR,
2777 (errcode(ERRCODE_UNDEFINED_CURSOR),
2778 errmsg("cursor \"%s\" does not exist", name)));
2779
2780 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2781 InvalidOid, nulls,
2782 tableforest, targetns));
2783 SPI_finish();
2784
2785 PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2786}
2787
2788
2789Datum
2790table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2791{
2792 Oid relid = PG_GETARG_OID(0);
2793 bool nulls = PG_GETARG_BOOL(1);
2794 bool tableforest = PG_GETARG_BOOL(2);
2795 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2796 Relation rel;
2797 const char *xmlschema;
2798
2799 rel = table_open(relid, AccessShareLock);
2800 xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2801 tableforest, targetns);
2802 table_close(rel, NoLock);
2803
2804 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2805 xmlschema, nulls, tableforest,
2806 targetns, true)));
2807}
2808
2809
2810Datum
2811query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2812{
2813 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2814 bool nulls = PG_GETARG_BOOL(1);
2815 bool tableforest = PG_GETARG_BOOL(2);
2816 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2817
2818 const char *xmlschema;
2819 SPIPlanPtr plan;
2820 Portal portal;
2821
2822 SPI_connect();
2823
2824 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2825 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2826
2827 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2828 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2829
2830 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2831 InvalidOid, nulls, tableforest, targetns));
2832 SPI_cursor_close(portal);
2833 SPI_finish();
2834
2835 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2836 xmlschema, nulls, tableforest,
2837 targetns, true)));
2838}
2839
2840
2841/*
2842 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2843 * sections 9.13, 9.14.
2844 */
2845
2846static StringInfo
2847schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2848 bool tableforest, const char *targetns, bool top_level)
2849{
2850 StringInfo result;
2851 char *xmlsn;
2852 List *relid_list;
2853 ListCell *cell;
2854
2855 xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2856 true, false);
2857 result = makeStringInfo();
2858
2859 xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2860 appendStringInfoChar(result, '\n');
2861
2862 if (xmlschema)
2863 appendStringInfo(result, "%s\n\n", xmlschema);
2864
2865 SPI_connect();
2866
2867 relid_list = schema_get_xml_visible_tables(nspid);
2868
2869 foreach(cell, relid_list)
2870 {
2871 Oid relid = lfirst_oid(cell);
2872 StringInfo subres;
2873
2874 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2875 targetns, false);
2876
2877 appendStringInfoString(result, subres->data);
2878 appendStringInfoChar(result, '\n');
2879 }
2880
2881 SPI_finish();
2882
2883 xmldata_root_element_end(result, xmlsn);
2884
2885 return result;
2886}
2887
2888
2889Datum
2890schema_to_xml(PG_FUNCTION_ARGS)
2891{
2892 Name name = PG_GETARG_NAME(0);
2893 bool nulls = PG_GETARG_BOOL(1);
2894 bool tableforest = PG_GETARG_BOOL(2);
2895 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2896
2897 char *schemaname;
2898 Oid nspid;
2899
2900 schemaname = NameStr(*name);
2901 nspid = LookupExplicitNamespace(schemaname, false);
2902
2903 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2904 nulls, tableforest, targetns, true)));
2905}
2906
2907
2908/*
2909 * Write the start element of the root element of an XML Schema mapping.
2910 */
2911static void
2912xsd_schema_element_start(StringInfo result, const char *targetns)
2913{
2914 appendStringInfoString(result,
2915 "<xsd:schema\n"
2916 " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2917 if (strlen(targetns) > 0)
2918 appendStringInfo(result,
2919 "\n"
2920 " targetNamespace=\"%s\"\n"
2921 " elementFormDefault=\"qualified\"",
2922 targetns);
2923 appendStringInfoString(result,
2924 ">\n\n");
2925}
2926
2927
2928static void
2929xsd_schema_element_end(StringInfo result)
2930{
2931 appendStringInfoString(result, "</xsd:schema>");
2932}
2933
2934
2935static StringInfo
2936schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2937 bool tableforest, const char *targetns)
2938{
2939 Oid nspid;
2940 List *relid_list;
2941 List *tupdesc_list;
2942 ListCell *cell;
2943 StringInfo result;
2944
2945 result = makeStringInfo();
2946
2947 nspid = LookupExplicitNamespace(schemaname, false);
2948
2949 xsd_schema_element_start(result, targetns);
2950
2951 SPI_connect();
2952
2953 relid_list = schema_get_xml_visible_tables(nspid);
2954
2955 tupdesc_list = NIL;
2956 foreach(cell, relid_list)
2957 {
2958 Relation rel;
2959
2960 rel = table_open(lfirst_oid(cell), AccessShareLock);
2961 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2962 table_close(rel, NoLock);
2963 }
2964
2965 appendStringInfoString(result,
2966 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2967
2968 appendStringInfoString(result,
2969 map_sql_schema_to_xmlschema_types(nspid, relid_list,
2970 nulls, tableforest, targetns));
2971
2972 xsd_schema_element_end(result);
2973
2974 SPI_finish();
2975
2976 return result;
2977}
2978
2979
2980Datum
2981schema_to_xmlschema(PG_FUNCTION_ARGS)
2982{
2983 Name name = PG_GETARG_NAME(0);
2984 bool nulls = PG_GETARG_BOOL(1);
2985 bool tableforest = PG_GETARG_BOOL(2);
2986 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2987
2988 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2989 nulls, tableforest, targetns)));
2990}
2991
2992
2993Datum
2994schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2995{
2996 Name name = PG_GETARG_NAME(0);
2997 bool nulls = PG_GETARG_BOOL(1);
2998 bool tableforest = PG_GETARG_BOOL(2);
2999 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3000 char *schemaname;
3001 Oid nspid;
3002 StringInfo xmlschema;
3003
3004 schemaname = NameStr(*name);
3005 nspid = LookupExplicitNamespace(schemaname, false);
3006
3007 xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3008 tableforest, targetns);
3009
3010 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3011 xmlschema->data, nulls,
3012 tableforest, targetns, true)));
3013}
3014
3015
3016/*
3017 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3018 * sections 9.16, 9.17.
3019 */
3020
3021static StringInfo
3022database_to_xml_internal(const char *xmlschema, bool nulls,
3023 bool tableforest, const char *targetns)
3024{
3025 StringInfo result;
3026 List *nspid_list;
3027 ListCell *cell;
3028 char *xmlcn;
3029
3030 xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3031 true, false);
3032 result = makeStringInfo();
3033
3034 xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3035 appendStringInfoChar(result, '\n');
3036
3037 if (xmlschema)
3038 appendStringInfo(result, "%s\n\n", xmlschema);
3039
3040 SPI_connect();
3041
3042 nspid_list = database_get_xml_visible_schemas();
3043
3044 foreach(cell, nspid_list)
3045 {
3046 Oid nspid = lfirst_oid(cell);
3047 StringInfo subres;
3048
3049 subres = schema_to_xml_internal(nspid, NULL, nulls,
3050 tableforest, targetns, false);
3051
3052 appendStringInfoString(result, subres->data);
3053 appendStringInfoChar(result, '\n');
3054 }
3055
3056 SPI_finish();
3057
3058 xmldata_root_element_end(result, xmlcn);
3059
3060 return result;
3061}
3062
3063
3064Datum
3065database_to_xml(PG_FUNCTION_ARGS)
3066{
3067 bool nulls = PG_GETARG_BOOL(0);
3068 bool tableforest = PG_GETARG_BOOL(1);
3069 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3070
3071 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3072 tableforest, targetns)));
3073}
3074
3075
3076static StringInfo
3077database_to_xmlschema_internal(bool nulls, bool tableforest,
3078 const char *targetns)
3079{
3080 List *relid_list;
3081 List *nspid_list;
3082 List *tupdesc_list;
3083 ListCell *cell;
3084 StringInfo result;
3085
3086 result = makeStringInfo();
3087
3088 xsd_schema_element_start(result, targetns);
3089
3090 SPI_connect();
3091
3092 relid_list = database_get_xml_visible_tables();
3093 nspid_list = database_get_xml_visible_schemas();
3094
3095 tupdesc_list = NIL;
3096 foreach(cell, relid_list)
3097 {
3098 Relation rel;
3099
3100 rel = table_open(lfirst_oid(cell), AccessShareLock);
3101 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3102 table_close(rel, NoLock);
3103 }
3104
3105 appendStringInfoString(result,
3106 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3107
3108 appendStringInfoString(result,
3109 map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3110
3111 xsd_schema_element_end(result);
3112
3113 SPI_finish();
3114
3115 return result;
3116}
3117
3118
3119Datum
3120database_to_xmlschema(PG_FUNCTION_ARGS)
3121{
3122 bool nulls = PG_GETARG_BOOL(0);
3123 bool tableforest = PG_GETARG_BOOL(1);
3124 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3125
3126 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3127 tableforest, targetns)));
3128}
3129
3130
3131Datum
3132database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3133{
3134 bool nulls = PG_GETARG_BOOL(0);
3135 bool tableforest = PG_GETARG_BOOL(1);
3136 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3137 StringInfo xmlschema;
3138
3139 xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3140
3141 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3142 nulls, tableforest, targetns)));
3143}
3144
3145
3146/*
3147 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3148 * 9.2.
3149 */
3150static char *
3151map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3152{
3153 StringInfoData result;
3154
3155 initStringInfo(&result);
3156
3157 if (a)
3158 appendStringInfoString(&result,
3159 map_sql_identifier_to_xml_name(a, true, true));
3160 if (b)
3161 appendStringInfo(&result, ".%s",
3162 map_sql_identifier_to_xml_name(b, true, true));
3163 if (c)
3164 appendStringInfo(&result, ".%s",
3165 map_sql_identifier_to_xml_name(c, true, true));
3166 if (d)
3167 appendStringInfo(&result, ".%s",
3168 map_sql_identifier_to_xml_name(d, true, true));
3169
3170 return result.data;
3171}
3172
3173
3174/*
3175 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3176 * section 9.11.
3177 *
3178 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3179 * 9.9.
3180 */
3181static const char *
3182map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3183 bool tableforest, const char *targetns)
3184{
3185 int i;
3186 char *xmltn;
3187 char *tabletypename;
3188 char *rowtypename;
3189 StringInfoData result;
3190
3191 initStringInfo(&result);
3192
3193 if (OidIsValid(relid))
3194 {
3195 HeapTuple tuple;
3196 Form_pg_class reltuple;
3197
3198 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3199 if (!HeapTupleIsValid(tuple))
3200 elog(ERROR, "cache lookup failed for relation %u", relid);
3201 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3202
3203 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3204 true, false);
3205
3206 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3207 get_database_name(MyDatabaseId),
3208 get_namespace_name(reltuple->relnamespace),
3209 NameStr(reltuple->relname));
3210
3211 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3212 get_database_name(MyDatabaseId),
3213 get_namespace_name(reltuple->relnamespace),
3214 NameStr(reltuple->relname));
3215
3216 ReleaseSysCache(tuple);
3217 }
3218 else
3219 {
3220 if (tableforest)
3221 xmltn = "row";
3222 else
3223 xmltn = "table";
3224
3225 tabletypename = "TableType";
3226 rowtypename = "RowType";
3227 }
3228
3229 xsd_schema_element_start(&result, targetns);
3230
3231 appendStringInfoString(&result,
3232 map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3233
3234 appendStringInfo(&result,
3235 "<xsd:complexType name=\"%s\">\n"
3236 " <xsd:sequence>\n",
3237 rowtypename);
3238
3239 for (i = 0; i < tupdesc->natts; i++)
3240 {
3241 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3242
3243 if (att->attisdropped)
3244 continue;
3245 appendStringInfo(&result,
3246 " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3247 map_sql_identifier_to_xml_name(NameStr(att->attname),
3248 true, false),
3249 map_sql_type_to_xml_name(att->atttypid, -1),
3250 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3251 }
3252
3253 appendStringInfoString(&result,
3254 " </xsd:sequence>\n"
3255 "</xsd:complexType>\n\n");
3256
3257 if (!tableforest)
3258 {
3259 appendStringInfo(&result,
3260 "<xsd:complexType name=\"%s\">\n"
3261 " <xsd:sequence>\n"
3262 " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3263 " </xsd:sequence>\n"
3264 "</xsd:complexType>\n\n",
3265 tabletypename, rowtypename);
3266
3267 appendStringInfo(&result,
3268 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3269 xmltn, tabletypename);
3270 }
3271 else
3272 appendStringInfo(&result,
3273 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3274 xmltn, rowtypename);
3275
3276 xsd_schema_element_end(&result);
3277
3278 return result.data;
3279}
3280
3281
3282/*
3283 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3284 * section 9.12.
3285 */
3286static const char *
3287map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3288 bool tableforest, const char *targetns)
3289{
3290 char *dbname;
3291 char *nspname;
3292 char *xmlsn;
3293 char *schematypename;
3294 StringInfoData result;
3295 ListCell *cell;
3296
3297 dbname = get_database_name(MyDatabaseId);
3298 nspname = get_namespace_name(nspid);
3299
3300 initStringInfo(&result);
3301
3302 xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3303
3304 schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3305 dbname,
3306 nspname,
3307 NULL);
3308
3309 appendStringInfo(&result,
3310 "<xsd:complexType name=\"%s\">\n", schematypename);
3311 if (!tableforest)
3312 appendStringInfoString(&result,
3313 " <xsd:all>\n");
3314 else
3315 appendStringInfoString(&result,
3316 " <xsd:sequence>\n");
3317
3318 foreach(cell, relid_list)
3319 {
3320 Oid relid = lfirst_oid(cell);
3321 char *relname = get_rel_name(relid);
3322 char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3323 char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3324 dbname,
3325 nspname,
3326 relname);
3327
3328 if (!tableforest)
3329 appendStringInfo(&result,
3330 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3331 xmltn, tabletypename);
3332 else
3333 appendStringInfo(&result,
3334 " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3335 xmltn, tabletypename);
3336 }
3337
3338 if (!tableforest)
3339 appendStringInfoString(&result,
3340 " </xsd:all>\n");
3341 else
3342 appendStringInfoString(&result,
3343 " </xsd:sequence>\n");
3344 appendStringInfoString(&result,
3345 "</xsd:complexType>\n\n");
3346
3347 appendStringInfo(&result,
3348 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3349 xmlsn, schematypename);
3350
3351 return result.data;
3352}
3353
3354
3355/*
3356 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3357 * section 9.15.
3358 */
3359static const char *
3360map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3361 bool tableforest, const char *targetns)
3362{
3363 char *dbname;
3364 char *xmlcn;
3365 char *catalogtypename;
3366 StringInfoData result;
3367 ListCell *cell;
3368
3369 dbname = get_database_name(MyDatabaseId);
3370
3371 initStringInfo(&result);
3372
3373 xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3374
3375 catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3376 dbname,
3377 NULL,
3378 NULL);
3379
3380 appendStringInfo(&result,
3381 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3382 appendStringInfoString(&result,
3383 " <xsd:all>\n");
3384
3385 foreach(cell, nspid_list)
3386 {
3387 Oid nspid = lfirst_oid(cell);
3388 char *nspname = get_namespace_name(nspid);
3389 char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3390 char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3391 dbname,
3392 nspname,
3393 NULL);
3394
3395 appendStringInfo(&result,
3396 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3397 xmlsn, schematypename);
3398 }
3399
3400 appendStringInfoString(&result,
3401 " </xsd:all>\n");
3402 appendStringInfoString(&result,
3403 "</xsd:complexType>\n\n");
3404
3405 appendStringInfo(&result,
3406 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3407 xmlcn, catalogtypename);
3408
3409 return result.data;
3410}
3411
3412
3413/*
3414 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3415 */
3416static const char *
3417map_sql_type_to_xml_name(Oid typeoid, int typmod)
3418{
3419 StringInfoData result;
3420
3421 initStringInfo(&result);
3422
3423 switch (typeoid)
3424 {
3425 case BPCHAROID:
3426 if (typmod == -1)
3427 appendStringInfoString(&result, "CHAR");
3428 else
3429 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3430 break;
3431 case VARCHAROID:
3432 if (typmod == -1)
3433 appendStringInfoString(&result, "VARCHAR");
3434 else
3435 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3436 break;
3437 case NUMERICOID:
3438 if (typmod == -1)
3439 appendStringInfoString(&result, "NUMERIC");
3440 else
3441 appendStringInfo(&result, "NUMERIC_%d_%d",
3442 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3443 (typmod - VARHDRSZ) & 0xffff);
3444 break;
3445 case INT4OID:
3446 appendStringInfoString(&result, "INTEGER");
3447 break;
3448 case INT2OID:
3449 appendStringInfoString(&result, "SMALLINT");
3450 break;
3451 case INT8OID:
3452 appendStringInfoString(&result, "BIGINT");
3453 break;
3454 case FLOAT4OID:
3455 appendStringInfoString(&result, "REAL");
3456 break;
3457 case FLOAT8OID:
3458 appendStringInfoString(&result, "DOUBLE");
3459 break;
3460 case BOOLOID:
3461 appendStringInfoString(&result, "BOOLEAN");
3462 break;
3463 case TIMEOID:
3464 if (typmod == -1)
3465 appendStringInfoString(&result, "TIME");
3466 else
3467 appendStringInfo(&result, "TIME_%d", typmod);
3468 break;
3469 case TIMETZOID:
3470 if (typmod == -1)
3471 appendStringInfoString(&result, "TIME_WTZ");
3472 else
3473 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3474 break;
3475 case TIMESTAMPOID:
3476 if (typmod == -1)
3477 appendStringInfoString(&result, "TIMESTAMP");
3478 else
3479 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3480 break;
3481 case TIMESTAMPTZOID:
3482 if (typmod == -1)
3483 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3484 else
3485 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3486 break;
3487 case DATEOID:
3488 appendStringInfoString(&result, "DATE");
3489 break;
3490 case XMLOID:
3491 appendStringInfoString(&result, "XML");
3492 break;
3493 default:
3494 {
3495 HeapTuple tuple;
3496 Form_pg_type typtuple;
3497
3498 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3499 if (!HeapTupleIsValid(tuple))
3500 elog(ERROR, "cache lookup failed for type %u", typeoid);
3501 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3502
3503 appendStringInfoString(&result,
3504 map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3505 get_database_name(MyDatabaseId),
3506 get_namespace_name(typtuple->typnamespace),
3507 NameStr(typtuple->typname)));
3508
3509 ReleaseSysCache(tuple);
3510 }
3511 }
3512
3513 return result.data;
3514}
3515
3516
3517/*
3518 * Map a collection of SQL data types to XML Schema data types; see
3519 * SQL/XML:2008 section 9.7.
3520 */
3521static const char *
3522map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3523{
3524 List *uniquetypes = NIL;
3525 int i;
3526 StringInfoData result;
3527 ListCell *cell0;
3528
3529 /* extract all column types used in the set of TupleDescs */
3530 foreach(cell0, tupdesc_list)
3531 {
3532 TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3533
3534 for (i = 0; i < tupdesc->natts; i++)
3535 {
3536 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3537
3538 if (att->attisdropped)
3539 continue;
3540 uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3541 }
3542 }
3543
3544 /* add base types of domains */
3545 foreach(cell0, uniquetypes)
3546 {
3547 Oid typid = lfirst_oid(cell0);
3548 Oid basetypid = getBaseType(typid);
3549
3550 if (basetypid != typid)
3551 uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3552 }
3553
3554 /* Convert to textual form */
3555 initStringInfo(&result);
3556
3557 foreach(cell0, uniquetypes)
3558 {
3559 appendStringInfo(&result, "%s\n",
3560 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3561 -1));
3562 }
3563
3564 return result.data;
3565}
3566
3567
3568/*
3569 * Map an SQL data type to a named XML Schema data type; see
3570 * SQL/XML:2008 sections 9.5 and 9.6.
3571 *
3572 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3573 * a name attribute, which this function does. The name-less version
3574 * 9.5 doesn't appear to be required anywhere.)
3575 */
3576static const char *
3577map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3578{
3579 StringInfoData result;
3580 const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3581
3582 initStringInfo(&result);
3583
3584 if (typeoid == XMLOID)
3585 {
3586 appendStringInfoString(&result,
3587 "<xsd:complexType mixed=\"true\">\n"
3588 " <xsd:sequence>\n"
3589 " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3590 " </xsd:sequence>\n"
3591 "</xsd:complexType>\n");
3592 }
3593 else
3594 {
3595 appendStringInfo(&result,
3596 "<xsd:simpleType name=\"%s\">\n", typename);
3597
3598 switch (typeoid)
3599 {
3600 case BPCHAROID:
3601 case VARCHAROID:
3602 case TEXTOID:
3603 appendStringInfoString(&result,
3604 " <xsd:restriction base=\"xsd:string\">\n");
3605 if (typmod != -1)
3606 appendStringInfo(&result,
3607 " <xsd:maxLength value=\"%d\"/>\n",
3608 typmod - VARHDRSZ);
3609 appendStringInfoString(&result, " </xsd:restriction>\n");
3610 break;
3611
3612 case BYTEAOID:
3613 appendStringInfo(&result,
3614 " <xsd:restriction base=\"xsd:%s\">\n"
3615 " </xsd:restriction>\n",
3616 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3617 break;
3618
3619 case NUMERICOID:
3620 if (typmod != -1)
3621 appendStringInfo(&result,
3622 " <xsd:restriction base=\"xsd:decimal\">\n"
3623 " <xsd:totalDigits value=\"%d\"/>\n"
3624 " <xsd:fractionDigits value=\"%d\"/>\n"
3625 " </xsd:restriction>\n",
3626 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3627 (typmod - VARHDRSZ) & 0xffff);
3628 break;
3629
3630 case INT2OID:
3631 appendStringInfo(&result,
3632 " <xsd:restriction base=\"xsd:short\">\n"
3633 " <xsd:maxInclusive value=\"%d\"/>\n"
3634 " <xsd:minInclusive value=\"%d\"/>\n"
3635 " </xsd:restriction>\n",
3636 SHRT_MAX, SHRT_MIN);
3637 break;
3638
3639 case INT4OID:
3640 appendStringInfo(&result,
3641 " <xsd:restriction base=\"xsd:int\">\n"
3642 " <xsd:maxInclusive value=\"%d\"/>\n"
3643 " <xsd:minInclusive value=\"%d\"/>\n"
3644 " </xsd:restriction>\n",
3645 INT_MAX, INT_MIN);
3646 break;
3647
3648 case INT8OID:
3649 appendStringInfo(&result,
3650 " <xsd:restriction base=\"xsd:long\">\n"
3651 " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3652 " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3653 " </xsd:restriction>\n",
3654 (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3655 (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3656 break;
3657
3658 case FLOAT4OID:
3659 appendStringInfoString(&result,
3660 " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3661 break;
3662
3663 case FLOAT8OID:
3664 appendStringInfoString(&result,
3665 " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3666 break;
3667
3668 case BOOLOID:
3669 appendStringInfoString(&result,
3670 " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3671 break;
3672
3673 case TIMEOID:
3674 case TIMETZOID:
3675 {
3676 const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3677
3678 if (typmod == -1)
3679 appendStringInfo(&result,
3680 " <xsd:restriction base=\"xsd:time\">\n"
3681 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3682 " </xsd:restriction>\n", tz);
3683 else if (typmod == 0)
3684 appendStringInfo(&result,
3685 " <xsd:restriction base=\"xsd:time\">\n"
3686 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3687 " </xsd:restriction>\n", tz);
3688 else
3689 appendStringInfo(&result,
3690 " <xsd:restriction base=\"xsd:time\">\n"
3691 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3692 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3693 break;
3694 }
3695
3696 case TIMESTAMPOID:
3697 case TIMESTAMPTZOID:
3698 {
3699 const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3700
3701 if (typmod == -1)
3702 appendStringInfo(&result,
3703 " <xsd:restriction base=\"xsd:dateTime\">\n"
3704 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3705 " </xsd:restriction>\n", tz);
3706 else if (typmod == 0)
3707 appendStringInfo(&result,
3708 " <xsd:restriction base=\"xsd:dateTime\">\n"
3709 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3710 " </xsd:restriction>\n", tz);
3711 else
3712 appendStringInfo(&result,
3713 " <xsd:restriction base=\"xsd:dateTime\">\n"
3714 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3715 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3716 break;
3717 }
3718
3719 case DATEOID:
3720 appendStringInfoString(&result,
3721 " <xsd:restriction base=\"xsd:date\">\n"
3722 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3723 " </xsd:restriction>\n");
3724 break;
3725
3726 default:
3727 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3728 {
3729 Oid base_typeoid;
3730 int32 base_typmod = -1;
3731
3732 base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3733
3734 appendStringInfo(&result,
3735 " <xsd:restriction base=\"%s\"/>\n",
3736 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3737 }
3738 break;
3739 }
3740 appendStringInfoString(&result, "</xsd:simpleType>\n");
3741 }
3742
3743 return result.data;
3744}
3745
3746
3747/*
3748 * Map an SQL row to an XML element, taking the row from the active
3749 * SPI cursor. See also SQL/XML:2008 section 9.10.
3750 */
3751static void
3752SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3753 bool nulls, bool tableforest,
3754 const char *targetns, bool top_level)
3755{
3756 int i;
3757 char *xmltn;
3758
3759 if (tablename)
3760 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3761 else
3762 {
3763 if (tableforest)
3764 xmltn = "row";
3765 else
3766 xmltn = "table";
3767 }
3768
3769 if (tableforest)
3770 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3771 else
3772 appendStringInfoString(result, "<row>\n");
3773
3774 for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3775 {
3776 char *colname;
3777 Datum colval;
3778 bool isnull;
3779
3780 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3781 true, false);
3782 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3783 SPI_tuptable->tupdesc,
3784 i,
3785 &isnull);
3786 if (isnull)
3787 {
3788 if (nulls)
3789 appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3790 }
3791 else
3792 appendStringInfo(result, " <%s>%s</%s>\n",
3793 colname,
3794 map_sql_value_to_xml_value(colval,
3795 SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3796 colname);
3797 }
3798
3799 if (tableforest)
3800 {
3801 xmldata_root_element_end(result, xmltn);
3802 appendStringInfoChar(result, '\n');
3803 }
3804 else
3805 appendStringInfoString(result, "</row>\n\n");
3806}
3807
3808
3809/*
3810 * XPath related functions
3811 */
3812
3813#ifdef USE_LIBXML
3814
/*
 * Convert XML node to text.
 *
 * For attribute and text nodes, return the escaped text.  For anything else,
 * dump the whole subtree.
 *
 * "cur" is the libxml2 node to convert.  "xmlerrcxt" is the error context
 * whose err_occurred flag is checked after each libxml2 call in the subtree
 * path and which is handed to xml_ereport() on failure.
 *
 * libxml2-allocated objects (buffer, node copy, string) are released both
 * on the normal path and, via PG_CATCH, when an error is thrown.
 */
static text *
xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
	xmltype    *result;

	if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
	{
		/* volatile because these are set in PG_TRY and read in PG_CATCH */
		void		(*volatile nodefree) (xmlNodePtr) = NULL;
		volatile xmlBufferPtr buf = NULL;
		volatile xmlNodePtr cur_copy = NULL;

		PG_TRY();
		{
			int			bytes;

			buf = xmlBufferCreate();
			if (buf == NULL || xmlerrcxt->err_occurred)
				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
							"could not allocate xmlBuffer");

			/*
			 * Produce a dump of the node that we can serialize.  xmlNodeDump
			 * does that, but the result of that function won't contain
			 * namespace definitions from ancestor nodes, so we first do a
			 * xmlCopyNode() which duplicates the node along with its required
			 * namespace definitions.
			 *
			 * Some old libxml2 versions such as 2.7.6 produce partially
			 * broken XML_DOCUMENT_NODE nodes (unset content field) when
			 * copying them.  xmlNodeDump of such a node works fine, but
			 * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
			 */
			cur_copy = xmlCopyNode(cur, 1);
			if (cur_copy == NULL || xmlerrcxt->err_occurred)
				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
							"could not copy node");
			/* nodefree stays NULL until there is a copy that needs freeing */
			nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
				(void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;

			bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
			if (bytes == -1 || xmlerrcxt->err_occurred)
				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
							"could not dump node");

			result = xmlBuffer_to_xmltype(buf);
		}
		PG_CATCH();
		{
			/* release whatever was acquired before the error, then rethrow */
			if (nodefree)
				nodefree(cur_copy);
			if (buf)
				xmlBufferFree(buf);
			PG_RE_THROW();
		}
		PG_END_TRY();

		/* normal path: same cleanup; buf is known to be non-NULL here */
		if (nodefree)
			nodefree(cur_copy);
		xmlBufferFree(buf);
	}
	else
	{
		xmlChar    *str;

		str = xmlXPathCastNodeToString(cur);
		PG_TRY();
		{
			/* Here we rely on XML having the same representation as TEXT */
			char	   *escaped = escape_xml((char *) str);

			result = (xmltype *) cstring_to_text(escaped);
			pfree(escaped);
		}
		PG_CATCH();
		{
			/* str came from libxml2, so it must be freed with xmlFree */
			xmlFree(str);
			PG_RE_THROW();
		}
		PG_END_TRY();
		xmlFree(str);
	}

	return result;
}
3905
3906/*
3907 * Convert an XML XPath object (the result of evaluating an XPath expression)
3908 * to an array of xml values, which are appended to astate. The function
3909 * result value is the number of elements in the array.
3910 *
3911 * If "astate" is NULL then we don't generate the array value, but we still
3912 * return the number of elements it would have had.
3913 *
3914 * Nodesets are converted to an array containing the nodes' textual
3915 * representations. Primitive values (float, double, string) are converted
3916 * to a single-element array containing the value's string representation.
3917 */
3918static int
3919xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3920 ArrayBuildState *astate,
3921 PgXmlErrorContext *xmlerrcxt)
3922{
3923 int result = 0;
3924 Datum datum;
3925 Oid datumtype;
3926 char *result_str;
3927
3928 switch (xpathobj->type)
3929 {
3930 case XPATH_NODESET:
3931 if (xpathobj->nodesetval != NULL)
3932 {
3933 result = xpathobj->nodesetval->nodeNr;
3934 if (astate != NULL)
3935 {
3936 int i;
3937
3938 for (i = 0; i < result; i++)
3939 {
3940 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3941 xmlerrcxt));
3942 (void) accumArrayResult(astate, datum, false,
3943 XMLOID, CurrentMemoryContext);
3944 }
3945 }
3946 }
3947 return result;
3948
3949 case XPATH_BOOLEAN:
3950 if (astate == NULL)
3951 return 1;
3952 datum = BoolGetDatum(xpathobj->boolval);
3953 datumtype = BOOLOID;
3954 break;
3955
3956 case XPATH_NUMBER:
3957 if (astate == NULL)
3958 return 1;
3959 datum = Float8GetDatum(xpathobj->floatval);
3960 datumtype = FLOAT8OID;
3961 break;
3962
3963 case XPATH_STRING:
3964 if (astate == NULL)
3965 return 1;
3966 datum = CStringGetDatum((char *) xpathobj->stringval);
3967 datumtype = CSTRINGOID;
3968 break;
3969
3970 default:
3971 elog(ERROR, "xpath expression result type %d is unsupported",
3972 xpathobj->type);
3973 return 0; /* keep compiler quiet */
3974 }
3975
3976 /* Common code for scalar-value cases */
3977 result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3978 datum = PointerGetDatum(cstring_to_xmltype(result_str));
3979 (void) accumArrayResult(astate, datum, false,
3980 XMLOID, CurrentMemoryContext);
3981 return 1;
3982}
3983
3984
/*
 * Common code for xpath() and xmlexists()
 *
 * Evaluate XPath expression and return number of nodes in res_nitems
 * and array of XML values in astate.  Either of those pointers can be
 * NULL if the corresponding result isn't wanted.
 *
 * xpath_expr_text is the XPath expression; an empty expression is rejected.
 * data is the XML value to evaluate it against.  namespaces, if not NULL
 * and not zero-dimensional, must be a two-dimensional array of
 * (name, URI) pairs to register before evaluation.
 *
 * It is up to the user to ensure that the XML passed is in fact
 * an XML document - XPath doesn't work easily on fragments without
 * a context node being known.
 *
 * All libxml2 objects created here are freed before returning, and also
 * when an error is thrown from inside the PG_TRY block.
 */
static void
xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
			   int *res_nitems, ArrayBuildState *astate)
{
	PgXmlErrorContext *xmlerrcxt;
	/* volatile: assigned inside PG_TRY and inspected in PG_CATCH */
	volatile xmlParserCtxtPtr ctxt = NULL;
	volatile xmlDocPtr doc = NULL;
	volatile xmlXPathContextPtr xpathctx = NULL;
	volatile xmlXPathCompExprPtr xpathcomp = NULL;
	volatile xmlXPathObjectPtr xpathobj = NULL;
	char	   *datastr;
	int32		len;
	int32		xpath_len;
	xmlChar    *string;
	xmlChar    *xpath_expr;
	size_t		xmldecl_len = 0;
	int			i;
	int			ndim;
	Datum	   *ns_names_uris;
	bool	   *ns_names_uris_nulls;
	int			ns_count;

	/*
	 * Namespace mappings are passed as text[].  If an empty array is passed
	 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
	 * Else, a 2-dimensional array with length of the second axis being equal
	 * to 2 should be passed, i.e., every subarray contains 2 elements, the
	 * first element defining the name, the second one the URI.  Example:
	 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
	 * 'http://example2.com']].
	 */
	ndim = namespaces ? ARR_NDIM(namespaces) : 0;
	if (ndim != 0)
	{
		int		   *dims;

		dims = ARR_DIMS(namespaces);

		if (ndim != 2 || dims[1] != 2)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_EXCEPTION),
					 errmsg("invalid array for XML namespace mapping"),
					 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));

		Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);

		/* flatten into name, URI, name, URI, ... */
		deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
						  &ns_names_uris, &ns_names_uris_nulls,
						  &ns_count);

		Assert((ns_count % 2) == 0);	/* checked above */
		ns_count /= 2;			/* count pairs only */
	}
	else
	{
		ns_names_uris = NULL;
		ns_names_uris_nulls = NULL;
		ns_count = 0;
	}

	datastr = VARDATA(data);
	len = VARSIZE(data) - VARHDRSZ;
	xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
	if (xpath_len == 0)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("empty XPath expression")));

	/* make xmlChar copies of the document and the expression */
	string = pg_xmlCharStrndup(datastr, len);
	xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);

	/*
	 * In a UTF8 database, skip any xml declaration, which might assert
	 * another encoding.  Ignore parse_xml_decl() failure, letting
	 * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
	 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
	 * those scenarios bug-compatible with historical behavior.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
		parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);

	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);

	PG_TRY();
	{
		xmlInitParser();

		/*
		 * The document is parsed again here, so the same value can end up
		 * being parsed twice during one command execution.
		 */
		ctxt = xmlNewParserCtxt();
		if (ctxt == NULL || xmlerrcxt->err_occurred)
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
						"could not allocate parser context");
		/* parse starting after any xml declaration that was skipped */
		doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
								len - xmldecl_len, NULL, NULL, 0);
		if (doc == NULL || xmlerrcxt->err_occurred)
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
						"could not parse XML document");
		xpathctx = xmlXPathNewContext(doc);
		if (xpathctx == NULL || xmlerrcxt->err_occurred)
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
						"could not allocate XPath context");
		/* evaluate relative to the document node */
		xpathctx->node = (xmlNodePtr) doc;

		/* register namespaces, if any */
		if (ns_count > 0)
		{
			for (i = 0; i < ns_count; i++)
			{
				char	   *ns_name;
				char	   *ns_uri;

				if (ns_names_uris_nulls[i * 2] ||
					ns_names_uris_nulls[i * 2 + 1])
					ereport(ERROR,
							(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
							 errmsg("neither namespace name nor URI may be null")));
				ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
				ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
				if (xmlXPathRegisterNs(xpathctx,
									   (xmlChar *) ns_name,
									   (xmlChar *) ns_uri) != 0)
					ereport(ERROR,	/* is this an internal error??? */
							(errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
									ns_name, ns_uri)));
			}
		}

		xpathcomp = xmlXPathCompile(xpath_expr);
		if (xpathcomp == NULL || xmlerrcxt->err_occurred)
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
						"invalid XPath expression");

		/*
		 * Version 2.6.27 introduces a function named
		 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
		 * but we can derive the existence by whether any nodes are returned,
		 * which avoids requiring a newer library version and keeps the code
		 * the same for both callers.
		 */
		xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
		if (xpathobj == NULL || xmlerrcxt->err_occurred)
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
						"could not create XPath object");

		/*
		 * Extract the results as requested.
		 */
		if (res_nitems != NULL)
			*res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
		else
			(void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
	}
	PG_CATCH();
	{
		/* free only what had been created, in reverse order of creation */
		if (xpathobj)
			xmlXPathFreeObject(xpathobj);
		if (xpathcomp)
			xmlXPathFreeCompExpr(xpathcomp);
		if (xpathctx)
			xmlXPathFreeContext(xpathctx);
		if (doc)
			xmlFreeDoc(doc);
		if (ctxt)
			xmlFreeParserCtxt(ctxt);

		pg_xml_done(xmlerrcxt, true);

		PG_RE_THROW();
	}
	PG_END_TRY();

	/* normal path: every object exists, so free them unconditionally */
	xmlXPathFreeObject(xpathobj);
	xmlXPathFreeCompExpr(xpathcomp);
	xmlXPathFreeContext(xpathctx);
	xmlFreeDoc(doc);
	xmlFreeParserCtxt(ctxt);

	pg_xml_done(xmlerrcxt, false);
}
4178#endif /* USE_LIBXML */
4179
4180/*
4181 * Evaluate XPath expression and return array of XML values.
4182 *
4183 * As we have no support of XQuery sequences yet, this function seems
4184 * to be the most useful one (array of XML functions plays a role of
4185 * some kind of substitution for XQuery sequences).
4186 */
4187Datum
4188xpath(PG_FUNCTION_ARGS)
4189{
4190#ifdef USE_LIBXML
4191 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4192 xmltype *data = PG_GETARG_XML_P(1);
4193 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4194 ArrayBuildState *astate;
4195
4196 astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4197 xpath_internal(xpath_expr_text, data, namespaces,
4198 NULL, astate);
4199 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4200#else
4201 NO_XML_SUPPORT();
4202 return 0;
4203#endif
4204}
4205
4206/*
4207 * Determines if the node specified by the supplied XPath exists
4208 * in a given XML document, returning a boolean.
4209 */
4210Datum
4211xmlexists(PG_FUNCTION_ARGS)
4212{
4213#ifdef USE_LIBXML
4214 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4215 xmltype *data = PG_GETARG_XML_P(1);
4216 int res_nitems;
4217
4218 xpath_internal(xpath_expr_text, data, NULL,
4219 &res_nitems, NULL);
4220
4221 PG_RETURN_BOOL(res_nitems > 0);
4222#else
4223 NO_XML_SUPPORT();
4224 return 0;
4225#endif
4226}
4227
4228/*
4229 * Determines if the node specified by the supplied XPath exists
4230 * in a given XML document, returning a boolean. Differs from
4231 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4232 */
4233Datum
4234xpath_exists(PG_FUNCTION_ARGS)
4235{
4236#ifdef USE_LIBXML
4237 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4238 xmltype *data = PG_GETARG_XML_P(1);
4239 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4240 int res_nitems;
4241
4242 xpath_internal(xpath_expr_text, data, namespaces,
4243 &res_nitems, NULL);
4244
4245 PG_RETURN_BOOL(res_nitems > 0);
4246#else
4247 NO_XML_SUPPORT();
4248 return 0;
4249#endif
4250}
4251
4252/*
4253 * Functions for checking well-formed-ness
4254 */
4255
4256#ifdef USE_LIBXML
/*
 * Report whether "data" can be parsed by xml_parse() under the given
 * xmloption_arg.  Any error thrown by the parse is discarded and reported
 * as a false result instead of being propagated.
 */
static bool
wellformed_xml(text *data, XmlOptionType xmloption_arg)
{
	bool		result;
	/* volatile: set inside PG_TRY and read after the block */
	volatile xmlDocPtr doc = NULL;

	/* We want to catch any exceptions and return false */
	PG_TRY();
	{
		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
		result = true;
	}
	PG_CATCH();
	{
		/* drop the pending error rather than rethrowing it */
		FlushErrorState();
		result = false;
	}
	PG_END_TRY();

	/* the parsed document was only needed to see whether parsing works */
	if (doc)
		xmlFreeDoc(doc);

	return result;
}
4281#endif
4282
4283Datum
4284xml_is_well_formed(PG_FUNCTION_ARGS)
4285{
4286#ifdef USE_LIBXML
4287 text *data = PG_GETARG_TEXT_PP(0);
4288
4289 PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4290#else
4291 NO_XML_SUPPORT();
4292 return 0;
4293#endif /* not USE_LIBXML */
4294}
4295
4296Datum
4297xml_is_well_formed_document(PG_FUNCTION_ARGS)
4298{
4299#ifdef USE_LIBXML
4300 text *data = PG_GETARG_TEXT_PP(0);
4301
4302 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4303#else
4304 NO_XML_SUPPORT();
4305 return 0;
4306#endif /* not USE_LIBXML */
4307}
4308
4309Datum
4310xml_is_well_formed_content(PG_FUNCTION_ARGS)
4311{
4312#ifdef USE_LIBXML
4313 text *data = PG_GETARG_TEXT_PP(0);
4314
4315 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4316#else
4317 NO_XML_SUPPORT();
4318 return 0;
4319#endif /* not USE_LIBXML */
4320}
4321
4322/*
4323 * support functions for XMLTABLE
4324 *
4325 */
4326#ifdef USE_LIBXML
4327
4328/*
4329 * Returns private data from executor state. Ensure validity by check with
4330 * MAGIC number.
4331 */
4332static inline XmlTableBuilderData *
4333GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4334{
4335 XmlTableBuilderData *result;
4336
4337 if (!IsA(state, TableFuncScanState))
4338 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4339 result = (XmlTableBuilderData *) state->opaque;
4340 if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4341 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4342
4343 return result;
4344}
4345#endif
4346
/*
 * XmlTableInitOpaque
 *		Fill in TableFuncScanState->opaque for XmlTable processor; initialize
 *		the XML parser.
 *
 * natts is the number of output columns; it sizes the per-column array of
 * compiled XPath expressions.
 *
 * Note: Because we call pg_xml_init() here and pg_xml_done() in
 * XmlTableDestroyOpaque, it is critical for robustness that no other
 * executor nodes run until this node is processed to completion.  Caller
 * must execute this to completion (probably filling a tuplestore to exhaust
 * this node in a single pass) instead of using row-per-call mode.
 */
static void
XmlTableInitOpaque(TableFuncScanState *state, int natts)
{
#ifdef USE_LIBXML
	/* volatile: assigned inside PG_TRY and inspected in PG_CATCH */
	volatile xmlParserCtxtPtr ctxt = NULL;
	XmlTableBuilderData *xtCxt;
	PgXmlErrorContext *xmlerrcxt;

	/* build the private struct and stamp it for later validity checks */
	xtCxt = palloc0(sizeof(XmlTableBuilderData));
	xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
	xtCxt->natts = natts;
	xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);

	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);

	PG_TRY();
	{
		xmlInitParser();

		ctxt = xmlNewParserCtxt();
		if (ctxt == NULL || xmlerrcxt->err_occurred)
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
						"could not allocate parser context");
	}
	PG_CATCH();
	{
		/* undo the libxml setup before passing the error on */
		if (ctxt != NULL)
			xmlFreeParserCtxt(ctxt);

		pg_xml_done(xmlerrcxt, true);

		PG_RE_THROW();
	}
	PG_END_TRY();

	/* publish the parser state only once everything has succeeded */
	xtCxt->xmlerrcxt = xmlerrcxt;
	xtCxt->ctxt = ctxt;

	state->opaque = xtCxt;
#else
	NO_XML_SUPPORT();
#endif							/* not USE_LIBXML */
}
4401
/*
 * XmlTableSetDocument
 *		Install the input document
 *
 * value is the XML datum to process.  It is parsed with the parser context
 * stored in the builder's private data, and the resulting document plus a
 * new XPath context rooted at it are saved there for later calls.
 */
static void
XmlTableSetDocument(TableFuncScanState *state, Datum value)
{
#ifdef USE_LIBXML
	XmlTableBuilderData *xtCxt;
	xmltype    *xmlval = DatumGetXmlP(value);
	char	   *str;
	xmlChar    *xstr;
	int			length;
	/* volatile: assigned inside PG_TRY and inspected in PG_CATCH */
	volatile xmlDocPtr doc = NULL;
	volatile xmlXPathContextPtr xpathcxt = NULL;

	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");

	/*
	 * Use out function for casting to string (remove encoding property). See
	 * comment in xml_out.
	 */
	str = xml_out_internal(xmlval, 0);

	length = strlen(str);
	xstr = pg_xmlCharStrndup(str, length);

	PG_TRY();
	{
		doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
		if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
						"could not parse XML document");
		xpathcxt = xmlXPathNewContext(doc);
		if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
						"could not allocate XPath context");
		/* evaluate relative to the document node */
		xpathcxt->node = (xmlNodePtr) doc;
	}
	PG_CATCH();
	{
		/* free what was created here; the shared state is left untouched */
		if (xpathcxt != NULL)
			xmlXPathFreeContext(xpathcxt);
		if (doc != NULL)
			xmlFreeDoc(doc);

		PG_RE_THROW();
	}
	PG_END_TRY();

	/* store the results only after both steps have succeeded */
	xtCxt->doc = doc;
	xtCxt->xpathcxt = xpathcxt;
#else
	NO_XML_SUPPORT();
#endif							/* not USE_LIBXML */
}
4458
4459/*
4460 * XmlTableSetNamespace
4461 * Add a namespace declaration
4462 */
4463static void
4464XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4465{
4466#ifdef USE_LIBXML
4467 XmlTableBuilderData *xtCxt;
4468
4469 if (name == NULL)
4470 ereport(ERROR,
4471 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4472 errmsg("DEFAULT namespace is not supported")));
4473 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4474
4475 if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4476 pg_xmlCharStrndup(name, strlen(name)),
4477 pg_xmlCharStrndup(uri, strlen(uri))))
4478 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4479 "could not set XML namespace");
4480#else
4481 NO_XML_SUPPORT();
4482#endif /* not USE_LIBXML */
4483}
4484
4485/*
4486 * XmlTableSetRowFilter
4487 * Install the row-filter Xpath expression.
4488 */
4489static void
4490XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4491{
4492#ifdef USE_LIBXML
4493 XmlTableBuilderData *xtCxt;
4494 xmlChar *xstr;
4495
4496 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4497
4498 if (*path == '\0')
4499 ereport(ERROR,
4500 (errcode(ERRCODE_DATA_EXCEPTION),
4501 errmsg("row path filter must not be empty string")));
4502
4503 xstr = pg_xmlCharStrndup(path, strlen(path));
4504
4505 xtCxt->xpathcomp = xmlXPathCompile(xstr);
4506 if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4507 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4508 "invalid XPath expression");
4509#else
4510 NO_XML_SUPPORT();
4511#endif /* not USE_LIBXML */
4512}
4513
4514/*
4515 * XmlTableSetColumnFilter
4516 * Install the column-filter Xpath expression, for the given column.
4517 */
4518static void
4519XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4520{
4521#ifdef USE_LIBXML
4522 XmlTableBuilderData *xtCxt;
4523 xmlChar *xstr;
4524
4525 AssertArg(PointerIsValid(path));
4526
4527 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4528
4529 if (*path == '\0')
4530 ereport(ERROR,
4531 (errcode(ERRCODE_DATA_EXCEPTION),
4532 errmsg("column path filter must not be empty string")));
4533
4534 xstr = pg_xmlCharStrndup(path, strlen(path));
4535
4536 xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4537 if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4538 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4539 "invalid XPath expression");
4540#else
4541 NO_XML_SUPPORT();
4542#endif /* not USE_LIBXML */
4543}
4544
4545/*
4546 * XmlTableFetchRow
4547 * Prepare the next "current" tuple for upcoming GetValue calls.
4548 * Returns false if the row-filter expression returned no more rows.
4549 */
4550static bool
4551XmlTableFetchRow(TableFuncScanState *state)
4552{
4553#ifdef USE_LIBXML
4554 XmlTableBuilderData *xtCxt;
4555
4556 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4557
4558 /*
4559 * XmlTable returns table - set of composite values. The error context, is
4560 * used for producement more values, between two calls, there can be
4561 * created and used another libxml2 error context. It is libxml2 global
4562 * value, so it should be refreshed any time before any libxml2 usage,
4563 * that is finished by returning some value.
4564 */
4565 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4566
4567 if (xtCxt->xpathobj == NULL)
4568 {
4569 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4570 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4571 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4572 "could not create XPath object");
4573
4574 xtCxt->row_count = 0;
4575 }
4576
4577 if (xtCxt->xpathobj->type == XPATH_NODESET)
4578 {
4579 if (xtCxt->xpathobj->nodesetval != NULL)
4580 {
4581 if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4582 return true;
4583 }
4584 }
4585
4586 return false;
4587#else
4588 NO_XML_SUPPORT();
4589 return false;
4590#endif /* not USE_LIBXML */
4591}
4592
4593/*
4594 * XmlTableGetValue
4595 * Return the value for column number 'colnum' for the current row. If
4596 * column -1 is requested, return representation of the whole row.
4597 *
4598 * This leaks memory, so be sure to reset often the context in which it's
4599 * called.
4600 */
4601static Datum
4602XmlTableGetValue(TableFuncScanState *state, int colnum,
4603 Oid typid, int32 typmod, bool *isnull)
4604{
4605#ifdef USE_LIBXML
4606 XmlTableBuilderData *xtCxt;
4607 Datum result = (Datum) 0;
4608 xmlNodePtr cur;
4609 char *cstr = NULL;
4610 volatile xmlXPathObjectPtr xpathobj = NULL;
4611
4612 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4613
4614 Assert(xtCxt->xpathobj &&
4615 xtCxt->xpathobj->type == XPATH_NODESET &&
4616 xtCxt->xpathobj->nodesetval != NULL);
4617
4618 /* Propagate context related error context to libxml2 */
4619 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4620
4621 *isnull = false;
4622
4623 cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4624
4625 Assert(xtCxt->xpathscomp[colnum] != NULL);
4626
4627 PG_TRY();
4628 {
4629 /* Set current node as entry point for XPath evaluation */
4630 xtCxt->xpathcxt->node = cur;
4631
4632 /* Evaluate column path */
4633 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4634 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4635 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4636 "could not create XPath object");
4637
4638 /*
4639 * There are four possible cases, depending on the number of nodes
4640 * returned by the XPath expression and the type of the target column:
4641 * a) XPath returns no nodes. b) The target type is XML (return all
4642 * as XML). For non-XML return types: c) One node (return content).
4643 * d) Multiple nodes (error).
4644 */
4645 if (xpathobj->type == XPATH_NODESET)
4646 {
4647 int count = 0;
4648
4649 if (xpathobj->nodesetval != NULL)
4650 count = xpathobj->nodesetval->nodeNr;
4651
4652 if (xpathobj->nodesetval == NULL || count == 0)
4653 {
4654 *isnull = true;
4655 }
4656 else
4657 {
4658 if (typid == XMLOID)
4659 {
4660 text *textstr;
4661 StringInfoData str;
4662
4663 /* Concatenate serialized values */
4664 initStringInfo(&str);
4665 for (int i = 0; i < count; i++)
4666 {
4667 textstr =
4668 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4669 xtCxt->xmlerrcxt);
4670
4671 appendStringInfoText(&str, textstr);
4672 }
4673 cstr = str.data;
4674 }
4675 else
4676 {
4677 xmlChar *str;
4678
4679 if (count > 1)
4680 ereport(ERROR,
4681 (errcode(ERRCODE_CARDINALITY_VIOLATION),
4682 errmsg("more than one value returned by column XPath expression")));
4683
4684 str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4685 cstr = str ? xml_pstrdup_and_free(str) : "";
4686 }
4687 }
4688 }
4689 else if (xpathobj->type == XPATH_STRING)
4690 {
4691 /* Content should be escaped when target will be XML */
4692 if (typid == XMLOID)
4693 cstr = escape_xml((char *) xpathobj->stringval);
4694 else
4695 cstr = (char *) xpathobj->stringval;
4696 }
4697 else if (xpathobj->type == XPATH_BOOLEAN)
4698 {
4699 char typcategory;
4700 bool typispreferred;
4701 xmlChar *str;
4702
4703 /* Allow implicit casting from boolean to numbers */
4704 get_type_category_preferred(typid, &typcategory, &typispreferred);
4705
4706 if (typcategory != TYPCATEGORY_NUMERIC)
4707 str = xmlXPathCastBooleanToString(xpathobj->boolval);
4708 else
4709 str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4710
4711 cstr = xml_pstrdup_and_free(str);
4712 }
4713 else if (xpathobj->type == XPATH_NUMBER)
4714 {
4715 xmlChar *str;
4716
4717 str = xmlXPathCastNumberToString(xpathobj->floatval);
4718 cstr = xml_pstrdup_and_free(str);
4719 }
4720 else
4721 elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4722
4723 /*
4724 * By here, either cstr contains the result value, or the isnull flag
4725 * has been set.
4726 */
4727 Assert(cstr || *isnull);
4728
4729 if (!*isnull)
4730 result = InputFunctionCall(&state->in_functions[colnum],
4731 cstr,
4732 state->typioparams[colnum],
4733 typmod);
4734 }
4735 PG_CATCH();
4736 {
4737 if (xpathobj != NULL)
4738 xmlXPathFreeObject(xpathobj);
4739 PG_RE_THROW();
4740 }
4741 PG_END_TRY();
4742
4743 xmlXPathFreeObject(xpathobj);
4744
4745 return result;
4746#else
4747 NO_XML_SUPPORT();
4748 return 0;
4749#endif /* not USE_LIBXML */
4750}
4751
4752/*
4753 * XmlTableDestroyOpaque
4754 * Release all libxml2 resources
4755 */
4756static void
4757XmlTableDestroyOpaque(TableFuncScanState *state)
4758{
4759#ifdef USE_LIBXML
4760 XmlTableBuilderData *xtCxt;
4761
4762 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4763
4764 /* Propagate context related error context to libxml2 */
4765 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4766
4767 if (xtCxt->xpathscomp != NULL)
4768 {
4769 int i;
4770
4771 for (i = 0; i < xtCxt->natts; i++)
4772 if (xtCxt->xpathscomp[i] != NULL)
4773 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4774 }
4775
4776 if (xtCxt->xpathobj != NULL)
4777 xmlXPathFreeObject(xtCxt->xpathobj);
4778 if (xtCxt->xpathcomp != NULL)
4779 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4780 if (xtCxt->xpathcxt != NULL)
4781 xmlXPathFreeContext(xtCxt->xpathcxt);
4782 if (xtCxt->doc != NULL)
4783 xmlFreeDoc(xtCxt->doc);
4784 if (xtCxt->ctxt != NULL)
4785 xmlFreeParserCtxt(xtCxt->ctxt);
4786
4787 pg_xml_done(xtCxt->xmlerrcxt, true);
4788
4789 /* not valid anymore */
4790 xtCxt->magic = 0;
4791 state->opaque = NULL;
4792
4793#else
4794 NO_XML_SUPPORT();
4795#endif /* not USE_LIBXML */
4796}
4797