@@ -119,9 +119,10 @@ struct PgXmlErrorContext
119119
120120static xmlParserInputPtr xmlPgEntityLoader (const char * URL , const char * ID ,
121121 xmlParserCtxtPtr ctxt );
122+ static void xml_errsave (Node * escontext , PgXmlErrorContext * errcxt ,
123+ int sqlcode , const char * msg );
122124static void xml_errorHandler (void * data , xmlErrorPtr error );
123- static void xml_ereport_by_code (int level , int sqlcode ,
124- const char * msg , int code );
125+ static int errdetail_for_xml_code (int code );
125126static void chopStringInfoNewlines (StringInfo str );
126127static void appendStringInfoLineSeparator (StringInfo str );
127128
@@ -143,7 +144,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143144 pg_enc encoding , int standalone );
144145static bool xml_doctype_in_content (const xmlChar * str );
145146static xmlDocPtr xml_parse (text * data , XmlOptionType xmloption_arg ,
146- bool preserve_whitespace , int encoding );
147+ bool preserve_whitespace , int encoding ,
148+ Node * escontext );
147149static text * xml_xmlnodetoxmltype (xmlNodePtr cur , PgXmlErrorContext * xmlerrcxt );
148150static int xml_xpathobjtoxmlarray (xmlXPathObjectPtr xpathobj ,
149151 ArrayBuildState * astate ,
@@ -261,14 +263,18 @@ xml_in(PG_FUNCTION_ARGS)
261263 xmltype * vardata ;
262264 xmlDocPtr doc ;
263265
266+ /* Build the result object. */
264267 vardata = (xmltype * ) cstring_to_text (s );
265268
266269 /*
267- * Parse the data to check if it is well-formed XML data. Assume that
268- * ERROR occurred if parsing failed.
270+ * Parse the data to check if it is well-formed XML data.
271+ *
272+ * Note: we don't need to worry about whether a soft error is detected.
269273 */
270- doc = xml_parse (vardata , xmloption , true, GetDatabaseEncoding ());
271- xmlFreeDoc (doc );
274+ doc = xml_parse (vardata , xmloption , true, GetDatabaseEncoding (),
275+ fcinfo -> context );
276+ if (doc != NULL )
277+ xmlFreeDoc (doc );
272278
273279 PG_RETURN_XML_P (vardata );
274280#else
@@ -323,9 +329,10 @@ xml_out_internal(xmltype *x, pg_enc target_encoding)
323329 return buf .data ;
324330 }
325331
326- xml_ereport_by_code (WARNING , ERRCODE_INTERNAL_ERROR ,
327- "could not parse XML declaration in stored value" ,
328- res_code );
332+ ereport (WARNING ,
333+ errcode (ERRCODE_INTERNAL_ERROR ),
334+ errmsg_internal ("could not parse XML declaration in stored value" ),
335+ errdetail_for_xml_code (res_code ));
329336#endif
330337 return str ;
331338}
@@ -392,7 +399,7 @@ xml_recv(PG_FUNCTION_ARGS)
392399 * Parse the data to check if it is well-formed XML data. Since we pass
393400 * a NULL escontext, xml_parse will throw ERROR if not.
394401 */
395- doc = xml_parse (result , xmloption , true, encoding );
402+ doc = xml_parse (result , xmloption , true, encoding , NULL );
396403 xmlFreeDoc (doc );
397404
398405 /* Now that we know what we're dealing with, convert to server encoding */
@@ -754,7 +761,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
754761 xmlDocPtr doc ;
755762
756763 doc = xml_parse (data , xmloption_arg , preserve_whitespace ,
757- GetDatabaseEncoding ());
764+ GetDatabaseEncoding (), NULL );
758765 xmlFreeDoc (doc );
759766
760767 return (xmltype * ) data ;
@@ -895,7 +902,7 @@ xml_is_document(xmltype *arg)
895902 PG_TRY ();
896903 {
897904 doc = xml_parse ((text * ) arg , XMLOPTION_DOCUMENT , true,
898- GetDatabaseEncoding ());
905+ GetDatabaseEncoding (), NULL );
899906 result = true;
900907 }
901908 PG_CATCH ();
@@ -1500,17 +1507,26 @@ xml_doctype_in_content(const xmlChar *str)
15001507
15011508
15021509/*
1503- * Convert a C string to XML internal representation
1510+ * Convert a text object to XML internal representation
1511+ *
1512+ * data is the source data (must not be toasted!), encoding is its encoding,
1513+ * and xmloption_arg and preserve_whitespace are options for the
1514+ * transformation.
1515+ *
1516+ * Errors normally result in ereport(ERROR), but if escontext is an
1517+ * ErrorSaveContext, then "safe" errors are reported there instead, and the
1518+ * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
15041519 *
15051520 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1506- * else a permanent memory leak will ensue!
1521+ * else a permanent memory leak will ensue! But note the result could
1522+ * be NULL after a soft error.
15071523 *
15081524 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
15091525 * yet do not use SAX - see xmlreader.c)
15101526 */
15111527static xmlDocPtr
15121528xml_parse (text * data , XmlOptionType xmloption_arg , bool preserve_whitespace ,
1513- int encoding )
1529+ int encoding , Node * escontext )
15141530{
15151531 int32 len ;
15161532 xmlChar * string ;
@@ -1519,9 +1535,20 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15191535 volatile xmlParserCtxtPtr ctxt = NULL ;
15201536 volatile xmlDocPtr doc = NULL ;
15211537
1538+ /*
1539+ * This step looks annoyingly redundant, but we must do it to have a
1540+ * null-terminated string in case encoding conversion isn't required.
1541+ */
15221542 len = VARSIZE_ANY_EXHDR (data ); /* will be useful later */
15231543 string = xml_text2xmlChar (data );
15241544
1545+ /*
1546+ * If the data isn't UTF8, we must translate before giving it to libxml.
1547+ *
1548+ * XXX ideally, we'd catch any encoding conversion failure and return a
1549+ * soft error. However, failure to convert to UTF8 should be pretty darn
1550+ * rare, so for now this is left undone.
1551+ */
15251552 utf8string = pg_do_encoding_conversion (string ,
15261553 len ,
15271554 encoding ,
@@ -1539,6 +1566,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15391566 xmlChar * version = NULL ;
15401567 int standalone = 0 ;
15411568
1569+ /* Any errors here are thrown as hard errors, not saved as soft errors */
15421570 xmlInitParser ();
15431571
15441572 ctxt = xmlNewParserCtxt ();
@@ -1555,9 +1583,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15551583 res_code = parse_xml_decl (utf8string ,
15561584 & count , & version , NULL , & standalone );
15571585 if (res_code != 0 )
1558- xml_ereport_by_code (ERROR , ERRCODE_INVALID_XML_CONTENT ,
1559- "invalid XML content: invalid XML declaration" ,
1560- res_code );
1586+ {
1587+ errsave (escontext ,
1588+ errcode (ERRCODE_INVALID_XML_CONTENT ),
1589+ errmsg_internal ("invalid XML content: invalid XML declaration" ),
1590+ errdetail_for_xml_code (res_code ));
1591+ goto fail ;
1592+ }
15611593
15621594 /* Is there a DOCTYPE element? */
15631595 if (xml_doctype_in_content (utf8string + count ))
@@ -1580,20 +1612,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15801612 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS ));
15811613 if (doc == NULL || xmlerrcxt -> err_occurred )
15821614 {
1583- /* Use original option to decide which error code to throw */
1615+ /* Use original option to decide which error code to report */
15841616 if (xmloption_arg == XMLOPTION_DOCUMENT )
1585- xml_ereport (xmlerrcxt , ERROR , ERRCODE_INVALID_XML_DOCUMENT ,
1617+ xml_errsave (escontext , xmlerrcxt ,
1618+ ERRCODE_INVALID_XML_DOCUMENT ,
15861619 "invalid XML document" );
15871620 else
1588- xml_ereport (xmlerrcxt , ERROR , ERRCODE_INVALID_XML_CONTENT ,
1621+ xml_errsave (escontext , xmlerrcxt ,
1622+ ERRCODE_INVALID_XML_CONTENT ,
15891623 "invalid XML content" );
1624+ goto fail ;
15901625 }
15911626 }
15921627 else
15931628 {
15941629 doc = xmlNewDoc (version );
1630+ if (doc == NULL || xmlerrcxt -> err_occurred )
1631+ xml_ereport (xmlerrcxt , ERROR , ERRCODE_OUT_OF_MEMORY ,
1632+ "could not allocate XML document" );
1633+
15951634 Assert (doc -> encoding == NULL );
15961635 doc -> encoding = xmlStrdup ((const xmlChar * ) "UTF-8" );
1636+ if (doc -> encoding == NULL || xmlerrcxt -> err_occurred )
1637+ xml_ereport (xmlerrcxt , ERROR , ERRCODE_OUT_OF_MEMORY ,
1638+ "could not allocate XML document" );
15971639 doc -> standalone = standalone ;
15981640
15991641 /* allow empty content */
@@ -1602,10 +1644,17 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16021644 res_code = xmlParseBalancedChunkMemory (doc , NULL , NULL , 0 ,
16031645 utf8string + count , NULL );
16041646 if (res_code != 0 || xmlerrcxt -> err_occurred )
1605- xml_ereport (xmlerrcxt , ERROR , ERRCODE_INVALID_XML_CONTENT ,
1647+ {
1648+ xml_errsave (escontext , xmlerrcxt ,
1649+ ERRCODE_INVALID_XML_CONTENT ,
16061650 "invalid XML content" );
1651+ goto fail ;
1652+ }
16071653 }
16081654 }
1655+
1656+ fail :
1657+ ;
16091658 }
16101659 PG_CATCH ();
16111660 {
@@ -1745,6 +1794,44 @@ xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
17451794}
17461795
17471796
1797+ /*
1798+ * xml_errsave --- save an XML-related error
1799+ *
1800+ * If escontext is an ErrorSaveContext, error details are saved into it,
1801+ * and control returns normally.
1802+ *
1803+ * Otherwise, the error is thrown, so that this is equivalent to
1804+ * xml_ereport() with level == ERROR.
1805+ *
1806+ * This should be used only for errors that we're sure we do not need
1807+ * a transaction abort to clean up after.
1808+ */
1809+ static void
1810+ xml_errsave (Node * escontext , PgXmlErrorContext * errcxt ,
1811+ int sqlcode , const char * msg )
1812+ {
1813+ char * detail ;
1814+ 
1815+ /* Sanity check: a wrong magic number is always a hard ERROR, regardless of escontext */
1816+ if (errcxt -> magic != ERRCXT_MAGIC )
1817+ elog (ERROR , "xml_errsave called with invalid PgXmlErrorContext" );
1818+ 
1819+ /* Reset err_occurred: the pending libxml error is being reported by this call */
1820+ errcxt -> err_occurred = false;
1821+ 
1822+ /* Attach an errdetail only if err_buf holds some text from libxml; else report msg alone */
1823+ if (errcxt -> err_buf .len > 0 )
1824+ detail = errcxt -> err_buf .data ;
1825+ else
1826+ detail = NULL ;
1827+ 
1828+ errsave (escontext ,
1829+ (errcode (sqlcode ),
1830+ errmsg_internal ("%s" , msg ),
1831+ detail ? errdetail_internal ("%s" , detail ) : 0 ));
1832+ }
1833+
1834+
17481835/*
17491836 * Error handler for libxml errors and warnings
17501837 */
@@ -1917,15 +2004,16 @@ xml_errorHandler(void *data, xmlErrorPtr error)
19172004
19182005
19192006/*
1920- * Wrapper for "ereport" function for XML-related errors. The "msg"
1921- * is the SQL-level message; some can be adopted from the SQL/XML
1922- * standard. This function uses "code" to create a textual detail
1923- * message. At the moment, we only need to cover those codes that we
2007+ * Convert libxml error codes into textual errdetail messages.
2008+ *
2009+ * This should be called within an ereport or errsave invocation,
2010+ * just as errdetail would be.
2011+ *
2012+ * At the moment, we only need to cover those codes that we
19242013 * may raise in this file.
19252014 */
1926- static void
1927- xml_ereport_by_code (int level , int sqlcode ,
1928- const char * msg , int code )
2015+ static int
2016+ errdetail_for_xml_code (int code )
19292017{
19302018 const char * det ;
19312019
@@ -1954,10 +2042,7 @@ xml_ereport_by_code(int level, int sqlcode,
19542042 break ;
19552043 }
19562044
1957- ereport (level ,
1958- (errcode (sqlcode ),
1959- errmsg_internal ("%s" , msg ),
1960- errdetail (det , code )));
2045+ return errdetail (det , code );
19612046}
19622047
19632048
@@ -4241,7 +4326,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
42414326 /* We want to catch any exceptions and return false */
42424327 PG_TRY ();
42434328 {
4244- doc = xml_parse (data , xmloption_arg , true, GetDatabaseEncoding ());
4329+ doc = xml_parse (data , xmloption_arg , true, GetDatabaseEncoding (), NULL );
42454330 result = true;
42464331 }
42474332 PG_CATCH ();
0 commit comments