77 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.93 2009/08/10 05:46:50 tgl Exp $
10+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.94 2009/09/04 10:49:29 heikki Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -109,7 +109,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
109109static bool print_xml_decl (StringInfo buf , const xmlChar * version ,
110110 pg_enc encoding , int standalone );
111111static xmlDocPtr xml_parse (text * data , XmlOptionType xmloption_arg ,
112- bool preserve_whitespace , xmlChar * encoding );
112+ bool preserve_whitespace , int encoding );
113113static text * xml_xmlnodetoxmltype (xmlNodePtr cur );
114114#endif /* USE_LIBXML */
115115
@@ -183,7 +183,7 @@ xml_in(PG_FUNCTION_ARGS)
183183 * Parse the data to check if it is well-formed XML data. Assume that
184184 * ERROR occurred if parsing failed.
185185 */
186- doc = xml_parse (vardata , xmloption , true, NULL );
186+ doc = xml_parse (vardata , xmloption , true, GetDatabaseEncoding () );
187187 xmlFreeDoc (doc );
188188
189189 PG_RETURN_XML_P (vardata );
@@ -272,7 +272,8 @@ xml_recv(PG_FUNCTION_ARGS)
272272 char * newstr ;
273273 int nbytes ;
274274 xmlDocPtr doc ;
275- xmlChar * encoding = NULL ;
275+ xmlChar * encodingStr = NULL ;
276+ int encoding ;
276277
277278 /*
278279 * Read the data in raw format. We don't know yet what the encoding is, as
@@ -293,7 +294,15 @@ xml_recv(PG_FUNCTION_ARGS)
293294 str = VARDATA (result );
294295 str [nbytes ] = '\0' ;
295296
296- parse_xml_decl ((xmlChar * ) str , NULL , NULL , & encoding , NULL );
297+ parse_xml_decl ((xmlChar * ) str , NULL , NULL , & encodingStr , NULL );
298+
299+ /*
300+ * If encoding wasn't explicitly specified in the XML header, treat it as
301+ * UTF-8, as that's the default in XML. This is different from xml_in(),
302+ * where the input has to go through the normal client-to-server encoding
303+ * conversion.
304+ */
305+ encoding = encodingStr ? xmlChar_to_encoding (encodingStr ) : PG_UTF8 ;
297306
298307 /*
299308 * Parse the data to check if it is well-formed XML data. Assume that
@@ -305,9 +314,7 @@ xml_recv(PG_FUNCTION_ARGS)
305314 /* Now that we know what we're dealing with, convert to server encoding */
306315 newstr = (char * ) pg_do_encoding_conversion ((unsigned char * ) str ,
307316 nbytes ,
308- encoding ?
309- xmlChar_to_encoding (encoding ) :
310- PG_UTF8 ,
317+ encoding ,
311318 GetDatabaseEncoding ());
312319
313320 if (newstr != str )
@@ -659,7 +666,8 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
659666#ifdef USE_LIBXML
660667 xmlDocPtr doc ;
661668
662- doc = xml_parse (data , xmloption_arg , preserve_whitespace , NULL );
669+ doc = xml_parse (data , xmloption_arg , preserve_whitespace ,
670+ GetDatabaseEncoding ());
663671 xmlFreeDoc (doc );
664672
665673 return (xmltype * ) data ;
@@ -799,7 +807,8 @@ xml_is_document(xmltype *arg)
799807 /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
800808 PG_TRY ();
801809 {
802- doc = xml_parse ((text * ) arg , XMLOPTION_DOCUMENT , true, NULL );
810+ doc = xml_parse ((text * ) arg , XMLOPTION_DOCUMENT , true,
811+ GetDatabaseEncoding ());
803812 result = true;
804813 }
805814 PG_CATCH ();
@@ -1152,7 +1161,7 @@ print_xml_decl(StringInfo buf, const xmlChar *version,
11521161 */
11531162static xmlDocPtr
11541163xml_parse (text * data , XmlOptionType xmloption_arg , bool preserve_whitespace ,
1155- xmlChar * encoding )
1164+ int encoding )
11561165{
11571166 int32 len ;
11581167 xmlChar * string ;
@@ -1165,9 +1174,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
11651174
11661175 utf8string = pg_do_encoding_conversion (string ,
11671176 len ,
1168- encoding ?
1169- xmlChar_to_encoding (encoding ) :
1170- GetDatabaseEncoding (),
1177+ encoding ,
11711178 PG_UTF8 );
11721179
11731180 /* Start up libxml and its parser (no-ops if already done) */
0 commit comments