77 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.35 2007/03/15 23:12:06 tgl Exp $
10+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.36 2007/03/22 20:14:58 momjian Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
4747#include <libxml/uri.h>
4848#include <libxml/xmlerror.h>
4949#include <libxml/xmlwriter.h>
50+ #include <libxml/xpath.h>
51+ #include <libxml/xpathInternals.h>
5052#endif /* USE_LIBXML */
5153
5254#include "catalog/namespace.h"
6769#include "utils/datetime.h"
6870#include "utils/lsyscache.h"
6971#include "utils/memutils.h"
72+ #include "access/tupmacs.h"
7073#include "utils/xml.h"
7174
7275
@@ -88,6 +91,7 @@ static xmlChar *xml_text2xmlChar(text *in);
8891static int parse_xml_decl (const xmlChar * str , size_t * lenp , xmlChar * * version , xmlChar * * encoding , int * standalone );
8992static bool print_xml_decl (StringInfo buf , const xmlChar * version , pg_enc encoding , int standalone );
9093static xmlDocPtr xml_parse (text * data , XmlOptionType xmloption_arg , bool preserve_whitespace , xmlChar * encoding );
94+ static text * xml_xmlnodetotext (xmlNodePtr cur );
9195
9296#endif /* USE_LIBXML */
9397
@@ -1463,7 +1467,6 @@ map_xml_name_to_sql_identifier(char *name)
14631467 return buf .data ;
14641468}
14651469
1466-
14671470/*
14681471 * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
14691472 */
@@ -2403,3 +2406,247 @@ SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename, bool n
24032406 else
24042407 appendStringInfoString (result , "</row>\n\n" );
24052408}
2409+
2410+
2411+ /*
2412+ * XPath related functions
2413+ */
2414+
2415+ #ifdef USE_LIBXML
2416+ /*
2417+ * Convert XML node to text (return value only, it's not dumping)
2418+ */
2419+ text *
2420+ xml_xmlnodetotext (xmlNodePtr cur )
2421+ {
2422+ xmlChar * str ;
2423+ text * result ;
2424+ size_t len ;
2425+
2426+ str = xmlXPathCastNodeToString (cur );
2427+ len = strlen ((char * ) str );
2428+ result = (text * ) palloc (len + VARHDRSZ );
2429+ SET_VARSIZE (result , len + VARHDRSZ );
2430+ memcpy (VARDATA (result ), str , len );
2431+
2432+ return result ;
2433+ }
2434+ #endif
2435+
2436+ /*
2437+ * Evaluate XPath expression and return array of XML values.
2438+ * As we have no support of XQuery sequences yet, this functions seems
2439+ * to be the most useful one (array of XML functions plays a role of
2440+ * some kind of substritution for XQuery sequences).
2441+
2442+ * Workaround here: we parse XML data in different way to allow XPath for
2443+ * fragments (see "XPath for fragment" TODO comment inside).
2444+ */
2445+ Datum
2446+ xmlpath (PG_FUNCTION_ARGS )
2447+ {
2448+ #ifdef USE_LIBXML
2449+ ArrayBuildState * astate = NULL ;
2450+ xmlParserCtxtPtr ctxt = NULL ;
2451+ xmlDocPtr doc = NULL ;
2452+ xmlXPathContextPtr xpathctx = NULL ;
2453+ xmlXPathCompExprPtr xpathcomp = NULL ;
2454+ xmlXPathObjectPtr xpathobj = NULL ;
2455+ int32 len , xpath_len ;
2456+ xmlChar * string , * xpath_expr ;
2457+ bool res_is_null = FALSE;
2458+ int i ;
2459+ xmltype * data ;
2460+ text * xpath_expr_text ;
2461+ ArrayType * namespaces ;
2462+ int * dims , ndims , ns_count = 0 , bitmask = 1 ;
2463+ char * ptr ;
2464+ bits8 * bitmap ;
2465+ char * * ns_names = NULL , * * ns_uris = NULL ;
2466+ int16 typlen ;
2467+ bool typbyval ;
2468+ char typalign ;
2469+
2470+ /* the function is not strict, we must check first two args */
2471+ if (PG_ARGISNULL (0 ) || PG_ARGISNULL (1 ))
2472+ PG_RETURN_NULL ();
2473+
2474+ xpath_expr_text = PG_GETARG_TEXT_P (0 );
2475+ data = PG_GETARG_XML_P (1 );
2476+
2477+ /* Namespace mappings passed as text[].
2478+ * Assume that 2-dimensional array has been passed,
2479+ * the 1st subarray is array of names, the 2nd -- array of URIs,
2480+ * example: ARRAY[ARRAY['myns', 'myns2'], ARRAY['http://example.com', 'http://example2.com']].
2481+ */
2482+ if (!PG_ARGISNULL (2 ))
2483+ {
2484+ namespaces = PG_GETARG_ARRAYTYPE_P (2 );
2485+ ndims = ARR_NDIM (namespaces );
2486+ dims = ARR_DIMS (namespaces );
2487+
2488+ /* Sanity check */
2489+ if (ndims != 2 )
2490+ ereport (ERROR , (errmsg ("invalid array passed for namespace mappings" ),
2491+ errdetail ("Only 2-dimensional array may be used for namespace mappings." )));
2492+
2493+ Assert (ARR_ELEMTYPE (namespaces ) == TEXTOID );
2494+
2495+ ns_count = ArrayGetNItems (ndims , dims ) / 2 ;
2496+ get_typlenbyvalalign (ARR_ELEMTYPE (namespaces ),
2497+ & typlen , & typbyval , & typalign );
2498+ ns_names = (char * * ) palloc (ns_count * sizeof (char * ));
2499+ ns_uris = (char * * ) palloc (ns_count * sizeof (char * ));
2500+ ptr = ARR_DATA_PTR (namespaces );
2501+ bitmap = ARR_NULLBITMAP (namespaces );
2502+ bitmask = 1 ;
2503+
2504+ for (i = 0 ; i < ns_count * 2 ; i ++ )
2505+ {
2506+ if (bitmap && (* bitmap & bitmask ) == 0 )
2507+ ereport (ERROR , (errmsg ("neither namespace nor URI may be NULL" ))); /* TODO: better message */
2508+ else
2509+ {
2510+ if (i < ns_count )
2511+ ns_names [i ] = DatumGetCString (DirectFunctionCall1 (textout ,
2512+ PointerGetDatum (ptr )));
2513+ else
2514+ ns_uris [i - ns_count ] = DatumGetCString (DirectFunctionCall1 (textout ,
2515+ PointerGetDatum (ptr )));
2516+ ptr = att_addlength (ptr , typlen , PointerGetDatum (ptr ));
2517+ ptr = (char * ) att_align (ptr , typalign );
2518+ }
2519+
2520+ /* advance bitmap pointer if any */
2521+ if (bitmap )
2522+ {
2523+ bitmask <<= 1 ;
2524+ if (bitmask == 0x100 )
2525+ {
2526+ bitmap ++ ;
2527+ bitmask = 1 ;
2528+ }
2529+ }
2530+ }
2531+ }
2532+
2533+ len = VARSIZE (data ) - VARHDRSZ ;
2534+ xpath_len = VARSIZE (xpath_expr_text ) - VARHDRSZ ;
2535+ if (xpath_len == 0 )
2536+ ereport (ERROR , (errmsg ("empty XPath expression" )));
2537+
2538+ if (xmlStrncmp ((xmlChar * ) VARDATA (data ), (xmlChar * ) "<?xml" , 5 ) == 0 )
2539+ {
2540+ string = palloc (len + 1 );
2541+ memcpy (string , VARDATA (data ), len );
2542+ string [len ] = '\0' ;
2543+ xpath_expr = palloc (xpath_len + 1 );
2544+ memcpy (xpath_expr , VARDATA (xpath_expr_text ), xpath_len );
2545+ xpath_expr [xpath_len ] = '\0' ;
2546+ }
2547+ else
2548+ {
2549+ /* use "<x>...</x>" as dummy root element to enable XPath for fragments */
2550+ /* TODO: (XPath for fragment) find better solution to work with XML fragment! */
2551+ string = xmlStrncatNew ((xmlChar * ) "<x>" , (xmlChar * ) VARDATA (data ), len );
2552+ string = xmlStrncat (string , (xmlChar * ) "</x>" , 5 );
2553+ len += 7 ;
2554+ xpath_expr = xmlStrncatNew ((xmlChar * ) "/x" , (xmlChar * ) VARDATA (xpath_expr_text ), xpath_len );
2555+ len += 2 ;
2556+ }
2557+
2558+ xml_init ();
2559+
2560+ PG_TRY ();
2561+ {
2562+ /* redundant XML parsing (two parsings for the same value in the same session are possible) */
2563+ ctxt = xmlNewParserCtxt ();
2564+ if (ctxt == NULL )
2565+ xml_ereport (ERROR , ERRCODE_INTERNAL_ERROR ,
2566+ "could not allocate parser context" );
2567+ doc = xmlCtxtReadMemory (ctxt , (char * ) string , len , NULL , NULL , 0 );
2568+ if (doc == NULL )
2569+ xml_ereport (ERROR , ERRCODE_INVALID_XML_DOCUMENT ,
2570+ "could not parse XML data" );
2571+ xpathctx = xmlXPathNewContext (doc );
2572+ if (xpathctx == NULL )
2573+ xml_ereport (ERROR , ERRCODE_INTERNAL_ERROR ,
2574+ "could not allocate XPath context" );
2575+ xpathctx -> node = xmlDocGetRootElement (doc );
2576+ if (xpathctx -> node == NULL )
2577+ xml_ereport (ERROR , ERRCODE_INTERNAL_ERROR ,
2578+ "could not find root XML element" );
2579+
2580+ /* register namespaces, if any */
2581+ if ((ns_count > 0 ) && ns_names && ns_uris )
2582+ for (i = 0 ; i < ns_count ; i ++ )
2583+ if (0 != xmlXPathRegisterNs (xpathctx , (xmlChar * ) ns_names [i ], (xmlChar * ) ns_uris [i ]))
2584+ ereport (ERROR ,
2585+ (errmsg ("could not register XML namespace with prefix=\"%s\" and href=\"%s\"" , ns_names [i ], ns_uris [i ])));
2586+
2587+ xpathcomp = xmlXPathCompile (xpath_expr );
2588+ if (xpathcomp == NULL )
2589+ xml_ereport (ERROR , ERRCODE_INTERNAL_ERROR ,
2590+ "invalid XPath expression" ); /* TODO: show proper XPath error details */
2591+
2592+ xpathobj = xmlXPathCompiledEval (xpathcomp , xpathctx );
2593+ xmlXPathFreeCompExpr (xpathcomp );
2594+ if (xpathobj == NULL )
2595+ ereport (ERROR , (errmsg ("could not create XPath object" )));
2596+
2597+ if (xpathobj -> nodesetval == NULL )
2598+ res_is_null = TRUE;
2599+
2600+ if (!res_is_null && xpathobj -> nodesetval -> nodeNr == 0 )
2601+ /* TODO maybe empty array should be here, not NULL? (if so -- fix segfault) */
2602+ /*PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));*/
2603+ res_is_null = TRUE;
2604+
2605+ if (!res_is_null )
2606+ for (i = 0 ; i < xpathobj -> nodesetval -> nodeNr ; i ++ )
2607+ {
2608+ Datum elem ;
2609+ bool elemisnull = false;
2610+ elem = PointerGetDatum (xml_xmlnodetotext (xpathobj -> nodesetval -> nodeTab [i ]));
2611+ astate = accumArrayResult (astate , elem ,
2612+ elemisnull , XMLOID ,
2613+ CurrentMemoryContext );
2614+ }
2615+
2616+ xmlXPathFreeObject (xpathobj );
2617+ xmlXPathFreeContext (xpathctx );
2618+ xmlFreeParserCtxt (ctxt );
2619+ xmlFreeDoc (doc );
2620+ xmlCleanupParser ();
2621+ }
2622+ PG_CATCH ();
2623+ {
2624+ if (xpathcomp )
2625+ xmlXPathFreeCompExpr (xpathcomp );
2626+ if (xpathobj )
2627+ xmlXPathFreeObject (xpathobj );
2628+ if (xpathctx )
2629+ xmlXPathFreeContext (xpathctx );
2630+ if (doc )
2631+ xmlFreeDoc (doc );
2632+ if (ctxt )
2633+ xmlFreeParserCtxt (ctxt );
2634+ xmlCleanupParser ();
2635+
2636+ PG_RE_THROW ();
2637+ }
2638+ PG_END_TRY ();
2639+
2640+ if (res_is_null )
2641+ {
2642+ PG_RETURN_NULL ();
2643+ }
2644+ else
2645+ {
2646+ PG_RETURN_ARRAYTYPE_P (makeArrayResult (astate , CurrentMemoryContext ));
2647+ }
2648+ #else
2649+ NO_XML_SUPPORT ();
2650+ return 0 ;
2651+ #endif
2652+ }
0 commit comments