4

I'm parsing an XML file with T-SQL to build a table for further analysis. Following the great advice from xquery-lab-61-writing-a-recursive-cte-to-process-an-xml-document, I use a recursive CTE, but I'm not getting the desired result. The problem is with the value() function for nodes that have children.

I have

-- Flattens an XML document into (FullPath, Value) rows using a recursive CTE
-- over the XML data type. The second <book> has mixed content: text nodes
-- interleaved with its child elements.
DECLARE @x XML  
SELECT @x = '
<books>
    <book id="101">
        <title>my book</title>
        <author>Myself</author>
    </book>
    <book id="202">
        text before
          <title>your book</title>
        in the middle
          <author>you</author>
        text after
    </book>
</books>'

-- cte: one row per element, walking the tree top-down.
;WITH cte AS ( 
    -- Anchor member: the top-level element(s) of @x, at level 1.
    SELECT 
        1 AS lvl, 
        x.value('local-name(.)','VARCHAR(MAX)') AS FullPath, 
        -- text()[1] selects only the FIRST text-node child of the element,
        -- so any further text nodes of a mixed-content element are not returned.
        x.value('text()[1]','VARCHAR(MAX)') AS Value, 
        -- Keep the element itself as an XML value so the recursive member can shred it.
        x.query('.') AS CurrentNode,        
        -- Sort key seed: 4-byte binary 1, widened to VARBINARY(MAX) so child keys can be appended.
        CAST(CAST(1 AS VARBINARY(4)) AS VARBINARY(MAX)) AS Sort
    FROM @x.nodes('/*') a(x) 
    UNION ALL 
    -- Recursive member: the child elements of each element found so far.
    SELECT 
        p.lvl + 1 AS lvl, 
        -- Path of the child = path of the parent + '/' + child's local name.
        CAST( 
            p.FullPath 
            + '/' 
            + c.value('local-name(.)','VARCHAR(MAX)') AS VARCHAR(MAX) 
        ) AS FullPath, 
        -- Same limitation as in the anchor: only the first text-node child is read.
        CAST( c.value('text()[1]','VARCHAR(MAX)') AS VARCHAR(MAX) ) AS Value, 
        c.query('.')  AS CurrentNode,        
        -- Child sort key = parent's key followed by a 4-byte value built from
        -- the level and a row number.
        -- NOTE(review): ORDER BY (SELECT 1) specifies no real ordering, so the
        -- row numbers are presumably expected to follow document order — confirm.
        CAST( 
            p.Sort 
            + CAST( (lvl + 1) * 1024 
            + (ROW_NUMBER() OVER(ORDER BY (SELECT 1)) * 2) AS VARBINARY(4) 
        ) AS VARBINARY(MAX) ) AS Sort
    FROM cte p 
    -- CurrentNode holds a single element, so '/*/*' addresses that element's child elements.
    CROSS APPLY CurrentNode.nodes('/*/*') b(c)        
), cte2 AS (
    -- cte2: the element rows plus one extra row per attribute.
    SELECT 
        FullPath, 
        Value, 
        Sort 
    FROM cte 
    UNION ALL 
    -- Attribute rows: path gets a '/@name' suffix; they reuse the Sort key
    -- of the element that owns the attribute.
    SELECT 
        p.FullPath + '/@' + x.value('local-name(.)','VARCHAR(MAX)'), 
        x.value('.','VARCHAR(MAX)'),
        Sort 
    FROM cte p 
    CROSS APPLY CurrentNode.nodes('/*/@*') a(x) 
)
-- Keep only rows that carry a value, ordered by the accumulated sort key.
SELECT FullPath, value 
FROM cte2
WHERE Value IS NOT NULL
ORDER BY Sort 

which results in

FullPath             Value
-------------------- ------------------------------
books\book\@id       101
books\book\title     my book
books\book\author    Myself
books\book           text before 
books\book\@id       202
books\book\title     your book
books\book\author    you

and I need something like:

FullPath             Value
-------------------- ------------------------------
books\book\@id       101
books\book\title     my book
books\book\author    Myself
books\book           text before 
books\book\@id       202
books\book\title     your book
books\book           in the middle
books\book\author    you
books\book           text after

I would prefer to find a solution using T-SQL if possible. I will be extremely grateful for any good solutions or advice.

1 Answer 1

2

This is easier done with OPENXML instead of using the XML datatype.

With OPENXML you can create an edge table that has one row for each node in the XML.

-- Parse @x into an in-memory document and get a handle to it.
declare @idoc int;
exec sp_xml_preparedocument @idoc output, @x;

-- OPENXML without a WITH clause returns the edge table: one row per XML node.
select id,
       parentid,
       nodetype,
       localname,
       prefix,
       namespaceuri,
       datatype,
       prev,
       [text]
from openxml(@idoc, '');

-- Release the parsed document.
exec sp_xml_removedocument @idoc;

Result:

id  parentid nodetype localname prefix namespaceuri datatype prev text
--- -------- -------- --------- ------ ------------ -------- ---- -------------
0   NULL     1        books     NULL   NULL         NULL     NULL NULL
2   0        1        book      NULL   NULL         NULL     NULL NULL
3   2        2        id        NULL   NULL         NULL     NULL NULL
13  3        3        #text     NULL   NULL         NULL     NULL 101
4   2        1        title     NULL   NULL         NULL     NULL NULL
14  4        3        #text     NULL   NULL         NULL     NULL my book
5   2        1        author    NULL   NULL         NULL     4    NULL
15  5        3        #text     NULL   NULL         NULL     NULL Myself
6   0        1        book      NULL   NULL         NULL     2    NULL
7   6        2        id        NULL   NULL         NULL     NULL NULL
16  7        3        #text     NULL   NULL         NULL     NULL 202
8   6        3        #text     NULL   NULL         NULL     NULL text before
9   6        1        title     NULL   NULL         NULL     8    NULL
17  9        3        #text     NULL   NULL         NULL     NULL your book
10  6        3        #text     NULL   NULL         NULL     9    in the middle
11  6        1        author    NULL   NULL         NULL     10   NULL
18  11       3        #text     NULL   NULL         NULL     NULL you
12  6        3        #text     NULL   NULL         NULL     11   text after

Store the edge table in a temporary table and do a recursive CTE using id and parentid. Use nodetype when you build the FullPath column.

-- Flattens an XML document into (FullPath, Value) rows by shredding it with
-- OPENXML into an edge table and walking id/parentid with a recursive CTE.
declare @x xml;  
select @x = '
<books>
    <book id="101">
        <title>my book</title>
        <author>Myself</author>
    </book>
    <book id="202">
        text before
          <title>your book</title>
        in the middle
          <author>you</author>
        text after
    </book>
</books>';

-- Make the script re-runnable in the same session: a previous run that failed
-- before reaching the final DROP would otherwise leave #T behind.
if object_id('tempdb..#T') is not null
  drop table #T;

declare @idoc int;
exec sp_xml_preparedocument @idoc out, @x;

-- Materialize the edge table (one row per XML node). Only the columns the
-- CTE below actually reads are kept.
select id,
       parentid,
       nodetype,
       localname,
       [text]
into #T
from openxml(@idoc, '');

-- The parsed document is no longer needed once the edge table is stored.
exec sp_xml_removedocument @idoc;

with C as
(
  -- Anchor member: the root node (the only row without a parent).
  select T.id,
         T.parentid,
         T.localname as FullPath,
         T.text as Value
  from #T as T
  where T.parentid is null
  union all
  -- Recursive member: children of the nodes found so far.
  select T.id,
         T.parentid,
         -- Only elements and attributes add a path segment. Every other node
         -- type (3 text, 4 CDATA section, 5 entity reference, 6 entity,
         -- 7 processing instruction, 8 comment, 9 document, 10 document type,
         -- 11 document fragment, 12 notation) keeps its parent's path.
         -- The ELSE branch matters: without it an unlisted nodetype would
         -- yield NULL and null out FullPath for the row and its descendants.
         C.FullPath + case T.nodetype 
                        when 1 then N'\' + T.localname  -- Element node
                        when 2 then N'\@' + T.localname -- Attribute node
                        else N''
                      end,
         T.text
  from C
    inner join #T as T
      on C.id = T.parentid
)
-- Values live on text-node rows, so filtering on Value keeps exactly those.
-- Ordering by parent first lists all text nodes of one parent together.
select C.FullPath,
       C.Value
from C
where C.Value is not null
order by C.parentid, 
         C.id;

drop table #T;

Result:

FullPath           Value
------------------ --------------
books\book\@id     101
books\book\title   my book
books\book\author  Myself
books\book         text before
books\book         in the middle
books\book         text after
books\book\@id     202
books\book\title   your book
books\book\author  you

SQL Fiddle

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.