0

XML:

<sample>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A</Cell2>
        <Cell4>xy</Cell4>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>B</Cell2>
        <Cell6>10</Cell6>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A,Y</Cell2>
        <Cell4>1</Cell4>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A C,X</Cell2>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>C D,Y</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A B</Cell2>
        <Cell4>xy</Cell4>
    </test>
</sample>

XSLT:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs">
    <xsl:output method="xml" encoding="UTF-8" indent="no"/>
    <!-- Entry point: hand the document element <sample> to its own template rule. -->
    <xsl:template match="/">
        <xsl:apply-templates select="child::sample"/>
    </xsl:template>
    <!--
        Processes the <sample> root element.
        1. Collects the "atomic" rows (those whose Cell2 holds a single value) into
           the temporary tree $atomictest.
        2. Applies the "test" template to every <test> child, passing $atomictest
           along, and writes the generated rows straight to the result tree.
    -->
    <xsl:template match="sample">
        <xsl:variable name="atomictest">
            <!-- A row is atomic only when Cell2 contains neither a comma nor a space.
                 Both conditions must hold at once; joining them with "or" would also
                 admit multi-valued rows such as "A,Y" or "A B". -->
            <xsl:copy-of select="test[not(contains(Cell2,',')) and not(contains(Cell2,' '))]"/>
        </xsl:variable>
        <!-- Emit the generated rows directly. Capturing them in a variable that is
             never read would discard them and leave the output empty. -->
        <xsl:apply-templates select="test">
            <xsl:with-param name="atomictest" select="$atomictest"/>
        </xsl:apply-templates>
    </xsl:template>
    <!--
        Expands one <test> row into zero or more single-valued <test> rows that
        carry only Cell1 and Cell2.

        Parameter:
          atomictest : temporary tree of <test> rows supplied by the caller; a
                       Cell2 token equal (after normalize-space) to the Cell2 of
                       any of these rows is skipped.

        Cases:
          1. Cell2 contains a comma: Cell2 is split on commas; the token at
             position N is paired with the Nth comma-separated token of Cell1.
             A token that itself contains spaces is split again on spaces and
             every piece is paired with that same Nth Cell1 token.
          2. Cell2 contains a space but no comma: Cell2 is split on spaces and
             every piece is paired with the whole Cell1 value.
          3. Otherwise: Cell1 and Cell2 are written out unchanged, without
             consulting atomictest.

        NOTE(review): tokens are compared only against atomictest, never against
        rows this template has already produced, so a value that recurs in a later
        row is emitted again. XSLT variables cannot be updated after binding;
        removing such duplicates needs grouping or a second pass over the result.
    -->
    <xsl:template match="test">
        <xsl:param name="atomictest"/>
        <xsl:choose>
            <xsl:when test="contains(Cell2,',')">
                <!-- Keep a copy of Cell1: inside the for-each loops below the context
                     item is a string token, so Cell1 cannot be reached by a relative path. -->
                <xsl:variable name="Cell1">
                    <xsl:copy-of select="Cell1"/>
                </xsl:variable>
                <!-- tokenize Cell2 on commas -->
                <xsl:for-each select="tokenize(Cell2,',')">
                    <xsl:variable name="str">
                        <xsl:value-of select="."/>
                    </xsl:variable>
                    <!-- Index of the current comma token; used to pick the matching Cell1 token. -->
                    <xsl:variable name="pos">
                        <xsl:value-of select="position()"/>
                    </xsl:variable>
                    <xsl:choose>
                        <!-- The comma token holds several space-separated values -->
                        <xsl:when test="contains(.,' ')">
                            <!-- tokenize the comma token on spaces -->
                            <xsl:for-each select="tokenize(.,' ')">
                                <!-- Shadows the outer $str with the current space token. -->
                                <xsl:variable name="str">
                                    <xsl:value-of select="."/>
                                </xsl:variable>
                                <!-- Only continue when the value is not among the atomic rows -->
                                <xsl:if test="not($atomictest/test[normalize-space(Cell2/text())=normalize-space($str)])">
                                    <!-- Store the Cell2 value before the context item changes again -->
                                    <xsl:variable name="Cell2">
                                        <xsl:value-of select="."/>
                                    </xsl:variable>
                                    <!-- tokenize Cell1 on commas and keep only the token at index $pos -->
                                    <xsl:for-each select="tokenize($Cell1/Cell1,',')">
                                        <xsl:if test="position()=$pos">
                                            <test>
                                                <Cell1>
                                                    <xsl:value-of select="."/>
                                                </Cell1>
                                                <Cell2>
                                                    <xsl:value-of select="$Cell2"/>
                                                </Cell2>
                                            </test>
                                        </xsl:if>
                                    </xsl:for-each>
                                </xsl:if>
                            </xsl:for-each>
                        </xsl:when>
                        <xsl:otherwise>
                            <!-- The comma token holds a single value (no space) -->
                            <xsl:if test="not($atomictest/test[normalize-space(Cell2/text())=normalize-space($str)])">
                                <xsl:variable name="Cell2">
                                    <xsl:value-of select="."/>
                                </xsl:variable>
                                <!-- Pair the value with the Cell1 token at the same index -->
                                <xsl:for-each select="tokenize($Cell1/Cell1,',')">
                                    <xsl:if test="position()=$pos">
                                        <test>
                                            <Cell1>
                                                <xsl:value-of select="."/>
                                            </Cell1>
                                            <Cell2>
                                                <xsl:value-of select="$Cell2"/>
                                            </Cell2>
                                        </test>
                                    </xsl:if>
                                </xsl:for-each>
                            </xsl:if>
                        </xsl:otherwise>
                    </xsl:choose>
                </xsl:for-each>
            </xsl:when>
            <xsl:when test="contains(Cell2,' ')">
                <xsl:variable name="Cell1">
                    <xsl:copy-of select="Cell1"/>
                </xsl:variable>
                <!-- tokenize Cell2 on spaces (this branch is only reached when Cell2 has no comma) -->
                <xsl:for-each select="tokenize(Cell2,' ')">
                    <xsl:variable name="str">
                        <xsl:value-of select="."/>
                    </xsl:variable>
                    <xsl:variable name="pos">
                        <xsl:value-of select="position()"/>
                    </xsl:variable>
                    <!-- Only continue when the value is not among the atomic rows -->
                    <xsl:if test="not($atomictest/test[normalize-space(Cell2/text())=normalize-space($str)])">
                        <!-- NOTE(review): $pos was taken from position() in this same
                             iteration, so this condition is always true. -->
                        <xsl:if test="position()=$pos">
                            <test>
                                <Cell1>
                                    <xsl:value-of select="$Cell1"/>
                                </Cell1>
                                <Cell2>
                                    <xsl:value-of select="$str"/>
                                </Cell2>
                            </test>
                        </xsl:if>
                    </xsl:if>
                </xsl:for-each>
            </xsl:when>
            <xsl:otherwise>
                <!-- Single-valued row: written out as-is, reduced to Cell1 and Cell2 -->
                <test>
                    <Cell1>
                        <xsl:value-of select="Cell1"/>
                    </Cell1>
                    <Cell2>
                        <xsl:value-of select="Cell2"/>
                    </Cell2>
                </test>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
</xsl:stylesheet>
  1. I store every test whose Cell2 contains a single value in the atomictest variable.
  2. I check whether Cell2 contains a comma. If it does, I tokenize Cell2 on the comma and check whether each tokenized Cell2 value is already present in atomictest; if it is not, I add the Cell1 and Cell2 values to the output.
  3. I would like to add the newly output Cell1 and Cell2 values to the atomictest variable, so that the next time I come across the same Cell2 value I can skip it. How can I do this?

The output which I get:

<test>
    <Cell1>John</Cell1>
    <Cell2>A</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>B</Cell2>
</test>
<test>
    <Cell1>Jade</Cell1>
    <Cell2>Y</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>C</Cell2>
</test>
<test>
    <Cell1>Jade</Cell1>
    <Cell2>X</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>C</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>D</Cell2>
</test>
<test>
    <Cell1>Jade</Cell1>
    <Cell2>Y</Cell2>
</test>

Resulting output should look like the following:

<test>
        <Cell1>John</Cell1>
        <Cell2>A</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>B</Cell2>
    </test>
    <test>
        <Cell1>Jade</Cell1>
        <Cell2>Y</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>C</Cell2>
    </test>
    <test>
        <Cell1>Jade</Cell1>
        <Cell2>X</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>D</Cell2>
    </test>
4
  • 1
    XSLT is a functional language. Among other things, this means that variables cannot be updated. Libraries of XSLT templates/functions exist that solve very challenging tasks with amazing simplicity, that are probably simpler, more understandable and maintainable and not less efficient than respective imperative language solutions. The result you want to produce can be generated with a fully functional code, not updating a variable. But please, provide a short example so that people would have time to read it and to work on it. Commented Oct 1, 2012 at 12:42
  • Should the last expected elements be Jimmy/C, Jimmy/B and then John/B, instead of Jimmy/D? Can you look again? Your listed expected output doesn't make sense if "D" is included. Where did "D" come from? It is not even in the input document. Commented Oct 1, 2012 at 13:36
  • Ok. Getting better. But why is John/B excluded from your expected output? Commented Oct 1, 2012 at 13:47
  • Forgot to update the output. Done it now! Commented Oct 1, 2012 at 14:08

2 Answers 2

1

This XSLT 2.0 style-sheet...

<xsl:stylesheet version="2.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:temp="http://stackoverflow.com/questions/12673307"
  exclude-result-prefixes="xsl temp">
<xsl:output omit-xml-declaration="yes" indent="yes" />
<xsl:strip-space elements="*" />  

<!-- Phase 1 result: a temp:tests wrapper holding whatever the phase-1 rules
     produce for each <test> child of the document element. -->
<xsl:variable name="phase-1-output">
  <temp:tests>
    <xsl:apply-templates mode="phase-1" select="/child::*/child::test"/>
  </temp:tests>
</xsl:variable>

<!-- Phase 2 result: the phase-1 wrapper element run through the phase-2 rules. -->
<xsl:variable name="phase-2-output">
  <xsl:apply-templates mode="phase-2" select="$phase-1-output/temp:tests"/>
</xsl:variable>

<!-- Entry point: write the content of the phase-2 temporary tree to the result. -->
<xsl:template match="/">
  <xsl:copy-of select="$phase-2-output/node()"/>
</xsl:template>

<!--
  Phase 1 rules: flatten each <test> row into one temp:test element per
  (Cell1 token, Cell2 token) pair. Cell1 and Cell2 are split on commas and
  paired by position; each Cell2 group is then split on whitespace.
-->

<!-- Fallback: rows not matched by the rule below (Cell1 or Cell2 missing or
     empty) produce nothing. -->
<xsl:template match="*" mode="phase-1" />

<xsl:template match="test[Cell1!=''][Cell2!='']" mode="phase-1">
  <xsl:variable name="cell2" select="tokenize(Cell2,',')" />
  <xsl:for-each select="tokenize(Cell1,',')" >
    <xsl:variable name="cell1-pos" select="position()" />
    <!-- Trim the name so that "John, Jade" yields "Jade" rather than " Jade",
         which would otherwise be treated as a different person in phase 2. -->
    <xsl:variable name="cell1" select="normalize-space(.)" />
    <!-- normalize-space trims the group and collapses whitespace runs to one
         space, so splitting on a single space never yields an empty token.
         When Cell2 has fewer comma groups than Cell1 the argument is the empty
         sequence and no rows are produced for this name. -->
    <xsl:for-each select="tokenize(normalize-space($cell2[$cell1-pos]),' ')">
      <temp:test>
        <temp:Cell1><xsl:value-of select="$cell1" /></temp:Cell1>
        <temp:Cell2><xsl:value-of select="." /></temp:Cell2>
      </temp:test>
    </xsl:for-each>
  </xsl:for-each>
</xsl:template>

<!--
  Phase 2 rule: remove duplicate (Cell1, Cell2) pairs, keeping the order in
  which each pair first appears, and emit them as plain <test> elements.

  The pipe is used only to build the grouping key. Inside xsl:for-each-group
  the context item is the first member of the current group, so the values are
  read from that member instead of being recovered by splitting the key; a pipe
  inside a value therefore cannot corrupt the emitted Cell1/Cell2.
  Two different pairs whose concatenated keys coincide, such as ("a|b","c")
  and ("a","b|c"), would still be grouped together.
-->
<xsl:template match="temp:tests" mode="phase-2">
  <xsl:for-each-group select="temp:test" group-by="concat(temp:Cell1,'|',temp:Cell2)">
    <test>
      <Cell1><xsl:value-of select="temp:Cell1" /></Cell1>
      <Cell2><xsl:value-of select="temp:Cell2" /></Cell2>
    </test>
  </xsl:for-each-group>
</xsl:template>

</xsl:stylesheet>

...will transform this input...

<sample>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A</Cell2>
        <Cell4>xy</Cell4>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>B</Cell2>
        <Cell6>10</Cell6>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A,Y</Cell2>
        <Cell4>1</Cell4>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A C,X</Cell2>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>C D,Y</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A B</Cell2>
        <Cell4>xy</Cell4>
    </test>
</sample>

...into...

<test>
   <Cell1>John</Cell1>
   <Cell2>A</Cell2>
</test>
<test>
   <Cell1>John</Cell1>
   <Cell2>B</Cell2>
</test>
<test>
   <Cell1>Jade</Cell1>
   <Cell2>Y</Cell2>
</test>
<test>
   <Cell1>John</Cell1>
   <Cell2>C</Cell2>
</test>
<test>
   <Cell1>Jade</Cell1>
   <Cell2>X</Cell2>
</test>
<test>
   <Cell1>John</Cell1>
   <Cell2>D</Cell2>
</test>

Alternative solution

Here is an alternative single phase solution. It is simpler, but less adaptable.

<xsl:stylesheet version="2.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output omit-xml-declaration="yes" indent="yes" />
<xsl:strip-space elements="*" />  

<!--
  Single-pass variant: build the distinct "Cell1|Cell2" keys first, then turn
  each key back into a <test> element by splitting it at the first pipe.
-->
<xsl:template match="/">
  <!-- For every row, walk the comma-separated Cell1 names by index, take the
       Cell2 group at the same index, split that group on spaces and join each
       name/value pair with a pipe; distinct-values drops repeated pairs. -->
  <xsl:variable name="pairs" select="
     distinct-values(
       for $row  in /*/test,
           $idx  in 1 to count(tokenize($row/Cell1, ',')),
           $name in tokenize($row/Cell1, ',')[$idx],
           $code in tokenize(tokenize($row/Cell2, ',')[$idx], ' ')
       return concat($name, '|', $code))" />
  <xsl:for-each select="$pairs">
    <test>
      <Cell1><xsl:value-of select="substring-before(., '|')" /></Cell1>
      <Cell2><xsl:value-of select="substring-after(., '|')" /></Cell2>
    </test>
  </xsl:for-each>
</xsl:template>

</xsl:stylesheet>

Note

Both solutions rely on the following assumptions:

  1. Both Cell1 and Cell2 have the same count of commas.
  2. Cell1 will never contain the pipe ('|') character.
Sign up to request clarification or add additional context in comments.

Comments

1

Variables are read-only in XSLT. That is, you can assign them only once. After that they are read-only.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.