XSLT to group and remove duplicates at the same time

43 Views Asked by At

Below is my input XML. I want to group the XML below by Username and also remove any duplicate HierarchyLocations entries.

Below is my input XML

<data>
  <UploadUser>
    <User>
      <EmailAddress>test1.com</EmailAddress>
      <Username>User1</Username>
      <Replace>True</Replace>
    </User>
    <HierarchyLocations>
      <Hierarchy>Hierarchy</Hierarchy>
      <UserGroup>UserGroup1</UserGroup>
      <Location>43737</Location>
    </HierarchyLocations>
  </UploadUser>
  <UploadUser>
    <User>
      <EmailAddress>test2.com</EmailAddress>
      <Username>User2</Username>
      <Replace>True</Replace>
    </User>
    <HierarchyLocations>
      <Hierarchy>Hierarchy</Hierarchy>
      <UserGroup>UserGroup2</UserGroup>
      <Location>43737</Location>
    </HierarchyLocations>
  </UploadUser>
  <UploadUser>
    <User>
      <EmailAddress>test2.com</EmailAddress>
      <Username>User2</Username>
      <Replace>True</Replace>
    </User>
    <HierarchyLocations>
      <Hierarchy>Hierarchy</Hierarchy>
      <UserGroup>UserGroup2</UserGroup>
      <Location>43737</Location>
    </HierarchyLocations>
  </UploadUser>
    <UploadUser>
    <User>
      <EmailAddress>test2.com</EmailAddress>
      <Username>User2</Username>
      <Replace>True</Replace>
    </User>
    <HierarchyLocations>
      <Hierarchy>Hierarchy</Hierarchy>
      <UserGroup>UserGroup3</UserGroup>
      <Location>5230</Location>
    </HierarchyLocations>
  </UploadUser>
  </data>

Below is my expected output

<data>
  <UploadUser>
    <User>
      <EmailAddress>test1.com</EmailAddress>
      <Username>User1</Username>
      <Hierarchies>
        <Replace>True</Replace>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup1</UserGroup>
          <Location>43737</Location>
        </HierarchyLocations>
      </Hierarchies>
    </User>
    <User>
      <EmailAddress>test2.com</EmailAddress>
      <Username>User2</Username>
      <Hierarchies>
        <Replace>True</Replace>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup2</UserGroup>
          <Location>43737</Location>
        </HierarchyLocations>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup3</UserGroup>
          <Location>5230</Location>
        </HierarchyLocations>
      </Hierarchies>
    </User>
  </UploadUser>
</data>

I tried the XSLT below, but I was not able to remove the duplicate HierarchyLocations.

<xsl:stylesheet version="1.0" 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Attempted stylesheet from the question: groups UploadUser elements by
     User/Username and emits one User per group inside a single UploadUser.
     NOTE(review): xsl:for-each-group and current-group() are XSLT 2.0
     constructs, although version="1.0" is declared above. -->
<xsl:template match="data">
    <xsl:copy>
        <UploadUser>
        <xsl:for-each-group select="UploadUser" group-by="User/Username">
            <User>
                <!-- Email, username and Replace are taken from the first UploadUser of the group. -->
                <xsl:apply-templates select="current-group()[1]/User/EmailAddress" />
                <xsl:apply-templates select="current-group()[1]/User/Username" />
                <Hierarchies>
                <xsl:apply-templates select="current-group()[1]/User/Replace" />   
                    <xsl:for-each select="current-group()">
                        <!-- Intended de-duplication. The preceding-sibling axis only reaches
                             HierarchyLocations elements that share the same UploadUser parent;
                             in the sample input every UploadUser holds exactly one
                             HierarchyLocations, so this predicate is always true and nothing
                             is filtered out. It also compares Location only, not Hierarchy
                             or UserGroup. -->
                        <xsl:apply-templates select="HierarchyLocations[not(Location=preceding-sibling::HierarchyLocations/Location)]" />
                    </xsl:for-each>
                </Hierarchies>
            </User>
        </xsl:for-each-group>
        </UploadUser>
    </xsl:copy>
</xsl:template>

<!-- Identity template: copies every attribute and node it is applied to, recursively. -->
<xsl:template match="@* | node()">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()" />
    </xsl:copy>
</xsl:template>
</xsl:stylesheet>

Output with the above XSLT. HierarchyLocations contains a duplicate for username "User2", which I need to remove by comparing all three fields (Hierarchy, UserGroup and Location). I tried to avoid copying it by using "preceding-sibling", but it is still copied.

<data>
  <UploadUser>
    <User>
      <EmailAddress>test1.com</EmailAddress>
      <Username>User1</Username>
      <Hierarchies>
        <Replace>True</Replace>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup1</UserGroup>
          <Location>43737</Location>
        </HierarchyLocations>
      </Hierarchies>
    </User>
    <User>
      <EmailAddress>test2.com</EmailAddress>
      <Username>User2</Username>
      <Hierarchies>
        <Replace>True</Replace>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup2</UserGroup>
          <Location>43737</Location>
        </HierarchyLocations>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup2</UserGroup>
          <Location>43737</Location>
        </HierarchyLocations>
        <HierarchyLocations>
          <Hierarchy>Hierarchy</Hierarchy>
          <UserGroup>UserGroup3</UserGroup>
          <Location>5230</Location>
        </HierarchyLocations>
      </Hierarchies>
    </User>
  </UploadUser>
</data>
1

There is 1 best solution below

0
michael.hor257k On BEST ANSWER

Just do another grouping on the current group:

XSLT 2.0

<xsl:stylesheet
    version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

<!-- Rebuilds data as a single UploadUser that holds one User per distinct
     Username, each listing its distinct HierarchyLocations. -->
<xsl:template match="data">
    <xsl:copy>
        <UploadUser>
            <!-- Outer grouping: one iteration per distinct User/Username value. -->
            <xsl:for-each-group select="UploadUser" group-by="User/Username">
                <User>
                    <!-- Relative paths resolve against the context item, which is
                         the first UploadUser of the current group. -->
                    <xsl:copy-of select="User/EmailAddress | User/Username"/>
                    <Hierarchies>
                        <xsl:copy-of select="User/Replace"/>
                        <!-- Inner grouping: the key joins Hierarchy, UserGroup and
                             Location with '|', so entries that agree on all three
                             fall into one group and only the first is copied. -->
                        <xsl:for-each-group
                            select="current-group()/HierarchyLocations"
                            group-by="string-join((Hierarchy, UserGroup, Location), '|')">
                            <xsl:copy-of select="."/>
                        </xsl:for-each-group>
                    </Hierarchies>
                </User>
            </xsl:for-each-group>
        </UploadUser>
    </xsl:copy>
</xsl:template>

</xsl:stylesheet>

Note that within xsl:for-each-group, the first item of the group is the context item - so you can get rid of those awkward current-group()[1]/something expressions.