I've been thinking again about flattening some metadata that is stored in a hierarchical structure in an XML file into a form that the XML microservice understands. The microservice effectively only takes in XML files with this type of data:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Flat layout: a single <metadata> root holding one <AVU> (Attribute, Value, Unit) per entry. -->
<metadata>
  <AVU>
    <Attribute>Alice</Attribute>
    <Value>1</Value>
    <Unit>Years Old</Unit>
  </AVU>
</metadata>

Of course, my metadata.xml doesn't fit this model; it has the following form:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example of the hierarchical metadata document.
  Every element is in the default namespace http://www.hpc-europaII.org/jra3,
  so namespace-aware consumers (XPath, XSLT) must address the elements through
  a prefix bound to that URI.
-->
<Metadata xsi:schemaLocation="http://www.hpc-europaII.org/jra3 schema.xsd" 
xmlns="http://www.hpc-europaII.org/jra3" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <Source>
        <!-- Several leaf values below (Tools/ShortName, Name/First, Name/Last, Role) are
             wrapped in line breaks and indentation; that whitespace is part of the
             element's text content as delivered by the parser. -->
        <Experiment>
            <expUID>SOMEVALUE</expUID>
            <expURI></expURI>
        </Experiment>
        <Tools>
            <ShortName>
            UG
            </ShortName>
            <Version>0bcfb63f7c6c67cd954609c3a90a27b2</Version>
        </Tools>
        <Authors>
            <Name>
                <First>
Alice
</First>
                <Middle></Middle>
                <Last>
Bob
</Last>
            </Name>
            <Role>
            Group Leader
            </Role>
            <Address>
                <Street>Some Street</Street>
                <PostalCode>Some PostalCode</PostalCode>
                <City>Some City</City>
                <Country>Some Country</Country>
                <Phone>Some Number</Phone>
                <E-mail>Some E-mail Address</E-mail>
                <Web>Some Web Address</Web>
            </Address>
        </Authors>
        <Institutes>
            <ShortName>Some ShortName for a Lab</ShortName>
            <Address>
                <Street>Some Street</Street>
                <PostalCode>Some PostalCode</PostalCode>
                <City>Some City</City>
                <Country>Some Country</Country>
                <Phone>Some Number</Phone>
                <E-mail>Some E-mail Address</E-mail>
                <Web>Some Web Address</Web>
            </Address>
        </Institutes>
    </Source>
    <!-- The Science and Results sections carry no content in this example. -->
    <Science>
    </Science>
    <Results>
    </Results>
</Metadata>

I had a look at http://nwalsh.com/docs/tutorials/extreme04/, http://www.w3.org/TR/xslt20/ and the example posted at http://groups.google.com/group/irod-chat/browse_thread/thread/1a1669bf1e36c205/46a904d3e5d651fd?lnk=gst&q=irods+%26+ddi%3F#46a904d3e5d651fd

In the end I came up with this simple xsl file

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Flattens a jra3 <Metadata> document into a flat <metadata> list of <AVU>
  (Attribute / Value / Unit) entries.

  Only the <Source> subtree is converted; <Science> and <Results> are dropped.

  Every template rule is declared exactly once. XSLT 1.0 treats several rules that
  match the same node with the same priority as an error, which a processor may only
  recover from by using the rule that occurs last in the stylesheet.
-->
<xsl:stylesheet version="1.0"
                xmlns:jra3="http://www.hpc-europaII.org/jra3"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                exclude-result-prefixes="jra3">

  <!-- Serialize the result as indented UTF-8 XML for ease of reading. -->
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

  <!-- Drop whitespace-only text nodes from every element of the source document,
       so indentation between source elements is never copied to the result. -->
  <xsl:strip-space elements="*"/>

  <!-- Wrap everything produced from the source document in one <metadata> root.
       The built-in rules descend through jra3:Metadata to its children. -->
  <xsl:template match="/">
    <metadata>
      <xsl:text>&#xA;&#xA;</xsl:text>
      <xsl:apply-templates/>
    </metadata>
  </xsl:template>

  <!-- Fixes the order in which the AVUs are emitted. -->
  <xsl:template match="jra3:Source">
    <xsl:apply-templates select="jra3:Experiment//jra3:expUID"/>
    <xsl:apply-templates select="jra3:Experiment//jra3:expURI"/>
    <xsl:apply-templates select="jra3:Tools//jra3:ShortName"/>
    <xsl:apply-templates select="jra3:Tools//jra3:Version"/>
    <xsl:apply-templates select="jra3:Authors//jra3:Name//jra3:First"/>
    <xsl:apply-templates select="jra3:Authors//jra3:Name//jra3:Middle"/>
    <xsl:apply-templates select="jra3:Authors//jra3:Name//jra3:Last"/>
    <xsl:apply-templates select="jra3:Authors//jra3:Role"/>
    <xsl:apply-templates select="jra3:Authors//jra3:Address"/>
    <xsl:apply-templates select="jra3:Institutes//jra3:ShortName"/>
    <xsl:apply-templates select="jra3:Institutes//jra3:Address"/>
  </xsl:template>

  <!--
    Emits one AVU followed by a blank line.
      attribute: text of the <Attribute> element.
      value:     text of the <Value> element; defaults to the whitespace-normalized
                 string value of the node being processed by the caller.
    <Unit/> is always written, and always empty, so every AVU has the same three children.
  -->
  <xsl:template name="avu">
    <xsl:param name="attribute"/>
    <xsl:param name="value" select="normalize-space(.)"/>
    <AVU>
      <Attribute>
        <xsl:value-of select="$attribute"/>
      </Attribute>
      <Value>
        <xsl:value-of select="$value"/>
      </Value>
      <Unit/>
    </AVU>
    <xsl:text>&#xA;&#xA;</xsl:text>
  </xsl:template>

  <!--
    Emits one AVU whose value is the caller's address (the current node must be a
    jra3:Address) joined on a single line as
    "Street, PostalCode, City, Country, Phone, E-mail, Web.".
    The value is built with concat() so that no line breaks or indentation from this
    stylesheet end up inside the metadata value.
  -->
  <xsl:template name="address-avu">
    <xsl:param name="attribute"/>
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="$attribute"/>
      <xsl:with-param name="value"
                      select="concat(normalize-space(jra3:Street), ', ',
                                     normalize-space(jra3:PostalCode), ', ',
                                     normalize-space(jra3:City), ', ',
                                     normalize-space(jra3:Country), ', ',
                                     normalize-space(jra3:Phone), ', ',
                                     normalize-space(jra3:E-mail), ', ',
                                     normalize-space(jra3:Web), '.')"/>
    </xsl:call-template>
  </xsl:template>

  <!-- Experiment -->
  <xsl:template match="jra3:Experiment//jra3:expUID">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Source/Experiment/expUID'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Experiment//jra3:expURI">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Source/Experiment/expURI'"/>
    </xsl:call-template>
  </xsl:template>

  <!-- Tools -->
  <xsl:template match="jra3:Tools//jra3:ShortName">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Source/Tools/ShortName'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Tools//jra3:Version">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Source/Tools/Version'"/>
    </xsl:call-template>
  </xsl:template>

  <!-- Authors.
       NOTE(review): unlike the Experiment and Tools entries, the Authors and Institutes
       attribute names carry no "Source/" prefix. They are kept unchanged so that
       existing metadata keys stay the same; confirm whether the prefix is wanted. -->
  <xsl:template match="jra3:Authors//jra3:Name//jra3:First">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Authors/Name/First'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Authors//jra3:Name//jra3:Middle">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Authors/Name/Middle'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Authors//jra3:Name//jra3:Last">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Authors/Name/Last'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Authors//jra3:Role">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Authors/Role'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Authors//jra3:Address">
    <xsl:call-template name="address-avu">
      <xsl:with-param name="attribute" select="'Authors/Address'"/>
    </xsl:call-template>
  </xsl:template>

  <!-- Institutes -->
  <xsl:template match="jra3:Institutes//jra3:ShortName">
    <xsl:call-template name="avu">
      <xsl:with-param name="attribute" select="'Institutes/ShortName'"/>
    </xsl:call-template>
  </xsl:template>

  <xsl:template match="jra3:Institutes//jra3:Address">
    <xsl:call-template name="address-avu">
      <xsl:with-param name="attribute" select="'Institutes/Address'"/>
    </xsl:call-template>
  </xsl:template>

  <!-- Sections that are not flattened produce no output. -->
  <xsl:template match="jra3:Science | jra3:Results"/>

</xsl:stylesheet>

Running this XSL file on my metadata.xml file, I get this type of output:

<?xml version="1.0" encoding="UTF-8"?>
<metadata>

<AVU><Attribute>Source/Experiment/expUID</Attribute><Value>SOMEVALUE</Value><Unit/></AVU>

<AVU><Attribute>Source/Experiment/expURI</Attribute><Value></Value><Unit/></AVU>

<AVU><Attribute>Source/Tools/ShortName</Attribute><Value>UG</Value><Unit/></AVU>

<AVU><Attribute>Source/Tools/Version</Attribute><Value>0bcfb63f7c6c67cd954609c3a90a27b2</Value><Unit/></AVU>

<AVU><Attribute>Authors/Name/First</Attribute><Value>Alice</Value></AVU>

<AVU><Attribute>Authors/Name/Middle</Attribute><Value></Value></AVU>

<AVU><Attribute>Authors/Name/Last</Attribute><Value>Bob</Value></AVU>

<AVU><Attribute>Authors/Role</Attribute><Value>Group Leader</Value></AVU>

<AVU><Attribute>Authors/Address</Attribute><Value>Some Street,
    Some PostalCode,
    Some City,
    Some Country,
    Some Number,
    Some E-mail Address,
    Some Web Address.
      </Value></AVU>

<AVU><Attribute>Institutes/ShortName</Attribute><Value>Some ShortName for a Lab,
      </Value></AVU>

<AVU><Attribute>Institutes/Address</Attribute><Value>Some Street,
    Some PostalCode,
    Some City,
    Some Country,
    Some Number,
    Some E-mail Address,
    Some Web Address.
      </Value></AVU>

</metadata>

This flattened output would allow the XML micro service to load it up and associate it as metadata to my collection of files. I think using the XSLT microservice to transform the data on the server side would be ideal.

It seems this is the most effective strategy for a number of iRODS users out there in the world.

Bookmark and Share