Question

I'm looking to replace all title elements with h1, h2, ... , h6 elements depending on how many ancestors are section elements. Example input/output:

Input.xml

<document>
    <section>
        <title>Title A</title>
        <section>
            <title>Title B</title>
        </section>
        <section>
            <title>Title C</title>
            <section>
                <title>Title D</title>
            </section>
        </section>
    </section>
</document>

Output.xml

<document>
    <section>
        <h1>Title A</h1>
        <section>
            <h2>Title B</h2>
        </section>
        <section>
            <h2>Title C</h2>
            <section>
                <h3>Title D</h3>
            </section>
        </section>
    </section>
</document>

I can replace all titles with h1s using something like this

swapTitles :: ArrowXml a => a XmlTree XmlTree
swapTitles = processTopDown $
             (changeQName . const $ mkName "h1")
             `when`
             (isElem >>> (hasQName $ mkName "title"))

I believe I should be using ArrowState, but I've not been able to figure out how. Can someone point me in the right direction?

Was it helpful?

Solution

Using XSL with package hxt-xslt. Standards make life easier :-)

{-# LANGUAGE Arrows, PackageImports #-} 

import System.Environment ( getArgs )
import System.Exit (exitSuccess, exitWith, ExitCode(..))

import Control.Arrow
import "hxt" Text.XML.HXT.Core 
import "hxt" Text.XML.HXT.DOM.XmlKeywords 
import "hxt-xslt" Text.XML.HXT.XSLT.XsltArrows
import "hxt" Text.XML.HXT.Arrow.XmlState.TraceHandling (withTraceLevel)

process :: String -> String -> IO [String]
process xslStylesheetPath xmlDocPath = do

    -- compile stylesheet

    compiledStyleSheetResults <- runX $ 
        arr (const xslStylesheetPath) 
        >>> readXSLTDoc [ withValidate yes, withInputEncoding utf8]   -- withTrace 2 
        >>> {- withTraceLevel 2 -} xsltCompileStylesheet

    case compiledStyleSheetResults of
         [] -> return ["error compiling " ++ xslStylesheetPath] 
         compiledStyleSheet : _ -> do

             -- apply compiled stylesheet to xml doc 

             runX $ arr (const xmlDocPath) 
                 >>> readXSLTDoc [ withValidate yes, withInputEncoding utf8] -- withTrace 2
                 >>> xsltApplyStylesheet compiledStyleSheet
                 >>> writeDocumentToString [withOutputEncoding utf8, 
                                            withXmlPi yes, withIndent yes]

-- readXSLTDoc from internals of module Text.XML.HXT.XSLT.XsltArrows

readXSLTDoc :: SysConfigList -> IOSArrow String XmlTree
readXSLTDoc options
    = readFromDocument (options ++ defaultOptions)
    where
    defaultOptions
        = [ withCheckNamespaces yes
          , withValidate        no
          , withPreserveComment no
          ]         

main = do
 args <- getArgs
 case args of 
   [arg1, arg2] -> do
       results <- process arg1 arg2
       case results of
            [] -> putStrLn "errors"
            result : _ -> putStrLn result

       exitSuccess

   _ -> do 
            putStrLn "missing parameters: xslStylesheetPath xmlDocPath"
            exitWith $ ExitFailure 1

with XSL file "mystyle.xsl"

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output indent="yes"/>
<xsl:strip-space elements="*"/>

<xsl:template match="/">
  <xsl:for-each select="document">
    <xsl:copy>
      <xsl:call-template name="myloop">
        <xsl:with-param name="nesting" select="0"/>
      </xsl:call-template>
    </xsl:copy>
  </xsl:for-each>
</xsl:template>

<xsl:template name="myloop">
  <xsl:param name="nesting"/>

  <xsl:if test="title">
   <xsl:element name="{concat('h',string($nesting))}">
     <xsl:value-of select="title" /> 
   </xsl:element>
  </xsl:if> 

  <xsl:for-each select="section">
    <xsl:copy>
      <xsl:call-template name="myloop">
        <xsl:with-param name="nesting" select="$nesting+1"/>
      </xsl:call-template>
    </xsl:copy>
  </xsl:for-each>

</xsl:template>
</xsl:stylesheet>

with "yourdata.xml"

<?xml version="1.0" encoding="UTF-8"?>
<document>
    <section>
        <title>Title A</title>
        <section>
            <title>Title B</title>
        </section>
        <section>
            <title>Title C</title>
            <section>
                <title>Title D</title>
            </section>
        </section>
    </section>
</document>

running

runhaskell test.hs mystyle.xsl yourdata.xml

result:

<?xml version="1.0" encoding="UTF-8"?>
<document>
  <section>
    <h1>Title A</h1>
    <section>
      <h2>Title B</h2>
    </section>
    <section>
      <h2>Title C</h2>
      <section>
        <h3>Title D</h3>
      </section>
    </section>
  </section>
</document>
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top