rdftemplate

Library for generating XML documents from RDF data using templates
git clone https://code.djc.id.au/git/rdftemplate/
commit 24898296acd1a8a7a4142c755adaf33dda6bd374
parent 02acfcae740b6a35def527eb9d55d443c89f510e
Author: Dan Callaghan <djc@djc.id.au>
Date:   Sun, 27 Mar 2011 11:42:04 +1000

HTML compatibility mode for TemplateInterpolator

For now, this just strips out nested hyperlinks.

Diffstat:
M src/main/java/au/id/djc/rdftemplate/TemplateInterpolator.java | 16 ++++++++++++++--
A src/main/java/au/id/djc/rdftemplate/html/XHTMLEventConsumer.java | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/test/java/au/id/djc/rdftemplate/TemplateInterpolatorHtmlCompatibleUnitTest.java | 47 +++++++++++++++++++++++++++++++++++++++++++++++
A src/test/resources/au/id/djc/rdftemplate/nested-anchors.xml | 9 +++++++++
4 files changed, 137 insertions(+), 2 deletions(-)
diff --git a/src/main/java/au/id/djc/rdftemplate/TemplateInterpolator.java b/src/main/java/au/id/djc/rdftemplate/TemplateInterpolator.java
@@ -1,7 +1,7 @@
 package au.id.djc.rdftemplate;
 
-import java.io.InputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
 import java.io.StringWriter;
@@ -36,6 +36,7 @@ import javax.xml.stream.util.XMLEventConsumer;
 import com.hp.hpl.jena.rdf.model.Literal;
 import com.hp.hpl.jena.rdf.model.RDFNode;
 
+import au.id.djc.rdftemplate.html.XHTMLEventConsumer;
 import au.id.djc.rdftemplate.selector.InvalidSelectorSyntaxException;
 import au.id.djc.rdftemplate.selector.Selector;
 import au.id.djc.rdftemplate.selector.SelectorFactory;
@@ -47,13 +48,22 @@ public class TemplateInterpolator {
     private final XMLInputFactory inputFactory = XMLInputFactory.newInstance();
     private final XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
     private final XMLEventFactory eventFactory = XMLEventFactory.newInstance();
-    
     private final SelectorFactory selectorFactory;
+    private final boolean htmlCompatible;
     
     public TemplateInterpolator(SelectorFactory selectorFactory) {
+        this(selectorFactory, false);
+    }
+    
+    public TemplateInterpolator(SelectorFactory selectorFactory, boolean htmlCompatible) {
         this.selectorFactory = selectorFactory;
         inputFactory.setProperty(XMLInputFactory.IS_COALESCING, true);
         outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
+        this.htmlCompatible = htmlCompatible;
+    }
+    
+    public boolean isHtmlCompatible() {
+        return htmlCompatible;
     }
     
     public String interpolate(Reader reader, RDFNode node) {
@@ -115,6 +125,8 @@ public class TemplateInterpolator {
     
     public void interpolate(Iterator<XMLEvent> reader, RDFNode node, XMLEventConsumer writer)
             throws XMLStreamException {
+        if (htmlCompatible)
+            writer = new XHTMLEventConsumer(writer);
         while (reader.hasNext()) {
             XMLEvent event = reader.next();
             switch (event.getEventType()) {
diff --git a/src/main/java/au/id/djc/rdftemplate/html/XHTMLEventConsumer.java b/src/main/java/au/id/djc/rdftemplate/html/XHTMLEventConsumer.java
@@ -0,0 +1,67 @@
+package au.id.djc.rdftemplate.html;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.EndElement;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+import javax.xml.stream.util.XMLEventConsumer;
+
+/**
+ * {@link XMLEventConsumer} implementation which indirects to another event
+ * consumer, but munges the stream on the way through to adhere to the vagaries
+ * of (X)HTML.
+ * <p>
+ * Only elements in the XHTML namespace are modified, elements from other
+ * namespaces are passed through untouched.
+ * <p>
+ * The HTML-specific things currently handled by this class are:
+ * <ul>
+ * <li>Nested anchors are removed (they are
+ * <a href="http://www.w3.org/TR/html401/struct/links.html#h-12.2.2">illegal</a>)</li>
+ * </ul>
+ */
+// XXX do self-closing tags here too
+public class XHTMLEventConsumer implements XMLEventConsumer {
+    
+    private static final QName XHTML_A_QNAME = new QName("http://www.w3.org/1999/xhtml", "a");
+    
+    private final XMLEventConsumer delegate;
+    private int anchorNestDepth = 0;
+
+    public XHTMLEventConsumer(XMLEventConsumer delegate) {
+        this.delegate = delegate;
+    }
+
+    @Override
+    public void add(XMLEvent event) throws XMLStreamException {
+        switch (event.getEventType()) {
+            case XMLStreamConstants.START_ELEMENT: {
+                StartElement start = event.asStartElement();
+                if (XHTML_A_QNAME.equals(start.getName())) {
+                    anchorNestDepth ++;
+                    if (anchorNestDepth == 1)
+                        delegate.add(event);
+                } else {
+                    delegate.add(event);
+                }
+                break;
+            }
+            case XMLStreamConstants.END_ELEMENT: {
+                EndElement end = event.asEndElement();
+                if (XHTML_A_QNAME.equals(end.getName())) {
+                    if (anchorNestDepth == 1)
+                        delegate.add(event);
+                    anchorNestDepth --;
+                } else {
+                    delegate.add(event);
+                }
+                break;
+            }
+            default:
+                delegate.add(event);
+        }
+    }
+
+}
diff --git a/src/test/java/au/id/djc/rdftemplate/TemplateInterpolatorHtmlCompatibleUnitTest.java b/src/test/java/au/id/djc/rdftemplate/TemplateInterpolatorHtmlCompatibleUnitTest.java
@@ -0,0 +1,47 @@
+package au.id.djc.rdftemplate;
+
+import static org.junit.Assert.*;
+import static org.junit.matchers.JUnitMatchers.*;
+
+import java.io.InputStreamReader;
+
+import javax.xml.stream.XMLOutputFactory;
+
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.vocabulary.DCTerms;
+import org.junit.Before;
+import org.junit.Test;
+
+import au.id.djc.rdftemplate.selector.AntlrSelectorFactory;
+
+public class TemplateInterpolatorHtmlCompatibleUnitTest {
+    
+    private XMLOutputFactory outputFactory;
+    private Model model;
+    private TemplateInterpolator templateInterpolator;
+    
+    @Before
+    public void setUp() {
+        outputFactory = XMLOutputFactory.newInstance();
+        outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
+        model = ModelFactory.createDefaultModel();
+        AntlrSelectorFactory selectorFactory = new AntlrSelectorFactory();
+        selectorFactory.setNamespacePrefixMap(TestNamespacePrefixMap.getInstance());
+        templateInterpolator = new TemplateInterpolator(selectorFactory, /* htmlCompatible */ true);
+    }
+    
+    @Test
+    public void should_strip_nested_anchors() {
+        Resource book = model.createResource("http://miskinhill.com.au/cited/books/lermontov-1899");
+        String title = "<span xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">" +
+                "this has a <a href=\"http://elsewhere.invalid\">nested anchor</a></span>";
+        model.add(model.createLiteralStatement(book, DCTerms.title, model.createLiteral(title, true)));
+        InputStreamReader templateReader = new InputStreamReader(this.getClass().getResourceAsStream("nested-anchors.xml"));
+        String result = templateInterpolator.interpolate(templateReader, book);
+        assertThat(result, containsString(
+                "<a href=\"http://nowhere.invalid/\"><span lang=\"en\">this has a nested anchor</span></a>"));
+    }
+
+}
diff --git a/src/test/resources/au/id/djc/rdftemplate/nested-anchors.xml b/src/test/resources/au/id/djc/rdftemplate/nested-anchors.xml
@@ -0,0 +1,8 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+      xmlns:rdf="http://code.miskinhill.com.au/rdftemplate/">
+<body>
+
+<a href="http://nowhere.invalid/" rdf:content="dc:title" />
+
+</body>
+</html>
\ No newline at end of file