commit 24898296acd1a8a7a4142c755adaf33dda6bd374
parent 02acfcae740b6a35def527eb9d55d443c89f510e
Author: Dan Callaghan <djc@djc.id.au>
Date: Sun, 27 Mar 2011 11:42:04 +1000
HTML compatibility mode for TemplateInterpolator
For now, this just strips out nested hyperlinks.
Diffstat:
4 files changed, 137 insertions(+), 2 deletions(-)
diff --git a/src/main/java/au/id/djc/rdftemplate/TemplateInterpolator.java b/src/main/java/au/id/djc/rdftemplate/TemplateInterpolator.java
@@ -1,7 +1,7 @@
package au.id.djc.rdftemplate;
-import java.io.InputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
@@ -36,6 +36,7 @@ import javax.xml.stream.util.XMLEventConsumer;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.RDFNode;
+import au.id.djc.rdftemplate.html.XHTMLEventConsumer;
import au.id.djc.rdftemplate.selector.InvalidSelectorSyntaxException;
import au.id.djc.rdftemplate.selector.Selector;
import au.id.djc.rdftemplate.selector.SelectorFactory;
@@ -47,13 +48,22 @@ public class TemplateInterpolator {
private final XMLInputFactory inputFactory = XMLInputFactory.newInstance();
private final XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
private final XMLEventFactory eventFactory = XMLEventFactory.newInstance();
-
private final SelectorFactory selectorFactory;
+ private final boolean htmlCompatible;
public TemplateInterpolator(SelectorFactory selectorFactory) {
+ this(selectorFactory, false); // HTML compatibility mode stays off unless requested explicitly
+ }
+
+ public TemplateInterpolator(SelectorFactory selectorFactory, boolean htmlCompatible) {
this.selectorFactory = selectorFactory;
inputFactory.setProperty(XMLInputFactory.IS_COALESCING, true);
outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
+ this.htmlCompatible = htmlCompatible; // when true, interpolate(...) wraps its writer in an XHTMLEventConsumer
+ }
+
+ public boolean isHtmlCompatible() { // exposes the final flag chosen at construction time
+ return htmlCompatible;
+ }
public String interpolate(Reader reader, RDFNode node) {
@@ -115,6 +125,8 @@ public class TemplateInterpolator {
public void interpolate(Iterator<XMLEvent> reader, RDFNode node, XMLEventConsumer writer)
throws XMLStreamException {
+ if (htmlCompatible)
+ writer = new XHTMLEventConsumer(writer);
while (reader.hasNext()) {
XMLEvent event = reader.next();
switch (event.getEventType()) {
diff --git a/src/main/java/au/id/djc/rdftemplate/html/XHTMLEventConsumer.java b/src/main/java/au/id/djc/rdftemplate/html/XHTMLEventConsumer.java
@@ -0,0 +1,67 @@
+package au.id.djc.rdftemplate.html;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.EndElement;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+import javax.xml.stream.util.XMLEventConsumer;
+
+/**
+ * {@link XMLEventConsumer} implementation which indirects to another event
+ * consumer, but munges the stream on the way through to adhere to the vagaries
+ * of (X)HTML.
+ * <p>
+ * Only elements in the XHTML namespace are modified; elements from other
+ * namespaces are passed through untouched.
+ * <p>
+ * The HTML-specific things currently handled by this class are:
+ * <ul>
+ * <li>The tags of nested anchors are removed, their content is kept (nesting is
+ * <a href="http://www.w3.org/TR/html401/struct/links.html#h-12.2.2">illegal</a>)</li>
+ * </ul>
+ */
+// XXX do self-closing tags here too
+public class XHTMLEventConsumer implements XMLEventConsumer {
+
+    private static final QName XHTML_A_QNAME = new QName("http://www.w3.org/1999/xhtml", "a");
+
+    private final XMLEventConsumer delegate;
+    private int anchorNestDepth = 0; // number of XHTML anchors currently open; never negative
+
+    /** @param delegate consumer which receives every event that is not filtered out */
+    public XHTMLEventConsumer(XMLEventConsumer delegate) {
+        this.delegate = delegate;
+    }
+
+    /** Forwards {@code event} to the delegate unless it is the start or end tag of a nested XHTML anchor. */
+    @Override
+    public void add(XMLEvent event) throws XMLStreamException {
+        switch (event.getEventType()) {
+            case XMLStreamConstants.START_ELEMENT: {
+                StartElement start = event.asStartElement();
+                if (XHTML_A_QNAME.equals(start.getName())) {
+                    anchorNestDepth++;
+                    if (anchorNestDepth > 1) {
+                        return; // nested anchor: drop its start tag
+                    }
+                }
+                break;
+            }
+            case XMLStreamConstants.END_ELEMENT: {
+                EndElement end = event.asEndElement();
+                if (XHTML_A_QNAME.equals(end.getName())) {
+                    if (anchorNestDepth == 1) {
+                        delegate.add(event);
+                    }
+                    // Clamp at zero so an unmatched end tag cannot make later anchors vanish.
+                    anchorNestDepth = Math.max(0, anchorNestDepth - 1);
+                    return;
+                }
+                break;
+            }
+        }
+        delegate.add(event); // everything else passes through unchanged
+    }
+}
diff --git a/src/test/java/au/id/djc/rdftemplate/TemplateInterpolatorHtmlCompatibleUnitTest.java b/src/test/java/au/id/djc/rdftemplate/TemplateInterpolatorHtmlCompatibleUnitTest.java
@@ -0,0 +1,47 @@
+package au.id.djc.rdftemplate;
+
+import static org.junit.Assert.*;
+import static org.junit.matchers.JUnitMatchers.*;
+
+import java.io.InputStreamReader;
+
+import javax.xml.stream.XMLOutputFactory;
+
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.vocabulary.DCTerms;
+import org.junit.Before;
+import org.junit.Test;
+
+import au.id.djc.rdftemplate.selector.AntlrSelectorFactory;
+
+public class TemplateInterpolatorHtmlCompatibleUnitTest {
+    private XMLOutputFactory outputFactory;
+    private Model model;
+    private TemplateInterpolator templateInterpolator;
+
+    @Before
+    public void setUp() {
+        outputFactory = XMLOutputFactory.newInstance();
+        outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
+        model = ModelFactory.createDefaultModel();
+        AntlrSelectorFactory selectorFactory = new AntlrSelectorFactory();
+        selectorFactory.setNamespacePrefixMap(TestNamespacePrefixMap.getInstance());
+        templateInterpolator = new TemplateInterpolator(selectorFactory, /* htmlCompatible */ true);
+    }
+
+    /** The anchor inside the title literal must lose its tags when rendered inside the template's own anchor. */
+    @Test
+    public void should_strip_nested_anchors() {
+        Resource book = model.createResource("http://miskinhill.com.au/cited/books/lermontov-1899");
+        String title = "<span xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">" +
+                "this has a <a href=\"http://elsewhere.invalid\">nested anchor</a></span>";
+        model.add(model.createLiteralStatement(book, DCTerms.title, model.createLiteral(title, true)));
+        InputStreamReader templateReader = new InputStreamReader( // decode as UTF-8, not the platform default
+                getClass().getResourceAsStream("nested-anchors.xml"), java.nio.charset.Charset.forName("UTF-8"));
+        String result = templateInterpolator.interpolate(templateReader, book);
+        assertThat(result, containsString(
+                "<a href=\"http://nowhere.invalid/\"><span lang=\"en\">this has a nested anchor</span></a>"));
+    }
+}
diff --git a/src/test/resources/au/id/djc/rdftemplate/nested-anchors.xml b/src/test/resources/au/id/djc/rdftemplate/nested-anchors.xml
@@ -0,0 +1,8 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:rdf="http://code.miskinhill.com.au/rdftemplate/">
+<body>
+
+<a href="http://nowhere.invalid/" rdf:content="dc:title" />
+
+</body>
+</html>
+\ No newline at end of file