src/RDF_Parser.vala (14207B) - raw
/*
 * xmpedit
 * Copyright 2010 Dan Callaghan <djc@djc.id.au>
 * Released under GPLv2
 */

namespace RDF {

/**
 * Errors raised by Parser while turning an XML document into RDF statements.
 */
errordomain ParseError {
    // libxml2 returned no document for the input
    UNPARSEABLE_XML,
    // the parsed document has no root element
    EMPTY_XML,
    // no rdf:RDF element was found anywhere in the document
    DOCUMENT_ELEMENT_NOT_FOUND,
    // an element that must yield a URI has no XML namespace
    ILLEGAL_RDFXML
}

/**
 * Resolves a (possibly relative) URI reference against a base URI.
 *
 * @param uri the URI reference to resolve
 * @param base_uri the base URI to resolve against
 * @return the resolved URI as a string
 */
private string resolve_uri(string uri, string base_uri) {
    return new Soup.URI.with_base(new Soup.URI(base_uri), uri).to_string(false);
}

/**
 * Walks an RDF/XML document and inserts the statements it finds into a Graph.
 */
private struct Parser {

    private Graph graph;
    private string base_uri;

    /**
     * @param graph the graph that receives the parsed statements
     * @param base_uri base URI used to resolve rdf:about values
     */
    public Parser(Graph graph, string base_uri) {
        this.graph = graph;
        this.base_uri = base_uri;
    }

    /**
     * Parses xml and inserts every statement found into the graph.
     *
     * The rdf:RDF element may be nested anywhere inside the document; each
     * of its child elements is parsed as a node element.
     *
     * @param xml the XML document text
     * @throws ParseError if the XML cannot be parsed, is empty, contains no
     *         rdf:RDF element, or contains elements without a namespace
     */
    public void parse(string xml) throws ParseError {
        Xml.Doc* doc = Xml.Parser.parse_memory(xml, (int) xml.length);
        if (doc == null)
            throw new ParseError.UNPARSEABLE_XML("doc == null");
        try {
            Xml.Node* root = doc->get_root_element();
            if (root == null)
                throw new ParseError.EMPTY_XML("root == null");
            var document_element = find_rdf_document_element(root);
            if (document_element == null)
                throw new ParseError.DOCUMENT_ELEMENT_NOT_FOUND("no <rdf:RDF> element");
            for (Xml.Node* child = document_element->children; child != null; child = child->next) {
                if (child->type != Xml.ElementType.ELEMENT_NODE)
                    continue;
                parse_node_element(child);
            }
        } finally {
            // the document is a raw libxml2 pointer, so it must be freed by hand
            delete doc;
        }
    }

    /**
     * Tells whether ns is a namespace whose URI equals href.
     *
     * libxml2 leaves the ns pointer NULL on unprefixed attributes and on
     * elements outside any namespace, so it must be checked before use.
     */
    private static bool in_namespace(Xml.Ns* ns, string href) {
        return ns != null && ns->href == href;
    }

    /**
     * Depth-first search for the rdf:RDF element, starting at element.
     *
     * @return the first matching element in document order, or null
     */
    // XXX use explicit stack instead of recursion
    private Xml.Node* find_rdf_document_element(Xml.Node* element) {
        // both the local name and the namespace have to match: other elements
        // in the RDF namespace (rdf:Description, rdf:Bag, ...) are not the
        // document element, and neither is a foreign element named "RDF"
        if (element->name == "RDF" && in_namespace(element->ns, RDF_NS))
            return element;
        for (Xml.Node* child = element->children; child != null; child = child->next) {
            if (child->type != Xml.ElementType.ELEMENT_NODE)
                continue;
            var found = find_rdf_document_element(child);
            if (found != null)
                return found;
        }
        return null;
    }

    // XXX intern URIs and lang tags

    /**
     * Parses a node element: determines its subject, then inserts statements
     * for its type (unless it is rdf:Description), its property attributes
     * and its property elements.
     *
     * @param element the node element
     * @return the subject the element describes (a URIRef when rdf:about is
     *         present, otherwise a new Blank)
     * @throws ParseError.ILLEGAL_RDFXML if an element has no namespace
     */
    private SubjectNode parse_node_element(Xml.Node* element) throws ParseError {
        // an element without a namespace is neither rdf:Description nor
        // something a type URI can be built from
        if (element->ns == null)
            throw new ParseError.ILLEGAL_RDFXML(
                    "node element <%s> has no namespace", element->name);

        // determine resource URI
        SubjectNode subject;
        var subject_uri = element->get_ns_prop("about", RDF_NS);
        if (subject_uri != null)
            subject = new URIRef(resolve_uri(subject_uri, base_uri));
        else
            subject = new Blank(null);

        // is it a typed element?
        if (!(element->name == "Description" && element->ns->href == RDF_NS)) {
            graph.insert(new Statement(subject,
                    new URIRef(RDF_NS + "type"),
                    new URIRef(element->ns->href + element->name)));
        }

        // handle attributes
        // skip rdf:about, xml:lang, rdf:parseType
        for (Xml.Attr* attr = element->properties; attr != null; attr = attr->next) {
            // unqualified attributes carry no namespace, so no predicate URI
            // can be formed for them
            if (attr->ns == null)
                continue;
            if (attr->atype != 0 ||
                    (attr->name == "about" && attr->ns->href == RDF_NS) ||
                    (attr->name == "lang" && attr->ns->href == XML_NS) ||
                    (attr->name == "parseType" && attr->ns->href == RDF_NS))
                continue;
            parse_property_attribute(subject, attr);
        }

        // handle child elements
        for (Xml.Node* child = element->children; child != null; child = child->next) {
            if (child->type != Xml.ElementType.ELEMENT_NODE)
                continue;
            parse_property_element(subject, child);
        }

        return subject;
    }

    /**
     * Inserts the statement expressed by a property attribute.
     *
     * rdf:type yields a URIRef object; every other attribute yields a plain
     * literal, tagged with the language in scope on the owning element.
     *
     * @param subject the subject of the owning node element
     * @param attr the attribute; ignored if it has no namespace
     */
    private void parse_property_attribute(SubjectNode subject, Xml.Attr* attr) {
        if (attr->ns == null)
            return;
        var predicate = new URIRef(attr->ns->href + attr->name);

        // Read the value through the owning element instead of
        // attr->children->content: children is NULL for an empty value and
        // may hold several nodes when the value is split up.
        string? value = attr->parent->get_ns_prop(attr->name, attr->ns->href);
        if (value == null)
            value = "";

        Node object;
        if (attr->name == "type" && attr->ns->href == RDF_NS) {
            // NOTE(review): the type URI is used verbatim, not resolved
            // against base_uri -- confirm this is intended
            object = new URIRef(value);
        } else {
            var lang = attr->parent->get_lang();
            if (lang != null)
                object = new PlainLiteral.with_lang(value, lang);
            else
                object = new PlainLiteral(value);
        }
        graph.insert(new Statement(subject, predicate, object));
    }

    /**
     * Inserts the statement expressed by a property element.
     *
     * The object is, in order of precedence: the rdf:resource URI, a plain
     * literal built from the text content (when there are no child
     * elements), or the subject of the first nested node element.
     *
     * @param subject the subject of the enclosing node element
     * @param element the property element
     * @throws ParseError.ILLEGAL_RDFXML if an element has no namespace
     */
    private void parse_property_element(SubjectNode subject, Xml.Node* element) throws ParseError {
        // without a namespace there is no predicate URI to build
        if (element->ns == null)
            throw new ParseError.ILLEGAL_RDFXML(
                    "property element <%s> has no namespace", element->name);
        var predicate = new URIRef(element->ns->href + element->name);

        // is the object a URI ref? (rdf:resource)
        // NOTE(review): unlike rdf:about, this value is not resolved against
        // base_uri -- confirm whether relative references should be
        var object_uri = element->get_ns_prop("resource", RDF_NS);
        if (object_uri != null) {
            var object = new URIRef(object_uri);
            graph.insert(new Statement(subject, predicate, object));
            return;
        }

        // is it a literal? (no children)
        if (element->child_element_count() == 0) {
            PlainLiteral object;
            var lang = element->get_lang();
            if (lang != null)
                object = new PlainLiteral.with_lang(element->get_content(), lang);
            else
                object = new PlainLiteral(element->get_content());
            graph.insert(new Statement(subject, predicate, object));
            return;
        }

        // need to recurse
        for (Xml.Node* child = element->children; child != null; child = child->next) {
            if (child->type != Xml.ElementType.ELEMENT_NODE)
                continue;
            var object = parse_node_element(child);
            graph.insert(new Statement(subject, predicate, object));
            break; // ignore any other child elements, not legal anyway
        }
    }

}

#if TEST

public void test_property_attributes() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    xml:lang="en"
                    xmlns:Iptc4xmpCore="http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/"
                    Iptc4xmpCore:Location="UQ St Lucia">
            </rdf:Description>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 1);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/Location"),
            new PlainLiteral.with_lang("UQ St Lucia", "en"))));
}

public void test_property_attributes_rdf_type() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    rdf:type="http://example.com/Class">
            </rdf:Description>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 1);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            new URIRef("http://example.com/Class"))));
}

public void test_property_elements() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
                <dc:description xml:lang="en">Some stuff.</dc:description>
            </rdf:Description>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 1);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/description"),
            new PlainLiteral.with_lang("Some stuff.", "en"))));
}

public void test_property_elements_inherit_lang() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    xmlns:dc="http://purl.org/dc/elements/1.1/"
                    xml:lang="en">
                <dc:description>Some stuff.</dc:description>
            </rdf:Description>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 1);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/description"),
            new PlainLiteral.with_lang("Some stuff.", "en"))));
}

public void test_unicode() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    xmlns:dc="http://purl.org/dc/elements/1.1/"
                    xml:lang="ru"
                    dc:title="ночь">
                <dc:description>день</dc:description>
            </rdf:Description>
        </rdf:RDF>""", "http://example.com/");
    assert(g.get_statements().size == 2);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/title"),
            new PlainLiteral.with_lang("\xd0\xbd\xd0\xbe\xd1\x87\xd1\x8c", "ru"))));
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/description"),
            new PlainLiteral.with_lang("\xd0\xb4\xd0\xb5\xd0\xbd\xd1\x8c", "ru"))));
}

public void test_find_rdf_root() {
    var g = new Graph.from_xml("""
        <ex:other xmlns:ex="http://some.other.crap/">
            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
                <rdf:Description rdf:about=""
                        xmlns:dc="http://purl.org/dc/elements/1.1/"
                        xml:lang="en">
                    <dc:description>Some stuff.</dc:description>
                </rdf:Description>
            </rdf:RDF>
        </ex:other>
        """, "http://example.com/");
    assert(g.get_statements().size == 1);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/description"),
            new PlainLiteral.with_lang("Some stuff.", "en"))));
}

public void test_nested_property_elements() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    xmlns:foaf="http://xmlns.com/foaf/0.1/">
                <foaf:knows>
                    <rdf:Description rdf:about="http://example.com/buddy">
                        <foaf:name>My Buddy</foaf:name>
                    </rdf:Description>
                </foaf:knows>
            </rdf:Description>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 2);
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://xmlns.com/foaf/0.1/knows"),
            new URIRef("http://example.com/buddy"))));
    assert(g.get_statements().contains(new Statement(
            new URIRef("http://example.com/buddy"),
            new URIRef("http://xmlns.com/foaf/0.1/name"),
            new PlainLiteral("My Buddy"))));
}

public void test_blank() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    xmlns:foaf="http://xmlns.com/foaf/0.1/">
                <foaf:knows>
                    <rdf:Description>
                        <foaf:name>My Buddy</foaf:name>
                    </rdf:Description>
                </foaf:knows>
            </rdf:Description>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 2);
    var statements = g.find_matching_statements(
            new URIRef("http://example.com/"),
            new URIRef("http://xmlns.com/foaf/0.1/knows"),
            null);
    assert(statements.size == 1);
    Blank blank;
    {
        Gee.Iterator<Statement> it = statements.iterator();
        it.next();
        blank = (Blank) it.get().object;
    }
    assert(g.find_matching_statements(
            blank,
            new URIRef("http://xmlns.com/foaf/0.1/name"),
            new PlainLiteral("My Buddy")).size == 1);
}

public void test_typed_node_element() {
    var g = new Graph.from_xml("""
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <foaf:Person rdf:about=""
                    xmlns:foaf="http://xmlns.com/foaf/0.1/">
                <foaf:name>Person</foaf:name>
            </foaf:Person>
        </rdf:RDF>
        """, "http://example.com/");
    assert(g.get_statements().size == 2);
    assert(g.find_matching_statements(
            new URIRef("http://example.com/"),
            new URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            new URIRef("http://xmlns.com/foaf/0.1/Person")).size == 1);
    assert(g.find_matching_statements(
            new URIRef("http://example.com/"),
            new URIRef("http://xmlns.com/foaf/0.1/name"),
            new PlainLiteral("Person")).size == 1);
}

public void register_parser_tests() {
    Test.add_func("/rdf/parser/test_property_attributes", test_property_attributes);
    Test.add_func("/rdf/parser/test_property_attributes_rdf_type", test_property_attributes_rdf_type);
    Test.add_func("/rdf/parser/test_property_elements", test_property_elements);
    Test.add_func("/rdf/parser/test_property_elements_inherit_lang", test_property_elements_inherit_lang);
    Test.add_func("/rdf/parser/test_unicode", test_unicode);
    Test.add_func("/rdf/parser/test_find_rdf_root", test_find_rdf_root);
    Test.add_func("/rdf/parser/test_nested_property_elements", test_nested_property_elements);
    Test.add_func("/rdf/parser/test_blank", test_blank);
    Test.add_func("/rdf/parser/test_typed_node_element", test_typed_node_element);
}

#endif

}