src/RDF_Parser.vala (14207B) - raw
1 /*
2 * xmpedit
3 * Copyright 2010 Dan Callaghan <djc@djc.id.au>
4 * Released under GPLv2
5 */
6
7 namespace RDF {
8
/**
 * Errors reported while parsing an XML document into RDF statements.
 */
errordomain ParseError {
    // Raised by Parser.parse when libxml2 returns no document for the input.
    UNPARSEABLE_XML,
    // Raised by Parser.parse when the parsed document has no root element.
    EMPTY_XML,
    // Raised by Parser.parse when no <rdf:RDF> element can be found.
    DOCUMENT_ELEMENT_NOT_FOUND,
    // Presumably meant for XML that is well-formed but is not legal RDF/XML
    // (judging by the name) -- confirm against the code that throws it.
    ILLEGAL_RDFXML
}
15
/**
 * Resolves a (possibly relative) URI reference against a base URI.
 *
 * If either the base URI or the reference cannot be parsed by libsoup, the
 * reference is returned exactly as it was written instead of dereferencing
 * the null URI object.
 *
 * @param uri URI reference to resolve
 * @param base_uri URI to resolve against
 * @return the resolved URI, or //uri// unchanged when resolution is not possible
 */
private string resolve_uri(string uri, string base_uri) {
    // Soup.URI construction yields null for strings it considers invalid
    Soup.URI? parsed_base = new Soup.URI(base_uri);
    if (parsed_base == null) {
        return uri;
    }
    Soup.URI? resolved = new Soup.URI.with_base(parsed_base, uri);
    if (resolved == null) {
        return uri;
    }
    return resolved.to_string(false);
}
19
/**
 * Reads a subset of RDF/XML and inserts the resulting statements into a graph.
 *
 * Handled constructs: subjects named by rdf:about (blank nodes otherwise),
 * typed node elements, property attributes, rdf:resource objects, plain
 * literals with an optional xml:lang, and nested node elements.
 */
private struct Parser {

    private Graph graph;
    private string base_uri;

    /**
     * @param graph graph which receives every parsed statement
     * @param base_uri URI against which relative URI references are resolved
     */
    public Parser(Graph graph, string base_uri) {
        this.graph = graph;
        this.base_uri = base_uri;
    }

    /**
     * Parses //xml// and inserts one statement per triple into the graph.
     *
     * The <rdf:RDF> element may be the root or nested anywhere below it.
     *
     * @param xml the XML document text
     * @throws ParseError UNPARSEABLE_XML if libxml2 cannot parse the text,
     *     EMPTY_XML if there is no root element, DOCUMENT_ELEMENT_NOT_FOUND
     *     if there is no <rdf:RDF> element, ILLEGAL_RDFXML if a node or
     *     property element is not in a namespace
     */
    public void parse(string xml) throws ParseError {
        Xml.Doc* doc = Xml.Parser.parse_memory(xml, (int) xml.length);
        if (doc == null) {
            throw new ParseError.UNPARSEABLE_XML("doc == null");
        }
        try {
            Xml.Node* root = doc->get_root_element();
            if (root == null) {
                throw new ParseError.EMPTY_XML("root == null");
            }
            var document_element = find_rdf_document_element(root);
            if (document_element == null) {
                throw new ParseError.DOCUMENT_ELEMENT_NOT_FOUND("no <rdf:RDF> element");
            }
            for (Xml.Node* child = document_element->children; child != null; child = child->next) {
                if (child->type != Xml.ElementType.ELEMENT_NODE) {
                    continue;
                }
                parse_node_element(child);
            }
        } finally {
            // the document is a raw pointer, so it must be freed on every path
            delete doc;
        }
    }

    /**
     * Tells whether //ns// is the namespace //href//. A missing namespace
     * never matches (and is never dereferenced).
     */
    private bool in_namespace(Xml.Ns* ns, string href) {
        return ns != null && ns->href == href;
    }

    /**
     * Builds the URI for a namespaced XML name (namespace URI + local name).
     *
     * @throws ParseError ILLEGAL_RDFXML if the name is not in a namespace,
     *     since no URI can be formed for it
     */
    private string expand_name(Xml.Ns* ns, string local_name) throws ParseError {
        if (ns == null || ns->href == null) {
            throw new ParseError.ILLEGAL_RDFXML("element <%s> is not in a namespace", local_name);
        }
        return ns->href + local_name;
    }

    /**
     * Finds the first <rdf:RDF> element in document order, starting at
     * (and including) //element//.
     *
     * The subtree is walked iteratively through the child/next/parent links,
     * so deeply nested documents do not consume call stack.
     *
     * @return the element, or null if the subtree contains none
     */
    private Xml.Node* find_rdf_document_element(Xml.Node* element) {
        Xml.Node* current = element;
        while (current != null) {
            if (current->type == Xml.ElementType.ELEMENT_NODE) {
                // both the local name and the namespace have to match
                if (current->name == "RDF" && in_namespace(current->ns, RDF_NS)) {
                    return current;
                }
                if (current->children != null) {
                    current = current->children;
                    continue;
                }
            }
            // no (more) children here: climb until a following sibling exists,
            // but never leave the subtree we were asked to search
            while (current != element && current->next == null) {
                current = current->parent;
            }
            if (current == element) {
                return null;
            }
            current = current->next;
        }
        return null;
    }

    // XXX intern URIs and lang tags

    /**
     * Parses a node element: determines its subject, emits an rdf:type
     * statement for typed elements, then handles its property attributes
     * and property elements.
     *
     * @return the subject the element describes
     * @throws ParseError ILLEGAL_RDFXML if this element or one of its
     *     property elements is not in a namespace
     */
    private SubjectNode parse_node_element(Xml.Node* element) throws ParseError {
        // validates the namespace up front, before anything is inserted
        var element_uri = expand_name(element->ns, element->name);

        // determine resource URI
        SubjectNode subject;
        var subject_uri = element->get_ns_prop("about", RDF_NS);
        if (subject_uri != null) {
            subject = new URIRef(resolve_uri(subject_uri, base_uri));
        } else {
            subject = new Blank(null);
        }

        // anything other than rdf:Description is a typed node element
        bool is_description = element->name == "Description" && in_namespace(element->ns, RDF_NS);
        if (!is_description) {
            graph.insert(new Statement(subject,
                    new URIRef(RDF_NS + "type"),
                    new URIRef(element_uri)));
        }

        // handle attributes
        for (Xml.Attr* attr = element->properties; attr != null; attr = attr->next) {
            if (attr->atype != 0) {
                continue;
            }
            // an attribute without a namespace cannot name a predicate
            if (attr->ns == null) {
                continue;
            }
            // skip rdf:about, xml:lang, rdf:parseType
            if ((attr->name == "about" && attr->ns->href == RDF_NS) ||
                (attr->name == "lang" && attr->ns->href == XML_NS) ||
                (attr->name == "parseType" && attr->ns->href == RDF_NS)) {
                continue;
            }
            parse_property_attribute(subject, attr);
        }

        // handle child elements
        for (Xml.Node* child = element->children; child != null; child = child->next) {
            if (child->type != Xml.ElementType.ELEMENT_NODE) {
                continue;
            }
            parse_property_element(subject, child);
        }

        return subject;
    }

    /**
     * Emits the statement for one property attribute of a node element.
     *
     * rdf:type yields a URI reference object (resolved against the base
     * URI, like rdf:about); every other attribute yields a plain literal
     * carrying the xml:lang in scope on the owning element, if any.
     * Attributes without a namespace are ignored.
     */
    private void parse_property_attribute(SubjectNode subject, Xml.Attr* attr) {
        if (attr->ns == null) {
            return;
        }
        var predicate = new URIRef(attr->ns->href + attr->name);

        // Read the value through the owning element rather than through
        // attr->children: an empty value has no child node at all, and a
        // value containing entity references is split over several nodes.
        string? raw_value = attr->parent->get_ns_prop(attr->name, attr->ns->href);
        string value = (raw_value != null) ? raw_value : "";

        Node object;
        if (attr->name == "type" && attr->ns->href == RDF_NS) {
            object = new URIRef(resolve_uri(value, base_uri));
        } else {
            var lang = attr->parent->get_lang();
            if (lang != null) {
                object = new PlainLiteral.with_lang(value, lang);
            } else {
                object = new PlainLiteral(value);
            }
        }
        graph.insert(new Statement(subject, predicate, object));
    }

    /**
     * Emits the statement for one property element of //subject//.
     *
     * The object is, in order of precedence: the rdf:resource URI (resolved
     * against the base URI), a plain literal when the element has no child
     * elements, or the subject of the first nested node element.
     *
     * @throws ParseError ILLEGAL_RDFXML if the element, or an element
     *     nested in it, is not in a namespace
     */
    private void parse_property_element(SubjectNode subject, Xml.Node* element) throws ParseError {
        var predicate = new URIRef(expand_name(element->ns, element->name));

        // is the object a URI ref? (rdf:resource)
        var object_uri = element->get_ns_prop("resource", RDF_NS);
        if (object_uri != null) {
            var object = new URIRef(resolve_uri(object_uri, base_uri));
            graph.insert(new Statement(subject, predicate, object));
            return;
        }

        // is it a literal? (no children)
        if (element->child_element_count() == 0) {
            PlainLiteral object;
            var lang = element->get_lang();
            if (lang != null) {
                object = new PlainLiteral.with_lang(element->get_content(), lang);
            } else {
                object = new PlainLiteral(element->get_content());
            }
            graph.insert(new Statement(subject, predicate, object));
            return;
        }

        // need to recurse
        for (Xml.Node* child = element->children; child != null; child = child->next) {
            if (child->type != Xml.ElementType.ELEMENT_NODE) {
                continue;
            }
            var object = parse_node_element(child);
            graph.insert(new Statement(subject, predicate, object));
            break; // ignore any other child elements, not legal anyway
        }
    }

}
153
154 #if TEST
155
/**
 * A namespaced attribute on a node element must become a single plain
 * literal statement tagged with the xml:lang of that element.
 */
public void test_property_attributes() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xml:lang="en"
xmlns:Iptc4xmpCore="http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/"
Iptc4xmpCore:Location="UQ St Lucia">
</rdf:Description>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 1);

    var subject = new URIRef("http://example.com/");
    var predicate = new URIRef("http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/Location");
    var literal = new PlainLiteral.with_lang("UQ St Lucia", "en");
    var expected = new Statement(subject, predicate, literal);
    assert(graph.get_statements().contains(expected));
}
172
/**
 * An rdf:type attribute must produce an rdf:type statement whose object
 * is a URI reference rather than a literal.
 */
public void test_property_attributes_rdf_type() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
rdf:type="http://example.com/Class">
</rdf:Description>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 1);

    var subject = new URIRef("http://example.com/");
    var rdf_type = new URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    var klass = new URIRef("http://example.com/Class");
    var expected = new Statement(subject, rdf_type, klass);
    assert(graph.get_statements().contains(expected));
}
187
/**
 * A property element with text content and its own xml:lang must become
 * one language-tagged plain literal statement.
 */
public void test_property_elements() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:description xml:lang="en">Some stuff.</dc:description>
</rdf:Description>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 1);

    var subject = new URIRef("http://example.com/");
    var predicate = new URIRef("http://purl.org/dc/elements/1.1/description");
    var literal = new PlainLiteral.with_lang("Some stuff.", "en");
    var expected = new Statement(subject, predicate, literal);
    assert(graph.get_statements().contains(expected));
}
202
/**
 * A property element without its own xml:lang must pick up the language
 * declared on the enclosing node element.
 */
public void test_property_elements_inherit_lang() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:dc="http://purl.org/dc/elements/1.1/"
xml:lang="en">
<dc:description>Some stuff.</dc:description>
</rdf:Description>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 1);

    var subject = new URIRef("http://example.com/");
    var predicate = new URIRef("http://purl.org/dc/elements/1.1/description");
    var literal = new PlainLiteral.with_lang("Some stuff.", "en");
    var expected = new Statement(subject, predicate, literal);
    assert(graph.get_statements().contains(expected));
}
219
/**
 * Non-ASCII text must survive parsing byte-for-byte, both in a property
 * attribute and in a property element.
 */
public void test_unicode() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:dc="http://purl.org/dc/elements/1.1/"
xml:lang="ru"
dc:title="ночь">
<dc:description>день</dc:description>
</rdf:Description>
</rdf:RDF>""", "http://example.com/");

    assert(graph.get_statements().size == 2);

    // value given as a property attribute
    var title = new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/title"),
            new PlainLiteral.with_lang("\xd0\xbd\xd0\xbe\xd1\x87\xd1\x8c", "ru"));
    assert(graph.get_statements().contains(title));

    // value given as element content
    var description = new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://purl.org/dc/elements/1.1/description"),
            new PlainLiteral.with_lang("\xd0\xb4\xd0\xb5\xd0\xbd\xd1\x8c", "ru"));
    assert(graph.get_statements().contains(description));
}
240
/**
 * The <rdf:RDF> element must be found even when it is wrapped in an
 * unrelated root element.
 */
public void test_find_rdf_root() {
    var graph = new Graph.from_xml("""
<ex:other xmlns:ex="http://some.other.crap/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:dc="http://purl.org/dc/elements/1.1/"
xml:lang="en">
<dc:description>Some stuff.</dc:description>
</rdf:Description>
</rdf:RDF>
</ex:other>
""", "http://example.com/");

    assert(graph.get_statements().size == 1);

    var subject = new URIRef("http://example.com/");
    var predicate = new URIRef("http://purl.org/dc/elements/1.1/description");
    var literal = new PlainLiteral.with_lang("Some stuff.", "en");
    var expected = new Statement(subject, predicate, literal);
    assert(graph.get_statements().contains(expected));
}
259
/**
 * A node element nested inside a property element must become the object
 * of that property and contribute its own statements as well.
 */
public void test_nested_property_elements() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<foaf:knows>
<rdf:Description rdf:about="http://example.com/buddy">
<foaf:name>My Buddy</foaf:name>
</rdf:Description>
</foaf:knows>
</rdf:Description>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 2);

    // outer resource points at the nested one
    var knows = new Statement(
            new URIRef("http://example.com/"),
            new URIRef("http://xmlns.com/foaf/0.1/knows"),
            new URIRef("http://example.com/buddy"));
    assert(graph.get_statements().contains(knows));

    // nested resource carries its own property
    var name = new Statement(
            new URIRef("http://example.com/buddy"),
            new URIRef("http://xmlns.com/foaf/0.1/name"),
            new PlainLiteral("My Buddy"));
    assert(graph.get_statements().contains(name));
}
283
/**
 * A nested node element without rdf:about must become a blank node that
 * is both the object of the outer property and the subject of its own.
 */
public void test_blank() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<foaf:knows>
<rdf:Description>
<foaf:name>My Buddy</foaf:name>
</rdf:Description>
</foaf:knows>
</rdf:Description>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 2);

    // exactly one foaf:knows statement, whatever its object is
    var knows = graph.find_matching_statements(
            new URIRef("http://example.com/"),
            new URIRef("http://xmlns.com/foaf/0.1/knows"),
            null);
    assert(knows.size == 1);

    // pull the blank node out of that single statement
    Gee.Iterator<Statement> cursor = knows.iterator();
    cursor.next();
    Blank buddy = (Blank) cursor.get().object;

    var names = graph.find_matching_statements(
            buddy,
            new URIRef("http://xmlns.com/foaf/0.1/name"),
            new PlainLiteral("My Buddy"));
    assert(names.size == 1);
}
314
/**
 * A node element other than rdf:Description must yield an rdf:type
 * statement in addition to the statements for its properties.
 */
public void test_typed_node_element() {
    var graph = new Graph.from_xml("""
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<foaf:Person rdf:about=""
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<foaf:name>Person</foaf:name>
</foaf:Person>
</rdf:RDF>
""", "http://example.com/");

    assert(graph.get_statements().size == 2);

    // implied by the element name
    var typed = graph.find_matching_statements(
            new URIRef("http://example.com/"),
            new URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            new URIRef("http://xmlns.com/foaf/0.1/Person"));
    assert(typed.size == 1);

    // ordinary property element
    var named = graph.find_matching_statements(
            new URIRef("http://example.com/"),
            new URIRef("http://xmlns.com/foaf/0.1/name"),
            new PlainLiteral("Person"));
    assert(named.size == 1);
}
334
/**
 * Registers every parser test case in this file with the GLib test
 * framework, each under the "/rdf/parser/" path prefix.
 */
public void register_parser_tests() {
    Test.add_func("/rdf/parser/test_property_attributes", test_property_attributes);
    Test.add_func("/rdf/parser/test_property_attributes_rdf_type", test_property_attributes_rdf_type);
    Test.add_func("/rdf/parser/test_property_elements", test_property_elements);
    Test.add_func("/rdf/parser/test_property_elements_inherit_lang", test_property_elements_inherit_lang);
    Test.add_func("/rdf/parser/test_unicode", test_unicode);
    Test.add_func("/rdf/parser/test_find_rdf_root", test_find_rdf_root);
    Test.add_func("/rdf/parser/test_nested_property_elements", test_nested_property_elements);
    Test.add_func("/rdf/parser/test_blank", test_blank);
    Test.add_func("/rdf/parser/test_typed_node_element", test_typed_node_element);
}
346
347 #endif
348
349 }