xmpedit

GTK+ editor for XMP metadata embedded in images
git clone https://code.djc.id.au/git/xmpedit/

src/RDF_Parser.vala (14207B) - raw

      1 /*
      2  * xmpedit
      3  * Copyright 2010 Dan Callaghan <djc@djc.id.au>
      4  * Released under GPLv2
      5  */
      6 
      7 namespace RDF {
      8 
/**
 * Errors raised by the RDF/XML parser in this file.
 */
errordomain ParseError {
    // libxml2 returned no document for the given text
    UNPARSEABLE_XML,
    // the document was parsed but has no root element
    EMPTY_XML,
    // no rdf:RDF element exists anywhere in the document
    DOCUMENT_ELEMENT_NOT_FOUND,
    // presumably: the XML is well-formed but is not legal RDF/XML
    // (inferred from the name -- confirm against the places it is thrown)
    ILLEGAL_RDFXML
}
     15 
     16 private string resolve_uri(string uri, string base_uri) {
     17     return new Soup.URI.with_base(new Soup.URI(base_uri), uri).to_string(false);
     18 }
     19 
     20 private struct Parser {
     21 
     22     private Graph graph;
     23     private string base_uri;
     24     
     25     public Parser(Graph graph, string base_uri) {
     26         this.graph = graph;
     27         this.base_uri = base_uri;
     28     }
     29 
     30     public void parse(string xml) throws ParseError {
     31         Xml.Doc* doc = Xml.Parser.parse_memory(xml, (int) xml.length);
     32         if (doc == null)
     33             throw new ParseError.UNPARSEABLE_XML("doc == null");
     34         try {
     35             Xml.Node* root = doc->get_root_element();
     36             if (root == null)
     37                 throw new ParseError.EMPTY_XML("root == null");
     38             var document_element = find_rdf_document_element(root);
     39             if (document_element == null)
     40                 throw new ParseError.DOCUMENT_ELEMENT_NOT_FOUND("no <rdf:RDF> element");
     41             for (Xml.Node* child = document_element->children; child != null; child = child->next) {
     42                 if (child->type != Xml.ElementType.ELEMENT_NODE)
     43                     continue;
     44                 parse_node_element(child);
     45             }
     46         } finally {
     47             delete doc;
     48         }
     49     }
     50 
     51     // XXX use explicit stack instead of recursion
     52     private Xml.Node* find_rdf_document_element(Xml.Node* element) {
     53         if (element->name == "RDF" || element->ns->href == RDF_NS)
     54             return element;
     55         for (Xml.Node* child = element->children; child != null; child = child->next) {
     56             if (child->type != Xml.ElementType.ELEMENT_NODE)
     57                 continue;
     58             var found = find_rdf_document_element(child);
     59             if (found != null)
     60                 return found;
     61         }
     62         return null;
     63     }
     64 
     65     // XXX intern URIs and lang tags
     66 
     67     private SubjectNode parse_node_element(Xml.Node* element) throws ParseError {
     68         // determine resource URI
     69         SubjectNode subject;
     70         var subject_uri = element->get_ns_prop("about", RDF_NS);
     71         if (subject_uri != null)
     72             subject = new URIRef(resolve_uri(subject_uri, base_uri));
     73         else
     74             subject = new Blank(null);
     75         
     76         // is it a typed element?
     77         if (!(element->name == "Description" && element->ns->href == RDF_NS)) {
     78             graph.insert(new Statement(subject,
     79                     new URIRef(RDF_NS + "type"),
     80                     new URIRef(element->ns->href + element->name)));
     81         }
     82 
     83         // handle attributes
     84         // skip rdf:about, xml:lang, rdf:parseType
     85         for (Xml.Attr* attr = element->properties; attr != null; attr = attr->next) {
     86             if (attr->atype != 0 ||
     87                     (attr->name == "about" && attr->ns->href == RDF_NS) ||
     88                     (attr->name == "lang" && attr->ns->href == XML_NS) ||
     89                     (attr->name == "parseType" && attr->ns->href == RDF_NS))
     90                 continue;
     91             parse_property_attribute(subject, attr);
     92         }
     93         
     94         // handle child elements
     95         for (Xml.Node* child = element->children; child != null; child = child->next) {
     96             if (child->type != Xml.ElementType.ELEMENT_NODE)
     97                 continue;
     98             parse_property_element(subject, child);
     99         }
    100         
    101         return subject;
    102     }
    103 
    104     private void parse_property_attribute(SubjectNode subject, Xml.Attr* attr) {
    105         var predicate = new URIRef(attr->ns->href + attr->name);
    106         Node object;
    107         if (attr->name == "type" && attr->ns->href == RDF_NS) {
    108             object = new URIRef(attr->children->content);
    109         } else {
    110             var lang = attr->parent->get_lang();
    111             if (lang != null)
    112                 object = new PlainLiteral.with_lang(attr->children->content, lang);
    113             else
    114                 object = new PlainLiteral(attr->children->content);
    115         }
    116         graph.insert(new Statement(subject, predicate, object));
    117     }
    118 
    119     private void parse_property_element(SubjectNode subject, Xml.Node* element) throws ParseError {
    120         var predicate = new URIRef(element->ns->href + element->name);
    121         
    122         // is the object a URI ref? (rdf:resource)
    123         var object_uri = element->get_ns_prop("resource", RDF_NS);
    124         if (object_uri != null) {
    125             var object = new URIRef(object_uri);
    126             graph.insert(new Statement(subject, predicate, object));
    127             return;
    128         }
    129         
    130         // is it a literal? (no children)
    131         if (element->child_element_count() == 0) {
    132             PlainLiteral object;
    133             var lang = element->get_lang();
    134             if (lang != null)
    135                 object = new PlainLiteral.with_lang(element->get_content(), lang);
    136             else
    137                 object = new PlainLiteral(element->get_content());
    138             graph.insert(new Statement(subject, predicate, object));
    139             return;
    140         }
    141         
    142         // need to recurse
    143         for (Xml.Node* child = element->children; child != null; child = child->next) {
    144             if (child->type != Xml.ElementType.ELEMENT_NODE)
    145                 continue;
    146             var object = parse_node_element(child);
    147             graph.insert(new Statement(subject, predicate, object));            
    148             break; // ignore any other child elements, not legal anyway
    149         }
    150     }
    151     
    152 }
    153 
    154 #if TEST
    155 
    156 public void test_property_attributes() {
    157     var g = new Graph.from_xml("""
    158             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    159                 <rdf:Description rdf:about=""
    160                     xml:lang="en"
    161                     xmlns:Iptc4xmpCore="http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/"
    162                     Iptc4xmpCore:Location="UQ St Lucia">
    163                 </rdf:Description>
    164             </rdf:RDF>
    165             """, "http://example.com/");
    166     assert(g.get_statements().size == 1);
    167     assert(g.get_statements().contains(new Statement(
    168             new URIRef("http://example.com/"),
    169             new URIRef("http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/Location"),
    170             new PlainLiteral.with_lang("UQ St Lucia", "en"))));
    171 }
    172 
    173 public void test_property_attributes_rdf_type() {
    174     var g = new Graph.from_xml("""
    175             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    176                 <rdf:Description rdf:about=""
    177                     rdf:type="http://example.com/Class">
    178                 </rdf:Description>
    179             </rdf:RDF>
    180             """, "http://example.com/");
    181     assert(g.get_statements().size == 1);
    182     assert(g.get_statements().contains(new Statement(
    183             new URIRef("http://example.com/"),
    184             new URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
    185             new URIRef("http://example.com/Class"))));
    186 }
    187 
    188 public void test_property_elements() {
    189     var g = new Graph.from_xml("""
    190             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    191                 <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
    192                     <dc:description xml:lang="en">Some stuff.</dc:description>
    193                 </rdf:Description>
    194             </rdf:RDF>
    195             """, "http://example.com/");
    196     assert(g.get_statements().size == 1);
    197     assert(g.get_statements().contains(new Statement(
    198             new URIRef("http://example.com/"),
    199             new URIRef("http://purl.org/dc/elements/1.1/description"),
    200             new PlainLiteral.with_lang("Some stuff.", "en"))));
    201 }
    202 
    203 public void test_property_elements_inherit_lang() {
    204     var g = new Graph.from_xml("""
    205             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    206                 <rdf:Description rdf:about=""
    207                     xmlns:dc="http://purl.org/dc/elements/1.1/"
    208                     xml:lang="en">
    209                     <dc:description>Some stuff.</dc:description>
    210                 </rdf:Description>
    211             </rdf:RDF>
    212             """, "http://example.com/");
    213     assert(g.get_statements().size == 1);
    214     assert(g.get_statements().contains(new Statement(
    215             new URIRef("http://example.com/"),
    216             new URIRef("http://purl.org/dc/elements/1.1/description"),
    217             new PlainLiteral.with_lang("Some stuff.", "en"))));
    218 }
    219 
    220 public void test_unicode() {
    221     var g = new Graph.from_xml("""
    222             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    223                 <rdf:Description rdf:about=""
    224                     xmlns:dc="http://purl.org/dc/elements/1.1/"
    225                     xml:lang="ru"
    226                     dc:title="ночь">
    227                     <dc:description>день</dc:description>
    228                 </rdf:Description>
    229             </rdf:RDF>""", "http://example.com/");
    230     assert(g.get_statements().size == 2);
    231     assert(g.get_statements().contains(new Statement(
    232             new URIRef("http://example.com/"),
    233             new URIRef("http://purl.org/dc/elements/1.1/title"),
    234             new PlainLiteral.with_lang("\xd0\xbd\xd0\xbe\xd1\x87\xd1\x8c", "ru"))));
    235     assert(g.get_statements().contains(new Statement(
    236             new URIRef("http://example.com/"),
    237             new URIRef("http://purl.org/dc/elements/1.1/description"),
    238             new PlainLiteral.with_lang("\xd0\xb4\xd0\xb5\xd0\xbd\xd1\x8c", "ru"))));
    239 }
    240 
    241 public void test_find_rdf_root() {
    242     var g = new Graph.from_xml("""
    243             <ex:other xmlns:ex="http://some.other.crap/">
    244                 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    245                     <rdf:Description rdf:about=""
    246                         xmlns:dc="http://purl.org/dc/elements/1.1/"
    247                         xml:lang="en">
    248                         <dc:description>Some stuff.</dc:description>
    249                     </rdf:Description>
    250                 </rdf:RDF>
    251             </ex:other>
    252             """, "http://example.com/");
    253     assert(g.get_statements().size == 1);
    254     assert(g.get_statements().contains(new Statement(
    255             new URIRef("http://example.com/"),
    256             new URIRef("http://purl.org/dc/elements/1.1/description"),
    257             new PlainLiteral.with_lang("Some stuff.", "en"))));
    258 }
    259 
    260 public void test_nested_property_elements() {
    261     var g = new Graph.from_xml("""
    262             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    263                 <rdf:Description rdf:about=""
    264                     xmlns:foaf="http://xmlns.com/foaf/0.1/">
    265                     <foaf:knows>
    266                         <rdf:Description rdf:about="http://example.com/buddy">
    267                             <foaf:name>My Buddy</foaf:name>
    268                         </rdf:Description>
    269                     </foaf:knows>
    270                 </rdf:Description>
    271             </rdf:RDF>
    272             """, "http://example.com/");
    273     assert(g.get_statements().size == 2);
    274     assert(g.get_statements().contains(new Statement(
    275             new URIRef("http://example.com/"),
    276             new URIRef("http://xmlns.com/foaf/0.1/knows"),
    277             new URIRef("http://example.com/buddy"))));
    278     assert(g.get_statements().contains(new Statement(
    279             new URIRef("http://example.com/buddy"),
    280             new URIRef("http://xmlns.com/foaf/0.1/name"),
    281             new PlainLiteral("My Buddy"))));
    282 }
    283 
    284 public void test_blank() {
    285     var g = new Graph.from_xml("""
    286             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    287                 <rdf:Description rdf:about=""
    288                     xmlns:foaf="http://xmlns.com/foaf/0.1/">
    289                     <foaf:knows>
    290                         <rdf:Description>
    291                             <foaf:name>My Buddy</foaf:name>
    292                         </rdf:Description>
    293                     </foaf:knows>
    294                 </rdf:Description>
    295             </rdf:RDF>
    296             """, "http://example.com/");
    297     assert(g.get_statements().size == 2);
    298     var statements = g.find_matching_statements(
    299             new URIRef("http://example.com/"),
    300             new URIRef("http://xmlns.com/foaf/0.1/knows"),
    301             null);
    302     assert(statements.size == 1);
    303     Blank blank;
    304     {
    305         Gee.Iterator<Statement> it = statements.iterator();
    306         it.next();
    307         blank = (Blank) it.get().object;
    308     }
    309     assert(g.find_matching_statements(
    310             blank,
    311             new URIRef("http://xmlns.com/foaf/0.1/name"),
    312             new PlainLiteral("My Buddy")).size == 1);
    313 }
    314 
    315 public void test_typed_node_element() {
    316     var g = new Graph.from_xml("""
    317             <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    318                 <foaf:Person rdf:about=""
    319                     xmlns:foaf="http://xmlns.com/foaf/0.1/">
    320                     <foaf:name>Person</foaf:name>
    321                 </foaf:Person>
    322             </rdf:RDF>
    323             """, "http://example.com/");
    324     assert(g.get_statements().size == 2);
    325     assert(g.find_matching_statements(
    326             new URIRef("http://example.com/"),
    327             new URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
    328             new URIRef("http://xmlns.com/foaf/0.1/Person")).size == 1);
    329     assert(g.find_matching_statements(
    330             new URIRef("http://example.com/"),
    331             new URIRef("http://xmlns.com/foaf/0.1/name"),
    332             new PlainLiteral("Person")).size == 1);
    333 }
    334 
// Registers every parser test in this file with the GLib test framework,
// each under the /rdf/parser/ path prefix.
public void register_parser_tests() {
    Test.add_func("/rdf/parser/test_property_attributes", test_property_attributes);
    Test.add_func("/rdf/parser/test_property_attributes_rdf_type", test_property_attributes_rdf_type);
    Test.add_func("/rdf/parser/test_property_elements", test_property_elements);
    Test.add_func("/rdf/parser/test_property_elements_inherit_lang", test_property_elements_inherit_lang);
    Test.add_func("/rdf/parser/test_unicode", test_unicode);
    Test.add_func("/rdf/parser/test_find_rdf_root", test_find_rdf_root);
    Test.add_func("/rdf/parser/test_nested_property_elements", test_nested_property_elements);
    Test.add_func("/rdf/parser/test_blank", test_blank);
    Test.add_func("/rdf/parser/test_typed_node_element", test_typed_node_element);
}
    346 
    347 #endif
    348 
    349 }