comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/parse.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # -*- coding: utf-8 -*-
2 """
3 The core parsing function of RDFa. Some details are
4 put into other modules to make it clearer to update/modify (e.g., generation of C{@property} values, or managing the current state).
5
6 Note that the entry point (L{parse_one_node}) bifurcates into an RDFa 1.0 and RDFa 1.1 version, i.e.,
7 to L{_parse_1_0} and L{_parse_1_1}. Some of the parsing details (management of C{@property}, list facilities, changed behavior on C{@typeof}) have changed
8 between versions and forcing the two into one function would be counterproductive.
9
10 @summary: RDFa core parser processing step
11 @organization: U{World Wide Web Consortium<http://www.w3.org>}
12 @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
13 @license: This software is available for use under the
14 U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
15 """
16
17 """
18 $Id: parse.py,v 1.19 2013-01-07 12:46:43 ivan Exp $
19 $Date: 2013-01-07 12:46:43 $
20 """
21
22 import sys
23
24 from .state import ExecutionContext
25 from .property import ProcessProperty
26 from .embeddedRDF import handle_embeddedRDF
27 from .host import HostLanguage, host_dom_transforms
28
29 import rdflib
30 from rdflib import URIRef
31 from rdflib import Literal
32 from rdflib import BNode
33 from rdflib import Namespace
34 if rdflib.__version__ >= "3.0.0" :
35 from rdflib import Graph
36 from rdflib import RDF as ns_rdf
37 from rdflib import RDFS as ns_rdfs
38 else :
39 from rdflib.Graph import Graph
40 from rdflib.RDFS import RDFSNS as ns_rdfs
41 from rdflib.RDF import RDFNS as ns_rdf
42
43 from . import IncorrectBlankNodeUsage, err_no_blank_node
44 from .utils import has_one_of_attributes
45
46 #######################################################################
def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """Dispatch the handling of a single DOM node to the version-specific parser.

    The RDFa version recorded on the incoming state decides whether
    L{_parse_1_1} or L{_parse_1_0} processes the node. Those functions recurse
    into themselves for the children of elements that carry RDFa attributes,
    and come back through this entry point for the children of elements that
    carry none.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; the triples are added to C{graph} by the selected parser
    """
    # The version is compared as a string, exactly as stored on the state.
    if incoming_state.rdfa_version >= "1.1":
        handler = _parse_1_1
    else:
        handler = _parse_1_0
    handler(node, graph, parent_object, incoming_state, parent_incomplete_triples)
70
71 #######################################################################
def _parse_1_1(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa 1.1 Core document<http://www.w3.org/TR/rdfa-core/>} for further details.

    This is the RDFa 1.1 version.

    The function builds a local L{state.ExecutionContext} for the node, lets the
    role-attribute, embedded-RDF and host-language hooks run, establishes the
    current subject / current object / typed resource, emits the C{@typeof},
    C{@rel}, C{@rev} and C{@property} triples, recurses into the element
    children, completes the parent's hanging triples and, finally, serializes
    the lists that were opened at this level.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; all results are added to C{graph} or recorded on the states
    """
    html_hosts = (HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5)

    def header_check(p_obj):
        """Special disposition for the HTML <head> and <body> elements.

        Returns C{p_obj} when the host language is one of the HTML variants,
        the node is <head> or <body>, and the node has none of @about,
        @resource, @src, @href; returns None in every other case.
        """
        if (state.options.host_language in html_hosts
                and node.nodeName in ("head", "body")
                and not has_one_of_attributes(node, "about", "resource", "src", "href")):
            return p_obj
        return None

    def lite_check():
        """Warn, when RDFa Lite checking is on, about a <link> whose @rel resolves as a CURIE."""
        if state.options.check_lite and state.options.host_language in html_hosts:
            if (node.tagName == "link" and node.hasAttribute("rel")
                    and state.term_or_curie.CURIE_to_URI(node.getAttribute("rel")) is not None):
                state.options.add_warning("In RDFa Lite, attribute @rel in <link> is only used in non-RDFa way (consider using @property)", node=node)

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    # ---------------------------------------------------------------------------------
    # Extra warning check on RDFa Lite
    lite_check()

    # ---------------------------------------------------------------------------------
    # Handling the role attribute is pretty much orthogonal to everything else...
    handle_role_attribute(node, graph, state)

    # ---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
        return

    # ---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == node.ELEMENT_NODE:
        for func in host_dom_transforms[state.options.host_language]:
            func(node, state)

    # ---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src", "vocab", "prefix"):
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    # -----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None
    typed_resource = None

    if has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource
        current_subject = header_check(parent_object)

        # set first the subject
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href", "src")

        if node.hasAttribute("typeof") and not node.hasAttribute("about"):
            if current_object is None:
                current_object = BNode()
            typed_resource = current_object

        if not node.hasAttribute("inlist") and current_object is not None:
            # In this case the newly defined object is, in fact, the head of the list
            # just reset the whole thing.
            state.reset_list_mapping(origin=current_object)

    elif node.hasAttribute("property") and not has_one_of_attributes(node, "content", "datatype"):
        current_subject = header_check(parent_object)

        # this is the case when the property may take hold of @src and friends...
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        if typed_resource is None and node.hasAttribute("typeof"):
            typed_resource = state.getResource("resource", "href", "src")
            if typed_resource is None:
                typed_resource = BNode()
            current_object = typed_resource
        else:
            current_object = current_subject

    else:
        current_subject = header_check(parent_object)

        # in this case all the various 'resource' setting attributes
        # behave identically, though they also have their own priority
        if current_subject is None:
            current_subject = state.getResource("about", "resource", "href", "src")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            if node.hasAttribute("typeof"):
                current_subject = BNode()
                state.reset_list_mapping(origin=current_subject)
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object Resource is to be transferred to
        # the children node
        current_object = current_subject
        if node.hasAttribute("typeof"):
            typed_resource = current_subject

    # ---------------------------------------------------------------------
    # The possible typeof indicates a number of type statements on the typed resource
    for defined_type in state.getURI("typeof"):
        if typed_resource:
            graph.add((typed_resource, ns_rdf["type"], defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in state.getURI("rel"):
        if isinstance(prop, BNode):
            state.options.add_warning(err_no_blank_node % "rel", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)
        elif node.hasAttribute("inlist"):
            if current_object is not None:
                # Add the content to the list. Note that if the same list
                # was initialized, at some point, by a None, it will be
                # overwritten by this real content
                state.add_to_list_mapping(prop, current_object)
            else:
                # Add a dummy entry to the list... Note that
                # if that list was initialized already with a real content
                # this call will have no effect
                state.add_to_list_mapping(prop, None)

                # Add a placeholder into the hanging rels
                incomplete_triples.append((None, prop, None))
        else:
            the_triple = (current_subject, prop, current_object)
            if current_object is not None:
                graph.add(the_triple)
            else:
                incomplete_triples.append(the_triple)

    for prop in state.getURI("rev"):
        if isinstance(prop, BNode):
            state.options.add_warning(err_no_blank_node % "rev", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)
        else:
            the_triple = (current_object, prop, current_subject)
            if current_object is not None:
                graph.add(the_triple)
            else:
                incomplete_triples.append(the_triple)

    # ----------------------------------------------------------------------
    # Generation of the @property values, including literals. The current subject is the subject.
    # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
    # because everything down there is part of the generated literal.
    if node.hasAttribute("property"):
        ProcessProperty(node, graph, current_subject, state, typed_resource).generate_1_1()

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    # -----------------------------------------------------------------------
    # Here is the recursion step for all the children
    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            _parse_1_1(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed
    for (s, p, o) in parent_incomplete_triples:
        if s is None and o is None:
            # This is an encoded version of a hanging rel for a collection:
            incoming_state.add_to_list_mapping(p, current_subject)
        else:
            if s is None:
                s = current_subject
            if o is None:
                o = current_subject
            graph.add((s, p, o))

    # Generate the lists, if any and if this is the level where a new list was originally created
    if state.new_list and not state.list_empty():
        for prop in state.get_list_props():
            vals = state.get_list_value(prop)
            if vals is None:
                # This was an empty list, in fact, ie, the list has been initiated by a <xxx rel="prop" inlist>
                # but no list content has ever been added
                graph.add((state.get_list_origin(), prop, ns_rdf["nil"]))
            else:
                # One fresh blank node per list cell, terminated by rdf:nil
                heads = [BNode() for _ in vals] + [ns_rdf["nil"]]
                for i, val in enumerate(vals):
                    graph.add((heads[i], ns_rdf["first"], val))
                    graph.add((heads[i], ns_rdf["rest"], heads[i + 1]))
                # Anchor the list
                graph.add((state.get_list_origin(), prop, heads[0]))
322
323
324 ##################################################################################################################
def _parse_1_0(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa 1.0 syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    This is the RDFa 1.0 version.

    The function builds a local L{state.ExecutionContext} for the node, lets the
    role-attribute, embedded-RDF and host-language hooks run, establishes the
    current subject and current object, emits the C{@typeof}, C{@rel}, C{@rev}
    and C{@property} triples, recurses into the element children and completes
    the parent's hanging triples.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; all results are added to C{graph} or recorded on the states
    """
    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    # ---------------------------------------------------------------------------------
    # Handling the role attribute is pretty much orthogonal to everything else...
    handle_role_attribute(node, graph, state)

    # ---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
        return

    # ---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == node.ELEMENT_NODE:
        for func in host_dom_transforms[state.options.host_language]:
            func(node, state)

    # ---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    # -----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None

    if has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource
        current_subject = state.getResource("about", "src")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            if node.hasAttribute("typeof"):
                current_subject = BNode()
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href")

    else:
        # in this case all the various 'resource' setting attributes
        # behave identically, though they also have their own priority
        current_subject = state.getResource("about", "src", "resource", "href")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            # A @typeof without any resource attribute gets a fresh blank node
            # as its subject, mirroring the @rel/@rev branch above; only
            # otherwise is the parent's object inherited. No further assignment
            # may follow here, or the blank node would be thrown away.
            if node.hasAttribute("typeof"):
                current_subject = BNode()
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object Resource is to be transferred to
        # the children node
        current_object = current_subject

    # ---------------------------------------------------------------------
    # The possible typeof indicates a number of type statements on the new Subject
    for defined_type in state.getURI("typeof"):
        graph.add((current_subject, ns_rdf["type"], defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in state.getURI("rel"):
        if isinstance(prop, BNode):
            state.options.add_warning(err_no_blank_node % "rel", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)
        else:
            the_triple = (current_subject, prop, current_object)
            if current_object is not None:
                graph.add(the_triple)
            else:
                incomplete_triples.append(the_triple)

    for prop in state.getURI("rev"):
        if isinstance(prop, BNode):
            state.options.add_warning(err_no_blank_node % "rev", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)
        else:
            the_triple = (current_object, prop, current_subject)
            if current_object is not None:
                graph.add(the_triple)
            else:
                incomplete_triples.append(the_triple)

    # ----------------------------------------------------------------------
    # Generation of the literal values. The current subject is the subject.
    # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
    # because everything down there is part of the generated literal.
    if node.hasAttribute("property"):
        ProcessProperty(node, graph, current_subject, state).generate_1_0()

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    # -----------------------------------------------------------------------
    # Here is the recursion step for all the children
    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            _parse_1_0(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed
    for (s, p, o) in parent_incomplete_triples:
        if s is None and o is None:
            # This is an encoded version of a hanging rel for a collection:
            incoming_state.add_to_list_mapping(p, current_subject)
        else:
            if s is None:
                s = current_subject
            if o is None:
                o = current_subject
            graph.add((s, p, o))
487
488
489 #######################################################################
490 # Handle the role attribute
def handle_role_attribute(node, graph, state):
    """
    Handling the role attribute, according to http://www.w3.org/TR/role-attribute/#using-role-in-conjunction-with-rdfa

    Nothing happens when the node has no C{@role}. Otherwise one triple per
    value returned by C{state.getURI("role")} is added to the graph, with the
    XHTML vocabulary C{role} predicate. The subject is C{state.base + '#' + id}
    when the node has an C{@id} (surrounding whitespace stripped), and a fresh
    blank node otherwise.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc.)
    @type state: L{state.ExecutionContext}
    @return: nothing; the triples are added to C{graph}
    """
    if not node.hasAttribute("role"):
        return

    if node.hasAttribute("id"):
        element_id = node.getAttribute("id").strip()
        subject = URIRef(state.base + '#' + element_id)
    else:
        subject = BNode()

    predicate = URIRef('http://www.w3.org/1999/xhtml/vocab#role')
    for role in state.getURI("role"):
        graph.add((subject, predicate, role))
509
510
511
512
513
514
515
516
517
518
519