Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/parse.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 The core parsing function of RDFa. Some details are | |
4 put into other modules to make it clearer to update/modify (e.g., generation of C{@property} values, or managing the current state). | |
5 | |
6 Note that the entry point (L{parse_one_node}) bifurcates into an RDFa 1.0 and RDFa 1.1 version, ie, | |
7 to L{_parse_1_0} and L{_parse_1_1}. Some of the parsing details (management of C{@property}, list facilities, changed behavior on C{@typeof})) have changed | |
8 between versions and forcing the two into one function would be counter productive. | |
9 | |
10 @summary: RDFa core parser processing step | |
11 @organization: U{World Wide Web Consortium<http://www.w3.org>} | |
12 @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} | |
13 @license: This software is available for use under the | |
14 U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} | |
15 """ | |
16 | |
17 """ | |
18 $Id: parse.py,v 1.19 2013-01-07 12:46:43 ivan Exp $ | |
19 $Date: 2013-01-07 12:46:43 $ | |
20 """ | |
21 | |
22 import sys | |
23 | |
24 from .state import ExecutionContext | |
25 from .property import ProcessProperty | |
26 from .embeddedRDF import handle_embeddedRDF | |
27 from .host import HostLanguage, host_dom_transforms | |
28 | |
29 import rdflib | |
30 from rdflib import URIRef | |
31 from rdflib import Literal | |
32 from rdflib import BNode | |
33 from rdflib import Namespace | |
34 if rdflib.__version__ >= "3.0.0" : | |
35 from rdflib import Graph | |
36 from rdflib import RDF as ns_rdf | |
37 from rdflib import RDFS as ns_rdfs | |
38 else : | |
39 from rdflib.Graph import Graph | |
40 from rdflib.RDFS import RDFSNS as ns_rdfs | |
41 from rdflib.RDF import RDFNS as ns_rdf | |
42 | |
43 from . import IncorrectBlankNodeUsage, err_no_blank_node | |
44 from .utils import has_one_of_attributes | |
45 | |
46 ####################################################################### | |
def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """Entry point for handling a single DOM node.

    The function does no RDFa processing itself: it only selects the parser
    matching the RDFa version stored in the incoming state and delegates to it,
    ie, to L{_parse_1_1} for RDFa 1.1 and to L{_parse_1_0} otherwise. Both of
    those functions recurse on their own, and call back here only for the
    children of elements that carry no RDFa attribute at all.

    The version is compared as a string (C{>= "1.1"}), ie, lexicographically.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; all results are added to C{graph}
    """
    # Pick the version specific parser first, then hand everything over unchanged.
    if incoming_state.rdfa_version >= "1.1":
        version_parser = _parse_1_1
    else:
        version_parser = _parse_1_0
    version_parser(node, graph, parent_object, incoming_state, parent_incomplete_triples)
70 | |
71 ####################################################################### | |
def _parse_1_1(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa 1.1 Core document<http://www.w3.org/TR/rdfa-core/>} for further details.

    This is the RDFa 1.1 version. In order, the function:
      1. creates the local L{state.ExecutionContext} for the node;
      2. runs the RDFa Lite check, the C{@role} handling, the embedded RDF
         handling and the host language specific DOM transformations;
      3. establishes the current subject, the current object and the typed resource;
      4. generates the C{@typeof}, C{@rel}, C{@rev} and C{@property} triples;
      5. recurses into the element children;
      6. completes the incomplete triples received from the parent;
      7. generates the RDF lists that were started at this level.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; all results are added to C{graph} (and, for warnings, through C{state.options})
    """
    # Both helpers below read 'node' and 'state' from the enclosing scope; 'state'
    # is bound further down, before either helper is called for the first time.
    def header_check(p_obj):
        """Special disposition for the HTML <head> and <body> elements: return C{p_obj} if the
        host language is an (X)HTML one, the node is <head> or <body> and it has none of
        C{@about}, C{@resource}, C{@src} or C{@href}; return None in all other cases."""
        html_hosts = [HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5]
        if (state.options.host_language in html_hosts
                and node.nodeName in ("head", "body")
                and not has_one_of_attributes(node, "about", "resource", "src", "href")):
            return p_obj
        return None

    def lite_check():
        """Issue a warning if the RDFa Lite check is requested and a <link> element
        has a C{@rel} value that resolves as a CURIE."""
        html_hosts = [HostLanguage.html5, HostLanguage.xhtml5, HostLanguage.xhtml]
        if state.options.check_lite and state.options.host_language in html_hosts:
            if (node.tagName == "link" and node.hasAttribute("rel")
                    and state.term_or_curie.CURIE_to_URI(node.getAttribute("rel")) is not None):
                state.options.add_warning("In RDFa Lite, attribute @rel in <link> is only used in non-RDFa way (consider using @property)", node=node)

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Extra warning check on RDFa Lite
    lite_check()

    #---------------------------------------------------------------------------------
    # Handling the role attribute is pretty much orthogonal to everything else...
    handle_role_attribute(node, graph, state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
        return

    #---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == node.ELEMENT_NODE:
        for func in host_dom_transforms[state.options.host_language]:
            func(node, state)

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src", "vocab", "prefix"):
        # nop, there is nothing to do here, just go down the tree and return...
        # The parent's object and incomplete triples are passed through untouched.
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None
    typed_resource = None

    if has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource
        current_subject = header_check(parent_object)

        # set first the subject
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href", "src")

        if node.hasAttribute("typeof") and not node.hasAttribute("about"):
            if current_object is None:
                current_object = BNode()
            typed_resource = current_object

        if not node.hasAttribute("inlist") and current_object is not None:
            # In this case the newly defined object is, in fact, the head of the list
            # just reset the whole thing.
            state.reset_list_mapping(origin=current_object)

    elif node.hasAttribute("property") and not has_one_of_attributes(node, "content", "datatype"):
        current_subject = header_check(parent_object)

        # this is the case when the property may take hold of @src and friends...
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        if typed_resource is None and node.hasAttribute("typeof"):
            typed_resource = state.getResource("resource", "href", "src")
            if typed_resource is None:
                typed_resource = BNode()
            current_object = typed_resource
        else:
            current_object = current_subject

    else:
        current_subject = header_check(parent_object)

        # in this case all the various 'resource' setting attributes
        # behave identically, though they also have their own priority
        if current_subject is None:
            current_subject = state.getResource("about", "resource", "href", "src")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            if node.hasAttribute("typeof"):
                current_subject = BNode()
                state.reset_list_mapping(origin=current_subject)
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object Resource is to be transferred to
        # the children node
        current_object = current_subject
        if node.hasAttribute("typeof"):
            typed_resource = current_subject

    # ---------------------------------------------------------------------
    ## The possible typeof indicates a number of type statements on the typed resource
    for defined_type in state.getURI("typeof"):
        # NOTE: this is deliberately left as a truthiness test (not 'is not None'),
        # exactly as in the original code.
        if typed_resource:
            graph.add((typed_resource, ns_rdf["type"], defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in state.getURI("rel"):
        if not isinstance(prop, BNode):
            if node.hasAttribute("inlist"):
                if current_object is not None:
                    # Add the content to the list. Note that if the same list
                    # was initialized, at some point, by a None, it will be
                    # overwritten by this real content
                    state.add_to_list_mapping(prop, current_object)
                else:
                    # Add a dummy entry to the list... Note that
                    # if that list was initialized already with a real content
                    # this call will have no effect
                    state.add_to_list_mapping(prop, None)

                    # Add a placeholder into the hanging rels
                    incomplete_triples.append((None, prop, None))
            else:
                theTriple = (current_subject, prop, current_object)
                if current_object is not None:
                    graph.add(theTriple)
                else:
                    incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rel", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)

    for prop in state.getURI("rev"):
        if not isinstance(prop, BNode):
            theTriple = (current_object, prop, current_subject)
            if current_object is not None:
                graph.add(theTriple)
            else:
                incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rev", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)

    # ----------------------------------------------------------------------
    # Generation of the @property values, including literals. The current subject is the subject
    # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
    # because everything down there is part of the generated literal.
    if node.hasAttribute("property"):
        ProcessProperty(node, graph, current_subject, state, typed_resource).generate_1_1()

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    #-----------------------------------------------------------------------
    # Here is the recursion step for all the children
    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            _parse_1_1(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed
    for (s, p, o) in parent_incomplete_triples:
        if s is None and o is None:
            # This is an encoded version of a hanging rel for a collection:
            incoming_state.add_to_list_mapping(p, current_subject)
        else:
            if s is None:
                s = current_subject
            if o is None:
                o = current_subject
            graph.add((s, p, o))

    # Generate the lists, if any and if this is the level where a new list was originally created
    if state.new_list and not state.list_empty():
        for prop in state.get_list_props():
            vals = state.get_list_value(prop)
            if vals is None:
                # This was an empty list, in fact, ie, the list has been initiated by a <xxx rel="prop" inlist>
                # but no list content has ever been added
                graph.add((state.get_list_origin(), prop, ns_rdf["nil"]))
            else:
                # One fresh blank node per list item, closed by rdf:nil
                heads = [BNode() for _ in vals] + [ns_rdf["nil"]]
                for head, val, rest in zip(heads, vals, heads[1:]):
                    graph.add((head, ns_rdf["first"], val))
                    graph.add((head, ns_rdf["rest"], rest))
                # Anchor the list
                graph.add((state.get_list_origin(), prop, heads[0]))

    # -------------------------------------------------------------------
    # This should be it...
    # -------------------------------------------------------------------
    return
322 | |
323 | |
324 ################################################################################################################## | |
def _parse_1_0(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa 1.0 syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    This is the RDFa 1.0 version. In order, the function:
      1. creates the local L{state.ExecutionContext} for the node;
      2. runs the C{@role} handling, the embedded RDF handling and the host
         language specific DOM transformations;
      3. establishes the current subject and the current object;
      4. generates the C{@typeof}, C{@rel}, C{@rev} and C{@property} triples;
      5. recurses into the element children;
      6. completes the incomplete triples received from the parent.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; all results are added to C{graph} (and, for warnings, through C{state.options})
    """

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Handling the role attribute is pretty much orthogonal to everything else...
    handle_role_attribute(node, graph, state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
        return

    #---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == node.ELEMENT_NODE:
        for func in host_dom_transforms[state.options.host_language]:
            func(node, state)

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
        # nop, there is nothing to do here, just go down the tree and return...
        # The parent's object and incomplete triples are passed through untouched.
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None

    if has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource
        current_subject = state.getResource("about", "src")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            if node.hasAttribute("typeof"):
                current_subject = BNode()
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href")

    else:
        # in this case all the various 'resource' setting attributes
        # behave identically, though they also have their own priority
        current_subject = state.getResource("about", "src", "resource", "href")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            # Same rule as in the @rel/@rev branch: a @typeof without any resource
            # attribute gets a fresh blank node, which must be kept as the subject
            # of the type triples; the parent's object is inherited only otherwise.
            if node.hasAttribute("typeof"):
                current_subject = BNode()
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object Resource is to be transferred to
        # the children node
        current_object = current_subject

    # ---------------------------------------------------------------------
    ## The possible typeof indicates a number of type statements on the new Subject
    for defined_type in state.getURI("typeof"):
        graph.add((current_subject, ns_rdf["type"], defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in state.getURI("rel"):
        if not isinstance(prop, BNode):
            theTriple = (current_subject, prop, current_object)
            if current_object is not None:
                graph.add(theTriple)
            else:
                incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rel", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)

    for prop in state.getURI("rev"):
        if not isinstance(prop, BNode):
            theTriple = (current_object, prop, current_subject)
            if current_object is not None:
                graph.add(theTriple)
            else:
                incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rev", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)

    # ----------------------------------------------------------------------
    # Generation of the literal values. The current subject is the subject
    # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
    # because everything down there is part of the generated literal.
    if node.hasAttribute("property"):
        ProcessProperty(node, graph, current_subject, state).generate_1_0()

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    #-----------------------------------------------------------------------
    # Here is the recursion step for all the children
    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            _parse_1_0(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed
    for (s, p, o) in parent_incomplete_triples:
        if s is None and o is None:
            # This is an encoded version of a hanging rel for a collection:
            incoming_state.add_to_list_mapping(p, current_subject)
        else:
            if s is None:
                s = current_subject
            if o is None:
                o = current_subject
            graph.add((s, p, o))

    # -------------------------------------------------------------------
    # This should be it...
    # -------------------------------------------------------------------
    return
487 | |
488 | |
489 ####################################################################### | |
490 # Handle the role attribute | |
def handle_role_attribute(node, graph, state):
    """
    Generate the triples for the C{@role} attribute, according to
    http://www.w3.org/TR/role-attribute/#using-role-in-conjunction-with-rdfa

    Nothing happens if the node has no C{@role}. Otherwise the subject is the
    URI built from the base, a C{#} and the (whitespace stripped) C{@id} value
    if the node has an C{@id}, and a fresh blank node if it has not. One triple
    is added, with the XHTML vocabulary C{role} predicate, for each value
    returned by the state for C{@role}.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc.)
    @type state: L{state.ExecutionContext}
    """
    # Guard: elements without @role are none of our business
    if not node.hasAttribute("role"):
        return

    if node.hasAttribute("id"):
        fragment = node.getAttribute("id").strip()
        role_subject = URIRef(state.base + '#' + fragment)
    else:
        role_subject = BNode()

    role_predicate = URIRef('http://www.w3.org/1999/xhtml/vocab#role')
    for role_value in state.getURI("role"):
        graph.add((role_subject, role_predicate, role_value))
509 | |
510 | |
511 | |
512 | |
513 | |
514 | |
515 | |
516 | |
517 | |
518 | |
519 |