comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/property.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # -*- coding: utf-8 -*-
2 """
3 Implementation of the C{@property} value handling.
4
5 RDFa 1.0 and RDFa 1.1 are fairly different. RDFa 1.0 generates only literals, see
6 U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>} for the details.
7 On the other hand, RDFa 1.1, beyond literals, can also generate URI references. Hence the duplicate method in the L{ProcessProperty} class, one for RDFa 1.0 and the other for RDFa 1.1.
8
9 @summary: RDFa Literal generation
10 @requires: U{RDFLib package<http://rdflib.net>}
11 @organization: U{World Wide Web Consortium<http://www.w3.org>}
12 @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
13 @license: This software is available for use under the
14 U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
15 """
16
17 """
18 $Id: property.py,v 1.11 2012/06/12 11:47:11 ivan Exp $
19 $Date: 2012/06/12 11:47:11 $
20 """
21
22 import re, sys
23
24 import rdflib
25 from rdflib import BNode
26 from rdflib import Literal, URIRef, Namespace
# rdflib 3.0.0 and later expose RDF and XSDToPython from different modules than
# the older releases, so the import location is chosen from the installed version.
# The major version is compared numerically: comparing the version *strings*
# is lexicographic and would, e.g., order "10.0.0" before "3.0.0".
if int(rdflib.__version__.split(".")[0]) >= 3:
    from rdflib import RDF as ns_rdf
    from rdflib.term import XSDToPython
else:
    from rdflib.RDF import RDFNS as ns_rdf
    from rdflib.Literal import XSDToPython
33
34 from . import IncorrectBlankNodeUsage, IncorrectLiteral, err_no_blank_node, ns_xsd
35 from .utils import has_one_of_attributes, return_XML
36 from .host.html5 import handled_time_types
37
38 XMLLiteral = ns_rdf["XMLLiteral"]
39 HTMLLiteral = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML")
40
class ProcessProperty:
    """Generate the value for C{@property} taking into account datatype, etc.
    Note: this class is created only if the C{@property} is indeed present, no need to check.

    @ivar node: DOM element node
    @ivar graph: the (RDF) graph to add the properties to
    @ivar subject: the RDFLib URIRef serving as a subject for the generated triples
    @ivar state: the current state to be used for the CURIE-s
    @type state: L{state.ExecutionContext}
    @ivar typed_resource: Typically the bnode generated by a @typeof
    """

    # Runs of white space characters that are collapsed into a single space when
    # the literal is not to be space-preserved (see L{_get_literal}).
    _WHITESPACE_RUN = re.compile(r'(\r| |\n|\t)+')

    def __init__(self, node, graph, subject, state, typed_resource=None):
        """
        @param node: DOM element node
        @param graph: the (RDF) graph to add the properties to
        @param subject: the RDFLib URIRef serving as a subject for the generated triples
        @param state: the current state to be used for the CURIE-s
        @type state: L{state.ExecutionContext}
        @param typed_resource: Typically the bnode generated by a @typeof; in RDFa 1.1, that becomes the object for C{@property}
        """
        self.node = node
        self.graph = graph
        self.subject = subject
        self.state = state
        self.typed_resource = typed_resource

    def generate(self):
        """
        Common entry point for the RDFa 1.0 and RDFa 1.1 versions; bifurcates based on the RDFa version,
        as retrieved from the state object.
        """
        # Compare numerically; a plain string comparison is lexicographic.
        if self._version_tuple(self.state.rdfa_version) >= (1, 1):
            self.generate_1_1()
        else:
            self.generate_1_0()

    def generate_1_1(self):
        """Generate the property object, 1.1 version.

        The object is, in this order of precedence: the resource referred to by
        C{@resource}/C{@href}/C{@src}, the typed resource of a C{@typeof}, or a literal.
        Nothing is generated if no object could be established.
        """
        #########################################################################
        # See if the target is _not_ a literal
        irirefs = ("resource", "href", "src")
        noiri = ("content", "datatype", "rel", "rev")
        notypediri = ("content", "datatype", "rel", "rev", "about", "about_pruned")
        if has_one_of_attributes(self.node, irirefs) and not has_one_of_attributes(self.node, noiri):
            # @href/@resource/@src takes the lead here...
            value = self.state.getResource(irirefs)
        elif (self.node.hasAttribute("typeof")
              and not has_one_of_attributes(self.node, notypediri)
              and self.typed_resource is not None):
            # a @typeof creates a special branch in case the typed resource was set during parsing
            value = self.typed_resource
        else:
            # We have to generate a literal
            dtset, datatype = self._get_datatype()

            # Suppress lang is set in case some elements explicitly want to suppress the effect of language.
            # There were discussions, for example, that the <time> element should do so. Although,
            # after all, this was reversed, the functionality is kept in the code in case another
            # element might need it...
            if self.state.lang is not None and not self.state.supress_lang:
                lang = self.state.lang
            else:
                lang = ''

            if self.node.hasAttribute("content"):
                # The simple case: separate @content attribute
                value = self._content_literal(dtset, datatype, lang)
            elif dtset:
                # there *is* a datatype (even if it is empty!)
                value = self._datatyped_literal(datatype, lang)
            else:
                value = self._create_Literal(self._get_literal(self.node), lang=lang)

        if value is None:
            return

        for prop in self.state.getURI("property"):
            if isinstance(prop, BNode):
                self._warn_blank_node_property()
            elif self.node.hasAttribute("inlist"):
                self.state.add_to_list_mapping(prop, value)
            else:
                self.graph.add((self.subject, prop, value))

    def generate_1_0(self):
        """Generate the property object, 1.0 version. RDFa 1.0 always generates a literal."""
        #########################################################################
        # We have to generate a literal indeed.
        dtset, datatype = self._get_datatype()
        lang = self.state.lang if self.state.lang is not None else ''

        if self.node.hasAttribute("content"):
            # The simple case: separate @content attribute
            value = self._content_literal(dtset, datatype, lang)
        elif dtset:
            # There *is* a datatype (even if it is empty!); the specified datatype may,
            # in fact, be an explicit XML Literal
            value = self._datatyped_literal(datatype, lang)
        elif any(n.nodeType == self.node.ELEMENT_NODE for n in self.node.childNodes):
            # no controlling @datatype, but there is markup in the contained
            # element: an XML Literal should be generated
            value = self._create_Literal(self._get_XML_literal(self.node), datatype=XMLLiteral)
        else:
            value = self._create_Literal(self._get_literal(self.node), lang=lang)

        for prop in self.state.getURI("property"):
            if isinstance(prop, BNode):
                self._warn_blank_node_property()
            else:
                self.graph.add((self.subject, prop, value))

    ######################################################################################################################################

    @staticmethod
    def _version_tuple(version):
        """Turn the leading dotted-number part of a version into a tuple of integers.
        Trailing zero components are dropped, so that C{"3.2"} and C{"3.2.0"} compare equal;
        anything after the numeric prefix (eg, C{"rc1"}) is ignored.

        @param version: version, eg, C{"3.2.0"} or C{"1.1"}
        @return: tuple of integers; empty if the value does not start with a number
        @rtype: tuple
        """
        match = re.match(r"\s*(\d+(?:\.\d+)*)", str(version))
        if match is None:
            return ()
        parts = [int(part) for part in match.group(1).split(".")]
        while parts and parts[-1] == 0:
            parts.pop()
        return tuple(parts)

    def _get_datatype(self):
        """Get, if it exists, the value of C{@datatype}.

        @return: a (dtset, datatype) pair; dtset tells whether the attribute is present at all, datatype
        is the empty string if the attribute is missing or empty, and the value returned by the state
        object for the attribute otherwise
        @rtype: tuple
        """
        if not self.node.hasAttribute("datatype"):
            return False, ''
        if self.node.getAttribute("datatype") != "":
            return True, self.state.getURI("datatype")
        return True, ''

    def _content_literal(self, dtset, datatype, lang):
        """Create the literal from the C{@content} attribute.

        @param dtset: whether C{@datatype} is present on the node
        @param datatype: the datatype, as returned by L{_get_datatype}
        @param lang: the language tag (possibly empty)
        @return: Literal
        """
        val = self.node.getAttribute("content")
        if not dtset:
            return Literal(val, lang=lang)
        # The value of datatype has been set, and the keyword parameters take care of the rest
        return self._create_Literal(val, datatype=datatype, lang=lang)

    def _datatyped_literal(self, datatype, lang):
        """Create the literal from the content of the node when C{@datatype} is present
        (but C{@content} is not).

        @param datatype: the datatype, as returned by L{_get_datatype}
        @param lang: the language tag (possibly empty)
        @return: Literal
        """
        if datatype == XMLLiteral:
            return Literal(self._get_XML_literal(self.node), datatype=XMLLiteral)
        if datatype == HTMLLiteral:
            markup = self._get_HTML_literal(self.node)
            if sys.version_info[0] >= 3:
                return Literal(markup, datatype=HTMLLiteral)
            # Python 2 only: going through an intermediate XML Literal avoids an encoding error
            return Literal(Literal(markup, datatype=XMLLiteral), datatype=HTMLLiteral)
        return self._create_Literal(self._get_literal(self.node), datatype=datatype, lang=lang)

    def _warn_blank_node_property(self):
        """Add a warning to the options of the state: a blank node was used as a property."""
        self.state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=self.node.nodeName)

    def _putBackEntities(self, str):
        """Put 'back' entities for the '&','<', and '>' characters, to produce a proper XML string.
        Used by the XML Literal extraction.
        @param str: string to be converted (the parameter name is kept for compatibility, although it hides the builtin)
        @return: string with entities
        @rtype: string
        """
        # '&' has to be replaced first, otherwise the entities generated for '<' and '>' would be escaped again
        return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    def _get_literal(self, Pnode):
        """
        Get (recursively) the full text from a DOM Node.

        @param Pnode: DOM Node
        @return: string
        """
        pieces = []
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                pieces.append(node.data)
            elif node.nodeType == node.ELEMENT_NODE:
                # Note: the text of a child element comes back already normalized (when applicable)
                pieces.append(self._get_literal(node))
        rc = "".join(pieces)

        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option
        if self.state.options.space_preserve:
            return rc
        return self._WHITESPACE_RUN.sub(" ", rc).strip()

    def _serialize_children(self, Pnode, **xml_options):
        """Serialize the children of a DOM Node: text nodes get their entities put back, element nodes
        are serialized through C{return_XML}.

        @param Pnode: DOM Node
        @param xml_options: additional keyword arguments passed on to C{return_XML}
        @return: string
        """
        pieces = []
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                pieces.append(self._putBackEntities(node.data))
            elif node.nodeType == node.ELEMENT_NODE:
                pieces.append(return_XML(self.state, node, base=False, **xml_options))
        return "".join(pieces)

    def _get_XML_literal(self, Pnode):
        """
        Get (recursively) the XML Literal content of a DOM Node.

        @param Pnode: DOM Node
        @return: string
        """
        return self._serialize_children(Pnode)

    def _get_HTML_literal(self, Pnode):
        """
        Get (recursively) the HTML Literal content of a DOM Node. Same as L{_get_XML_literal}, except
        that C{return_XML} is called with C{xmlns = False}.

        @param Pnode: DOM Node
        @return: string
        """
        return self._serialize_children(Pnode, xmlns=False)

    def _create_Literal(self, val, datatype='', lang=''):
        """
        Create a literal, taking into account the datatype and language. If a datatype is set, the language
        is ignored; a warning is added to the options of the state if the value cannot be converted
        by the Python conversion function that rdflib registers for the datatype.

        @param val: the lexical value of the literal
        @param datatype: the datatype; C{None} or the empty string means no datatype
        @param lang: the language tag, used only if there is no datatype
        @return: Literal
        """
        if datatype is None or datatype == '':
            return Literal(val, lang=lang)

        # This is a bit convoluted... the default setup of rdflib does not gracefully react if the
        # datatype cannot properly be converted to Python, hence the explicit check below.
        # To make things worse: rdflib 3.1.0 does not handle the various xsd date types properly, ie,
        # the conversion function would generate errors; the check is skipped for those.
        # (The rdflib version is compared numerically; a string comparison is lexicographic.)
        if ("%s" % datatype) in handled_time_types and self._version_tuple(rdflib.__version__) < (3, 2):
            convFunc = None
        else:
            convFunc = XSDToPython.get(datatype, None)

        if convFunc:
            try:
                # Only the success of the conversion matters: if it goes through,
                # the literal value and its datatype match
                convFunc(val)
            except Exception:
                self.state.options.add_warning("Incompatible value (%s) and datatype (%s) in Literal definition." % (val, datatype), warning_type=IncorrectLiteral, node=self.node.nodeName)
        return Literal(val, datatype=datatype)