Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/serializers/rdfxml.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 (2020-07-31) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 from rdflib.plugins.serializers.xmlwriter import XMLWriter | |
2 | |
3 from rdflib.namespace import Namespace, RDF, RDFS # , split_uri | |
4 | |
5 from rdflib.term import URIRef, Literal, BNode | |
6 from rdflib.util import first, more_than | |
7 from rdflib.collection import Collection | |
8 from rdflib.serializer import Serializer | |
9 | |
10 # from rdflib.exceptions import Error | |
11 | |
12 from rdflib.py3compat import b | |
13 | |
14 from xml.sax.saxutils import quoteattr, escape | |
15 import xml.dom.minidom | |
16 | |
17 from .xmlwriter import ESCAPE_ENTITIES | |
18 | |
19 __all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer'] | |
20 | |
21 | |
22 class XMLSerializer(Serializer): | |
23 | |
24 def __init__(self, store): | |
25 super(XMLSerializer, self).__init__(store) | |
26 | |
27 def __bindings(self): | |
28 store = self.store | |
29 nm = store.namespace_manager | |
30 bindings = {} | |
31 | |
32 for predicate in set(store.predicates()): | |
33 prefix, namespace, name = nm.compute_qname(predicate) | |
34 bindings[prefix] = URIRef(namespace) | |
35 | |
36 RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#") | |
37 | |
38 if "rdf" in bindings: | |
39 assert bindings["rdf"] == RDFNS | |
40 else: | |
41 bindings["rdf"] = RDFNS | |
42 | |
43 for prefix, namespace in bindings.items(): | |
44 yield prefix, namespace | |
45 | |
46 def serialize(self, stream, base=None, encoding=None, **args): | |
47 self.base = base | |
48 self.__stream = stream | |
49 self.__serialized = {} | |
50 encoding = self.encoding | |
51 self.write = write = lambda uni: stream.write( | |
52 uni.encode(encoding, 'replace')) | |
53 | |
54 # startDocument | |
55 write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding) | |
56 | |
57 # startRDF | |
58 write('<rdf:RDF\n') | |
59 | |
60 # If provided, write xml:base attribute for the RDF | |
61 if "xml_base" in args: | |
62 write(' xml:base="%s"\n' % args['xml_base']) | |
63 # TODO: | |
64 # assert( | |
65 # namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf') | |
66 bindings = list(self.__bindings()) | |
67 bindings.sort() | |
68 | |
69 for prefix, namespace in bindings: | |
70 if prefix: | |
71 write(' xmlns:%s="%s"\n' % (prefix, namespace)) | |
72 else: | |
73 write(' xmlns="%s"\n' % namespace) | |
74 write('>\n') | |
75 | |
76 # write out triples by subject | |
77 for subject in self.store.subjects(): | |
78 self.subject(subject, 1) | |
79 | |
80 # endRDF | |
81 write("</rdf:RDF>\n") | |
82 | |
83 # Set to None so that the memory can get garbage collected. | |
84 # self.__serialized = None | |
85 del self.__serialized | |
86 | |
87 def subject(self, subject, depth=1): | |
88 if not subject in self.__serialized: | |
89 self.__serialized[subject] = 1 | |
90 | |
91 if isinstance(subject, (BNode, URIRef)): | |
92 write = self.write | |
93 indent = " " * depth | |
94 element_name = "rdf:Description" | |
95 | |
96 if isinstance(subject, BNode): | |
97 write('%s<%s rdf:nodeID="%s"' % ( | |
98 indent, element_name, subject)) | |
99 else: | |
100 uri = quoteattr(self.relativize(subject)) | |
101 write("%s<%s rdf:about=%s" % (indent, element_name, uri)) | |
102 | |
103 if (subject, None, None) in self.store: | |
104 write(">\n") | |
105 | |
106 for predicate, object in self.store.predicate_objects( | |
107 subject): | |
108 self.predicate(predicate, object, depth + 1) | |
109 write("%s</%s>\n" % (indent, element_name)) | |
110 | |
111 else: | |
112 write("/>\n") | |
113 | |
114 def predicate(self, predicate, object, depth=1): | |
115 write = self.write | |
116 indent = " " * depth | |
117 qname = self.store.namespace_manager.qname(predicate) | |
118 | |
119 if isinstance(object, Literal): | |
120 attributes = "" | |
121 | |
122 if object.language: | |
123 attributes += ' xml:lang="%s"' % object.language | |
124 | |
125 if object.datatype: | |
126 attributes += ' rdf:datatype="%s"' % object.datatype | |
127 | |
128 write("%s<%s%s>%s</%s>\n" % | |
129 (indent, qname, attributes, | |
130 escape(object, ESCAPE_ENTITIES), qname)) | |
131 else: | |
132 | |
133 if isinstance(object, BNode): | |
134 write('%s<%s rdf:nodeID="%s"/>\n' % | |
135 (indent, qname, object)) | |
136 else: | |
137 write("%s<%s rdf:resource=%s/>\n" % | |
138 (indent, qname, quoteattr(self.relativize(object)))) | |
139 | |
140 XMLLANG = "http://www.w3.org/XML/1998/namespacelang" | |
141 XMLBASE = "http://www.w3.org/XML/1998/namespacebase" | |
142 OWL_NS = Namespace('http://www.w3.org/2002/07/owl#') | |
143 | |
144 | |
145 # TODO: | |
146 def fix(val): | |
147 "strip off _: from nodeIDs... as they are not valid NCNames" | |
148 if val.startswith("_:"): | |
149 return val[2:] | |
150 else: | |
151 return val | |
152 | |
153 | |
154 class PrettyXMLSerializer(Serializer): | |
155 | |
156 def __init__(self, store, max_depth=3): | |
157 super(PrettyXMLSerializer, self).__init__(store) | |
158 self.forceRDFAbout = set() | |
159 | |
160 def serialize(self, stream, base=None, encoding=None, **args): | |
161 self.__serialized = {} | |
162 store = self.store | |
163 self.base = base | |
164 self.max_depth = args.get("max_depth", 3) | |
165 assert self.max_depth > 0, "max_depth must be greater than 0" | |
166 | |
167 self.nm = nm = store.namespace_manager | |
168 self.writer = writer = XMLWriter(stream, nm, encoding) | |
169 namespaces = {} | |
170 | |
171 possible = set(store.predicates()).union( | |
172 store.objects(None, RDF.type)) | |
173 | |
174 for predicate in possible: | |
175 prefix, namespace, local = nm.compute_qname(predicate) | |
176 namespaces[prefix] = namespace | |
177 | |
178 namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" | |
179 | |
180 writer.push(RDF.RDF) | |
181 | |
182 if "xml_base" in args: | |
183 writer.attribute(XMLBASE, args["xml_base"]) | |
184 | |
185 writer.namespaces(iter(namespaces.items())) | |
186 | |
187 # Write out subjects that can not be inline | |
188 for subject in store.subjects(): | |
189 if (None, None, subject) in store: | |
190 if (subject, None, subject) in store: | |
191 self.subject(subject, 1) | |
192 else: | |
193 self.subject(subject, 1) | |
194 | |
195 # write out anything that has not yet been reached | |
196 # write out BNodes last (to ensure they can be inlined where possible) | |
197 bnodes = set() | |
198 | |
199 for subject in store.subjects(): | |
200 if isinstance(subject, BNode): | |
201 bnodes.add(subject) | |
202 continue | |
203 self.subject(subject, 1) | |
204 | |
205 # now serialize only those BNodes that have not been serialized yet | |
206 for bnode in bnodes: | |
207 if bnode not in self.__serialized: | |
208 self.subject(subject, 1) | |
209 | |
210 writer.pop(RDF.RDF) | |
211 stream.write(b("\n")) | |
212 | |
213 # Set to None so that the memory can get garbage collected. | |
214 self.__serialized = None | |
215 | |
216 def subject(self, subject, depth=1): | |
217 store = self.store | |
218 writer = self.writer | |
219 | |
220 if subject in self.forceRDFAbout: | |
221 writer.push(RDF.Description) | |
222 writer.attribute(RDF.about, self.relativize(subject)) | |
223 writer.pop(RDF.Description) | |
224 self.forceRDFAbout.remove(subject) | |
225 | |
226 elif not subject in self.__serialized: | |
227 self.__serialized[subject] = 1 | |
228 type = first(store.objects(subject, RDF.type)) | |
229 | |
230 try: | |
231 self.nm.qname(type) | |
232 except: | |
233 type = None | |
234 | |
235 element = type or RDF.Description | |
236 writer.push(element) | |
237 | |
238 if isinstance(subject, BNode): | |
239 def subj_as_obj_more_than(ceil): | |
240 return True | |
241 # more_than(store.triples((None, None, subject)), ceil) | |
242 | |
243 # here we only include BNode labels if they are referenced | |
244 # more than once (this reduces the use of redundant BNode | |
245 # identifiers) | |
246 if subj_as_obj_more_than(1): | |
247 writer.attribute(RDF.nodeID, fix(subject)) | |
248 | |
249 else: | |
250 writer.attribute(RDF.about, self.relativize(subject)) | |
251 | |
252 if (subject, None, None) in store: | |
253 for predicate, object in store.predicate_objects(subject): | |
254 if not (predicate == RDF.type and object == type): | |
255 self.predicate(predicate, object, depth + 1) | |
256 | |
257 writer.pop(element) | |
258 | |
259 elif subject in self.forceRDFAbout: | |
260 writer.push(RDF.Description) | |
261 writer.attribute(RDF.about, self.relativize(subject)) | |
262 writer.pop(RDF.Description) | |
263 self.forceRDFAbout.remove(subject) | |
264 | |
265 def predicate(self, predicate, object, depth=1): | |
266 writer = self.writer | |
267 store = self.store | |
268 writer.push(predicate) | |
269 | |
270 if isinstance(object, Literal): | |
271 if object.language: | |
272 writer.attribute(XMLLANG, object.language) | |
273 | |
274 if (object.datatype == RDF.XMLLiteral and | |
275 isinstance(object.value, xml.dom.minidom.Document)): | |
276 writer.attribute(RDF.parseType, "Literal") | |
277 writer.text("") | |
278 writer.stream.write(object) | |
279 else: | |
280 if object.datatype: | |
281 writer.attribute(RDF.datatype, object.datatype) | |
282 writer.text(object) | |
283 | |
284 elif object in self.__serialized or not (object, None, None) in store: | |
285 | |
286 if isinstance(object, BNode): | |
287 if more_than(store.triples((None, None, object)), 0): | |
288 writer.attribute(RDF.nodeID, fix(object)) | |
289 else: | |
290 writer.attribute(RDF.resource, self.relativize(object)) | |
291 | |
292 else: | |
293 if first(store.objects(object, RDF.first)): # may not have type | |
294 # RDF.List | |
295 | |
296 self.__serialized[object] = 1 | |
297 | |
298 # Warn that any assertions on object other than | |
299 # RDF.first and RDF.rest are ignored... including RDF.List | |
300 import warnings | |
301 warnings.warn( | |
302 "Assertions on %s other than RDF.first " % repr(object) + | |
303 "and RDF.rest are ignored ... including RDF.List", | |
304 UserWarning, stacklevel=2) | |
305 writer.attribute(RDF.parseType, "Collection") | |
306 | |
307 col = Collection(store, object) | |
308 | |
309 for item in col: | |
310 | |
311 if isinstance(item, URIRef): | |
312 self.forceRDFAbout.add(item) | |
313 self.subject(item) | |
314 | |
315 if not isinstance(item, URIRef): | |
316 self.__serialized[item] = 1 | |
317 else: | |
318 if first(store.triples_choices( | |
319 (object, RDF.type, [OWL_NS.Class, RDFS.Class]))) \ | |
320 and isinstance(object, URIRef): | |
321 writer.attribute(RDF.resource, self.relativize(object)) | |
322 | |
323 elif depth <= self.max_depth: | |
324 self.subject(object, depth + 1) | |
325 | |
326 elif isinstance(object, BNode): | |
327 | |
328 if not object in self.__serialized \ | |
329 and (object, None, None) in store \ | |
330 and len(list(store.subjects(object=object))) == 1: | |
331 # inline blank nodes if they haven't been serialized yet | |
332 # and are only referenced once (regardless of depth) | |
333 self.subject(object, depth + 1) | |
334 else: | |
335 writer.attribute(RDF.nodeID, fix(object)) | |
336 | |
337 else: | |
338 writer.attribute(RDF.resource, self.relativize(object)) | |
339 | |
340 writer.pop(predicate) |