comparison planemo/lib/python3.7/site-packages/rdflib/plugins/serializers/rdfxml.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400 (2020-07-31)
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 from rdflib.plugins.serializers.xmlwriter import XMLWriter
2
3 from rdflib.namespace import Namespace, RDF, RDFS # , split_uri
4
5 from rdflib.term import URIRef, Literal, BNode
6 from rdflib.util import first, more_than
7 from rdflib.collection import Collection
8 from rdflib.serializer import Serializer
9
10 # from rdflib.exceptions import Error
11
12 from rdflib.py3compat import b
13
14 from xml.sax.saxutils import quoteattr, escape
15 import xml.dom.minidom
16
17 from .xmlwriter import ESCAPE_ENTITIES
18
19 __all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer']
20
21
class XMLSerializer(Serializer):
    """Serialize a store as flat RDF/XML.

    Each subject is written as one top-level ``rdf:Description`` element
    whose children are the subject's predicates.  Output is produced by
    formatting strings directly, so every attribute value is passed
    through ``quoteattr`` to keep the document well-formed even when a
    URI, language tag or node id contains ``&``, ``<`` or a quote.
    """

    def __init__(self, store):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self):
        """Yield ``(prefix, namespace)`` for each predicate namespace in use.

        The ``rdf`` prefix is always included.  If the store already binds
        ``rdf`` to something other than the RDF syntax namespace an
        ``AssertionError`` is raised.
        """
        store = self.store
        nm = store.namespace_manager
        bindings = {}

        for predicate in set(store.predicates()):
            prefix, namespace, name = nm.compute_qname(predicate)
            bindings[prefix] = URIRef(namespace)

        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

        if "rdf" in bindings:
            assert bindings["rdf"] == RDFNS
        else:
            bindings["rdf"] = RDFNS

        for prefix, namespace in bindings.items():
            yield prefix, namespace

    def serialize(self, stream, base=None, encoding=None, **args):
        """Write the whole store to *stream* as RDF/XML.

        :param stream: object with a ``write`` method that accepts the
            encoded output.
        :param base: stored on ``self.base`` before any subject is written.
        :param encoding: NOTE(review): currently ignored -- ``self.encoding``
            is used both for the XML declaration and for encoding the
            output; confirm whether callers expect this argument to win.
        :param args: optional ``xml_base`` entry, written as the
            ``xml:base`` attribute of ``rdf:RDF``.
        """
        self.base = base
        self.__stream = stream
        self.__serialized = {}
        encoding = self.encoding
        self.write = write = lambda uni: stream.write(
            uni.encode(encoding, 'replace'))

        # startDocument
        write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)

        # startRDF
        write('<rdf:RDF\n')

        # If provided, write xml:base attribute for the RDF
        if "xml_base" in args:
            # "%s" % (...) keeps the original tolerance for non-string
            # values; quoteattr supplies the quotes and the escaping.
            write(' xml:base=%s\n' % quoteattr("%s" % (args['xml_base'],)))
        # TODO:
        # assert(
        # namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')

        # Sorted so the namespace declarations come out in a stable order.
        for prefix, namespace in sorted(self.__bindings()):
            if prefix:
                write(' xmlns:%s=%s\n' % (prefix, quoteattr(namespace)))
            else:
                write(' xmlns=%s\n' % quoteattr(namespace))
        write('>\n')

        # write out triples by subject
        for subject in self.store.subjects():
            self.subject(subject, 1)

        # endRDF
        write("</rdf:RDF>\n")

        # Drop the bookkeeping dict so the memory can get garbage collected.
        del self.__serialized

    def subject(self, subject, depth=1):
        """Write one ``rdf:Description`` element for *subject*.

        A subject is written at most once per ``serialize`` call; subjects
        that are neither ``BNode`` nor ``URIRef`` are recorded as seen but
        produce no output.
        """
        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1

        if not isinstance(subject, (BNode, URIRef)):
            return

        write = self.write
        indent = " " * depth
        element_name = "rdf:Description"

        if isinstance(subject, BNode):
            write('%s<%s rdf:nodeID=%s' % (
                indent, element_name, quoteattr(subject)))
        else:
            uri = quoteattr(self.relativize(subject))
            write("%s<%s rdf:about=%s" % (indent, element_name, uri))

        if (subject, None, None) in self.store:
            write(">\n")

            for predicate, obj in self.store.predicate_objects(subject):
                self.predicate(predicate, obj, depth + 1)
            write("%s</%s>\n" % (indent, element_name))
        else:
            # No outgoing triples: emit an empty element.
            write("/>\n")

    def predicate(self, predicate, object, depth=1):
        """Write a single property element for *predicate* / *object*.

        Literals become element text (with ``xml:lang`` / ``rdf:datatype``
        attributes when set); blank nodes are referenced by
        ``rdf:nodeID`` and everything else by ``rdf:resource``.
        """
        write = self.write
        indent = " " * depth
        qname = self.store.namespace_manager.qname(predicate)

        if isinstance(object, Literal):
            attributes = ""

            if object.language:
                attributes += ' xml:lang=%s' % quoteattr(object.language)

            if object.datatype:
                attributes += ' rdf:datatype=%s' % quoteattr(object.datatype)

            write("%s<%s%s>%s</%s>\n" %
                  (indent, qname, attributes,
                   escape(object, ESCAPE_ENTITIES), qname))
        elif isinstance(object, BNode):
            write('%s<%s rdf:nodeID=%s/>\n' %
                  (indent, qname, quoteattr(object)))
        else:
            write("%s<%s rdf:resource=%s/>\n" %
                  (indent, qname, quoteattr(self.relativize(object))))
139
# Attribute identifiers handed to ``writer.attribute`` by PrettyXMLSerializer
# for a literal's language tag and for the document's xml:base value.
# NOTE(review): the local names "lang"/"base" are appended to the XML
# namespace URI with no separator; presumably the XML writer relies on this
# exact spelling to emit xml:lang / xml:base -- confirm before changing.
XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"

# OWL vocabulary namespace; used to look up ``OWL_NS.Class``.
OWL_NS = Namespace("http://www.w3.org/2002/07/owl#")
143
144
145 # TODO:
def fix(val):
    """Return *val* with a leading ``_:`` removed.

    Node ids that start with ``_:`` are not valid NCNames, so the prefix is
    dropped; any other value is returned unchanged.
    """
    prefix = "_:"
    return val[len(prefix):] if val.startswith(prefix) else val
152
153
class PrettyXMLSerializer(Serializer):
    """Serialize a store as nested ("pretty") RDF/XML through an XMLWriter.

    Where possible, resources are written inline inside the property that
    references them, typed nodes use their ``rdf:type`` as the element
    name, and RDF lists are written with ``rdf:parseType="Collection"``.
    """

    def __init__(self, store, max_depth=3):
        """Create a serializer for *store*.

        NOTE(review): *max_depth* is accepted for interface compatibility
        but is not used here; ``serialize`` reads ``max_depth`` from its own
        keyword arguments instead.
        """
        super(PrettyXMLSerializer, self).__init__(store)
        # URIRefs that must be written as a bare rdf:Description/rdf:about
        # reference the next time ``subject`` sees them (collection items).
        self.forceRDFAbout = set()

    def serialize(self, stream, base=None, encoding=None, **args):
        """Write the whole store to *stream*.

        :param stream: output stream handed to ``XMLWriter``; a trailing
            newline is also written to it directly.
        :param base: stored on ``self.base``.
        :param encoding: forwarded to ``XMLWriter``.
        :param args: optional ``max_depth`` (default 3, must be > 0) that
            limits how deeply objects are nested inline, and optional
            ``xml_base`` written as an attribute on the root element.
        :raises AssertionError: if ``max_depth`` is not greater than 0.
        """
        self.__serialized = {}
        store = self.store
        self.base = base
        self.max_depth = args.get("max_depth", 3)
        assert self.max_depth > 0, "max_depth must be greater than 0"

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)
        namespaces = {}

        # Predicates and rdf:type objects can both end up as element names,
        # so both need a namespace declaration.
        possible = set(store.predicates()).union(
            store.objects(None, RDF.type))

        for term in possible:
            prefix, namespace, local = nm.compute_qname(term)
            namespaces[prefix] = namespace

        namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

        writer.push(RDF.RDF)

        if "xml_base" in args:
            writer.attribute(XMLBASE, args["xml_base"])

        writer.namespaces(iter(namespaces.items()))

        # Write out subjects that can not be inline: those never used as
        # an object, and those that reference themselves.
        for subject in store.subjects():
            if (None, None, subject) in store:
                if (subject, None, subject) in store:
                    self.subject(subject, 1)
            else:
                self.subject(subject, 1)

        # write out anything that has not yet been reached
        # write out BNodes last (to ensure they can be inlined where possible)
        bnodes = set()

        for subject in store.subjects():
            if isinstance(subject, BNode):
                bnodes.add(subject)
                continue
            self.subject(subject, 1)

        # now serialize only those BNodes that have not been serialized yet
        for bnode in bnodes:
            if bnode not in self.__serialized:
                # Pass the blank node itself, not the stale loop variable
                # left over from the loop above.
                self.subject(bnode, 1)

        writer.pop(RDF.RDF)
        stream.write(b("\n"))

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None

    def subject(self, subject, depth=1):
        """Write the element describing *subject*, at most once.

        A subject listed in ``forceRDFAbout`` is written as an empty
        ``rdf:Description`` carrying only ``rdf:about`` and is then removed
        from that set.  Otherwise, if it has not been serialized yet, it is
        written in full together with its predicates.
        """
        store = self.store
        writer = self.writer

        if subject in self.forceRDFAbout:
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)

        elif subject not in self.__serialized:
            self.__serialized[subject] = 1
            rdf_type = first(store.objects(subject, RDF.type))

            # Only use the type as the element name if it can be turned
            # into a qname; otherwise fall back to rdf:Description.
            try:
                self.nm.qname(rdf_type)
            except Exception:
                rdf_type = None

            element = rdf_type or RDF.Description
            writer.push(element)

            if isinstance(subject, BNode):
                # The "only label BNodes referenced more than once" check
                # (more_than(store.triples((None, None, subject)), ceil))
                # is disabled in this code, so the label is always written.
                writer.attribute(RDF.nodeID, fix(subject))
            else:
                writer.attribute(RDF.about, self.relativize(subject))

            if (subject, None, None) in store:
                for predicate, obj in store.predicate_objects(subject):
                    # The type already used as the element name is not
                    # repeated as an rdf:type property.
                    if not (predicate == RDF.type and obj == rdf_type):
                        self.predicate(predicate, obj, depth + 1)

            writer.pop(element)

    def predicate(self, predicate, object, depth=1):
        """Write the property element for *predicate* with value *object*.

        Literals are written as text, already-serialized or description-less
        resources as references, RDF lists as collections, and remaining
        resources are nested inline while *depth* allows it.
        """
        writer = self.writer
        store = self.store
        writer.push(predicate)

        if isinstance(object, Literal):
            if object.language:
                writer.attribute(XMLLANG, object.language)

            if (object.datatype == RDF.XMLLiteral and
                    isinstance(object.value, xml.dom.minidom.Document)):
                writer.attribute(RDF.parseType, "Literal")
                writer.text("")
                # The literal is written to the underlying stream directly,
                # bypassing writer.text.
                writer.stream.write(object)
            else:
                if object.datatype:
                    writer.attribute(RDF.datatype, object.datatype)
                writer.text(object)

        elif object in self.__serialized or (object, None, None) not in store:

            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDF.nodeID, fix(object))
            else:
                writer.attribute(RDF.resource, self.relativize(object))

        elif first(store.objects(object, RDF.first)):
            # Has rdf:first, so treat as a list (may not have type RDF.List)
            self.__serialized[object] = 1

            # Warn that any assertions on object other than
            # RDF.first and RDF.rest are ignored... including RDF.List
            import warnings
            warnings.warn(
                "Assertions on %s other than RDF.first " % repr(object) +
                "and RDF.rest are ignored ... including RDF.List",
                UserWarning, stacklevel=2)
            writer.attribute(RDF.parseType, "Collection")

            for item in Collection(store, object):
                if isinstance(item, URIRef):
                    # URIRef items are written as bare rdf:about references.
                    self.forceRDFAbout.add(item)
                self.subject(item)

                if not isinstance(item, URIRef):
                    self.__serialized[item] = 1

        elif first(store.triples_choices(
                (object, RDF.type, [OWL_NS.Class, RDFS.Class]))) \
                and isinstance(object, URIRef):
            # Named classes are referenced, never nested inline.
            writer.attribute(RDF.resource, self.relativize(object))

        elif depth <= self.max_depth:
            self.subject(object, depth + 1)

        elif isinstance(object, BNode):

            if object not in self.__serialized \
                    and (object, None, None) in store \
                    and len(list(store.subjects(object=object))) == 1:
                # inline blank nodes if they haven't been serialized yet
                # and are only referenced once (regardless of depth)
                self.subject(object, depth + 1)
            else:
                writer.attribute(RDF.nodeID, fix(object))

        else:
            writer.attribute(RDF.resource, self.relativize(object))

        writer.pop(predicate)
339
340 writer.pop(predicate)