Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/serializers/turtle.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """ | |
2 Turtle RDF graph serializer for RDFLib. | |
3 See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification. | |
4 """ | |
5 | |
6 from collections import defaultdict | |
7 | |
8 from rdflib.compat import cmp_to_key | |
9 from rdflib.term import BNode, Literal, URIRef | |
10 from rdflib.exceptions import Error | |
11 from rdflib.serializer import Serializer | |
12 from rdflib.namespace import RDF, RDFS | |
13 | |
14 __all__ = ['RecursiveSerializer', 'TurtleSerializer'] | |
15 | |
16 def _object_comparator(a,b): | |
17 """ | |
18 for nice clean output we sort the objects of triples, | |
19 some of them are literals, | |
20 these are sorted according to the sort order of the underlying python objects | |
21 in py3 not all things are comparable. | |
22 This falls back on comparing string representations when not. | |
23 """ | |
24 | |
25 try: | |
26 if a>b: return 1 | |
27 if a<b: return -1 | |
28 return 0 | |
29 | |
30 except TypeError: | |
31 a = str(a) | |
32 b = str(b) | |
33 return (a > b) - (a < b) | |
34 | |
35 | |
class RecursiveSerializer(Serializer):
    """
    Base class for serializers that emit a graph one subject at a time.

    It keeps the bookkeeping shared by such serializers: which subjects
    exist, how often each node is referenced as an object, which subjects
    have already been written, the current nesting depth and the namespace
    prefixes in use.
    """

    # Instances of these classes are ordered first by orderSubjects().
    topClasses = [RDFS.Class]
    # Predicates listed here are emitted before all others, in this order.
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    # NOTE(review): the listing this was read from may have collapsed runs
    # of spaces; confirm the intended indent width.
    indentString = " "

    def __init__(self, store):
        """Attach to ``store`` and initialise all per-run state."""
        super(RecursiveSerializer, self).__init__(store)
        self.stream = None
        self.reset()

    def addNamespace(self, prefix, uri):
        """
        Bind ``prefix`` to ``uri``.

        Raises ``Exception`` if ``prefix`` is already bound to a different
        namespace.
        """
        if prefix in self.namespaces:
            bound = self.namespaces[prefix]
            if bound != uri:
                raise Exception("Trying to override namespace prefix %s => %s, but it's already bound to %s" % (prefix, uri, bound))
        self.namespaces[prefix] = uri

    def checkSubject(self, subject):
        """Check to see if the subject should be serialized yet"""
        if self.isDone(subject):
            return False
        if subject not in self._subjects:
            return False
        # Top-level subjects are only written at the outermost levels.
        if subject in self._topLevels and self.depth > 1:
            return False
        # URI-named subjects are not nested beyond maxDepth.
        if isinstance(subject, URIRef) and self.depth >= self.maxDepth:
            return False
        return True

    def isDone(self, subject):
        """Return true if subject is serialized"""
        return subject in self._serialized

    def orderSubjects(self):
        """
        Return every subject in the order it should be serialized.

        Members of ``topClasses`` come first (sorted within each class) and
        are recorded as top-level. All remaining subjects follow, sorted by
        the tuple (is a BNode, reference count, subject).
        """
        ordered = []
        placed = set()

        for classURI in self.topClasses:
            for member in sorted(self.store.subjects(RDF.type, classURI)):
                ordered.append(member)
                self._topLevels[member] = True
                placed.add(member)

        remaining = sorted(
            (isinstance(subject, BNode), self._references[subject], subject)
            for subject in self._subjects
            if subject not in placed
        )
        ordered.extend(subject for _, _, subject in remaining)

        return ordered

    def preprocess(self):
        """Run preprocessTriple() over every triple in the store."""
        for triple in self.store.triples((None, None, None)):
            self.preprocessTriple(triple)

    def preprocessTriple(self, xxx_todo_changeme):
        """Count a reference to the triple's object and record its subject."""
        subject, _predicate, obj = xxx_todo_changeme
        self._references[obj] += 1
        self._subjects[subject] = True

    def reset(self):
        """Clear all per-run state and reload the store's namespaces."""
        self.depth = 0
        self.lists = {}
        self.namespaces = {}
        self._references = defaultdict(int)
        self._serialized = {}
        self._subjects = {}
        self._topLevels = {}

        for prefix, ns in self.store.namespaces():
            self.addNamespace(prefix, ns)

    def buildPredicateHash(self, subject):
        """
        Build a hash key by predicate to a list of objects for the given
        subject
        """
        by_predicate = {}
        for _, predicate, obj in self.store.triples((subject, None, None)):
            by_predicate.setdefault(predicate, []).append(obj)
        return by_predicate

    def sortProperties(self, properties):
        """Take a hash from predicate uris to lists of values.
        Sort the lists of values. Return a sorted list of properties."""
        # Sort each object list in place.
        object_key = cmp_to_key(_object_comparator)
        for objects in properties.values():
            objects.sort(key=object_key)

        # Preferred predicates first, then everything else in sorted order.
        ordered = []
        placed = set()
        for prop in self.predicateOrder:
            if prop in properties and prop not in placed:
                ordered.append(prop)
                placed.add(prop)
        for prop in sorted(properties):
            if prop not in placed:
                ordered.append(prop)
                placed.add(prop)
        return ordered

    def subjectDone(self, subject):
        """Mark a subject as done."""
        self._serialized[subject] = True

    def indent(self, modifier=0):
        """Returns indent string multiplied by the depth"""
        levels = self.depth + modifier
        return levels * self.indentString

    def write(self, text):
        """Write text in given encoding."""
        # Characters the encoding cannot represent are replaced, not raised.
        encoded = text.encode(self.encoding, 'replace')
        self.stream.write(encoded)
157 | |
158 | |
# Position of a node within a triple, as passed to path()/label().
SUBJECT, VERB, OBJECT = 0, 1, 2

# Whether getQName() may generate a new prefix for a literal's datatype URI.
_GEN_QNAME_FOR_DT = False
# Default for the ``spacious`` option (extra blank lines in the output).
_SPACIOUS_OUTPUT = False
165 | |
166 | |
class TurtleSerializer(RecursiveSerializer):
    """
    Serializer that writes the graph in Turtle syntax.

    See <http://www.w3.org/TeamSubmission/turtle/> for the syntax.
    """

    short_name = "turtle"
    # NOTE(review): the listing this was read from may have collapsed runs
    # of spaces; confirm the intended indent width.
    indentString = ' '

    def __init__(self, store):
        """Attach to ``store`` and initialise Turtle-specific state."""
        # The base constructor calls reset(), which calls addNamespace();
        # the rewrite table must therefore exist before it runs.
        self._ns_rewrite = {}
        super(TurtleSerializer, self).__init__(store)
        # Predicates that have a dedicated Turtle keyword.
        self.keywords = {
            RDF.type: 'a'
        }
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    def addNamespace(self, prefix, namespace):
        """
        Bind ``prefix`` to ``namespace`` and return the prefix actually used.

        The returned prefix differs from the one passed in when it had to
        be rewritten (see below).
        """
        # Turtle does not support prefix that start with _
        # if they occur in the graph, rewrite to p_blah
        # this is more complicated since we need to make sure p_blah
        # does not already exist. And we register namespaces as we go, i.e.
        # we may first see a triple with prefix _9 - rewrite it to p_9
        # and then later find a triple with a "real" p_9 prefix

        # so we need to keep track of ns rewrites we made so far.

        if (prefix > '' and prefix[0] == '_') \
                or self.namespaces.get(prefix, namespace) != namespace:

            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

        prefix = self._ns_rewrite.get(prefix, prefix)

        super(TurtleSerializer, self).addNamespace(prefix, namespace)
        return prefix

    def reset(self):
        """Clear all per-run state, including prefix rewrites."""
        # The rewrite table must be cleared BEFORE the base reset: the base
        # reset re-registers the store's namespaces through addNamespace(),
        # which records rewrites in this table. Clearing it afterwards would
        # forget those rewrites while self.namespaces still holds the
        # rewritten prefixes, so the same namespace would later be bound a
        # second time under yet another generated prefix.
        self._ns_rewrite = {}
        super(TurtleSerializer, self).reset()
        self._shortNames = {}
        self._started = False

    def serialize(self, stream, base=None, encoding=None,
                  spacious=None, **args):
        """
        Write the whole graph to ``stream`` as Turtle.

        ``stream`` receives bytes. ``base`` is stored on the serializer.
        ``spacious``, when not None, overrides the spacious-output setting.
        ``encoding`` and ``**args`` are accepted for interface compatibility
        but are not read here; write() encodes with ``self.encoding``.
        """
        self.reset()
        self.stream = stream
        self.base = base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        for subject in subjects_list:
            # Subjects already written inline (e.g. nested blank nodes).
            if self.isDone(subject):
                continue
            if self.statement(subject):
                self.write('\n')

        self.endDocument()
        stream.write("\n".encode('ascii'))

    def preprocessTriple(self, triple):
        """
        Do the base bookkeeping, then register the namespaces the triple uses.
        """
        super(TurtleSerializer, self).preprocessTriple(triple)
        for i, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    def getQName(self, uri, gen_prefix=True):
        """
        Return ``prefix:local`` for ``uri``, or None if no QName can be used.

        None is returned when ``uri`` is not a URIRef, when no prefix can be
        determined, or when the local part ends with a dot. As a side
        effect the namespace is registered through addNamespace().
        """
        if not isinstance(uri, URIRef):
            return None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is None:
                # nothing worked
                return None
            parts = (pfx, uri, '')

        prefix, namespace, local = parts

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return '%s:%s' % (prefix, local)

    def startDocument(self):
        """Write the ``@prefix`` declarations, sorted by prefix."""
        self._started = True
        ns_list = sorted(self.namespaces.items())
        for prefix, uri in ns_list:
            self.write(self.indent() + '@prefix %s: <%s> .\n' % (prefix, uri))
        if ns_list and self._spacious:
            self.write('\n')

    def endDocument(self):
        """Write the trailing blank line used in spacious mode."""
        if self._spacious:
            self.write('\n')

    def statement(self, subject):
        """Write all triples of ``subject``; mark it done first."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        """Write ``subject`` followed by its predicate list. Returns True."""
        self.write('\n' + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(' .')
        return True

    def s_squared(self, subject):
        """
        Write an unreferenced blank-node subject as ``[]``.

        Returns False without writing anything when ``subject`` is not a
        BNode or is referenced elsewhere.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write('\n' + self.indent() + '[]')
        self.predicateList(subject)
        self.write(' .')
        return True

    def path(self, node, position, newline=False):
        """
        Write ``node`` at ``position``, inline if possible.

        Raises ``Error`` if neither p_squared() nor p_default() handled it.
        """
        if not (self.p_squared(node, position, newline)
                or self.p_default(node, position, newline)):
            raise Error("Cannot serialize node '%s'" % (node, ))

    def p_default(self, node, position, newline=False):
        """Write the label of ``node``. Returns True."""
        if position != SUBJECT and not newline:
            self.write(' ')
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """Return the Turtle text for ``node`` at ``position``."""
        if node == RDF.nil:
            return '()'
        # Compare by value: positions are plain ints, not singletons.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(
                    dt, _GEN_QNAME_FOR_DT))

        node = self.relativize(node)
        return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node, position, newline=False):
        """
        Write a blank node inline, as ``( ... )`` or ``[ ... ]``.

        Returns False without writing anything unless ``node`` is a BNode
        that is not yet serialized, is referenced at most once and is not
        in subject position.
        """
        if (not isinstance(node, BNode)
                or node in self._serialized
                or self._references[node] > 1
                or position == SUBJECT):
            return False

        if not newline:
            self.write(' ')

        if self.isValidList(node):
            # this is a list
            self.write('(')
            self.depth += 1
            self.doList(node)
            self.depth -= 1
            self.write(' )')
        else:
            self.subjectDone(node)
            # Depth is raised by two for the bracket and lowered by one
            # before the nested predicates are written; it returns to its
            # starting value after the closing bracket.
            self.depth += 2
            self.write('[')
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(' ]')
            self.depth -= 1

        return True

    def isValidList(self, l):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.

        A chain of ``rdf:rest`` links that loops back on itself is not a
        valid list; False is returned for it.
        """
        try:
            if self.store.value(l, RDF.first) is None:
                return False
        except Exception:
            return False
        visited = set()
        while l:
            if l in visited:
                # Cyclic rdf:rest chain: without this check the walk
                # would never terminate.
                return False
            visited.add(l)
            if l != RDF.nil and len(
                    list(self.store.predicate_objects(l))) != 2:
                return False
            l = self.store.value(l, RDF.rest)
        return True

    def doList(self, l):
        """
        Write the items of the RDF list starting at ``l``.

        Every list node visited is marked as serialized. Within this class
        it is only called after isValidList() has accepted ``l``.
        """
        while l:
            item = self.store.value(l, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l)
            l = self.store.value(l, RDF.rest)

    def predicateList(self, subject, newline=False):
        """Write every predicate of ``subject`` with its objects."""
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if not propList:
            return
        self.verb(propList[0], newline=newline)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(' ;\n' + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node, newline=False):
        """Write ``node`` in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write ``objects`` as a comma-separated list, one per line."""
        count = len(objects)
        if count == 0:
            return
        # The expression previously used here, `(count == 1) and 0 or 1`,
        # evaluates to 1 for every count (0 is falsy, so the `or` operand
        # always wins). The value is spelled out so the emitted
        # indentation stays exactly as it was.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(',\n' + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod