comparison planemo/lib/python3.7/site-packages/rdflib/plugins/serializers/turtle.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """
2 Turtle RDF graph serializer for RDFLib.
3 See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
4 """
5
6 from collections import defaultdict
7
8 from rdflib.compat import cmp_to_key
9 from rdflib.term import BNode, Literal, URIRef
10 from rdflib.exceptions import Error
11 from rdflib.serializer import Serializer
12 from rdflib.namespace import RDF, RDFS
13
14 __all__ = ['RecursiveSerializer', 'TurtleSerializer']
15
def _object_comparator(a, b):
    """
    Three-way comparison used to sort the objects of triples for stable,
    readable output.

    The operands are compared with their native ordering first.  On
    Python 3 some pairs of values are not mutually orderable and raise
    ``TypeError``; in that case their ``str()`` forms are compared instead.

    Returns 1 if ``a`` sorts after ``b``, -1 if it sorts before, else 0.
    """
    try:
        if a > b:
            result = 1
        elif a < b:
            result = -1
        else:
            result = 0
    except TypeError:
        # Not orderable natively: fall back to the string representations.
        left, right = str(a), str(b)
        result = (left > right) - (left < right)
    return result
34
35
class RecursiveSerializer(Serializer):
    """
    Base class for serializers that emit a graph subject by subject.

    It keeps the bookkeeping shared by such serializers: the namespace
    table, how often each node is referenced as an object, which subjects
    exist, which are "top level" and which have already been written.
    """

    # Subjects typed with one of these classes are emitted first.
    topClasses = [RDFS.Class]
    # Predicates listed here are emitted before all other predicates.
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    # NOTE(review): whitespace in the reviewed listing looks collapsed;
    # confirm the intended width of this indent unit against upstream.
    indentString = " "

    def __init__(self, store):
        super(RecursiveSerializer, self).__init__(store)
        self.stream = None
        self.reset()

    def addNamespace(self, prefix, uri):
        """Bind ``prefix`` to ``uri``; refuse to rebind it to another URI."""
        already_bound = prefix in self.namespaces
        if already_bound and self.namespaces[prefix] != uri:
            raise Exception("Trying to override namespace prefix %s => %s, but it's already bound to %s" % (prefix, uri, self.namespaces[prefix]))
        self.namespaces[prefix] = uri

    def checkSubject(self, subject):
        """Return True if ``subject`` should be serialized at this point."""
        if self.isDone(subject):
            return False
        if subject not in self._subjects:
            return False
        # Top-level subjects are only written near the top of the document.
        if subject in self._topLevels and self.depth > 1:
            return False
        if isinstance(subject, URIRef) and self.depth >= self.maxDepth:
            return False
        return True

    def isDone(self, subject):
        """Return True if ``subject`` has already been serialized."""
        return subject in self._serialized

    def orderSubjects(self):
        """
        Return all subjects in output order: sorted members of each class
        in ``topClasses`` first (also recorded as top levels), then every
        other subject sorted by (is a BNode, reference count, node).
        """
        ordered = []
        placed = set()

        for classURI in self.topClasses:
            for member in sorted(self.store.subjects(RDF.type, classURI)):
                ordered.append(member)
                self._topLevels[member] = True
                placed.add(member)

        remaining = sorted(
            (isinstance(node, BNode), self._references[node], node)
            for node in self._subjects
            if node not in placed)
        ordered.extend(entry[2] for entry in remaining)

        return ordered

    def preprocess(self):
        """Feed every triple of the store through ``preprocessTriple``."""
        every_triple = self.store.triples((None, None, None))
        for triple in every_triple:
            self.preprocessTriple(triple)

    def preprocessTriple(self, xxx_todo_changeme):
        """Count the reference to the object and register the subject."""
        subj, _pred, obj = xxx_todo_changeme
        self._references[obj] += 1
        self._subjects[subj] = True

    def reset(self):
        """Discard all per-run state and reload namespaces from the store."""
        self.depth = 0
        self.lists = {}
        self.namespaces = {}
        self._references = defaultdict(int)
        self._serialized = {}
        self._subjects = {}
        self._topLevels = {}

        for prefix, ns in self.store.namespaces():
            self.addNamespace(prefix, ns)

    def buildPredicateHash(self, subject):
        """
        Return a dict mapping each predicate of ``subject`` to the list of
        its objects.
        """
        by_predicate = {}
        for _s, pred, obj in self.store.triples((subject, None, None)):
            by_predicate.setdefault(pred, []).append(obj)
        return by_predicate

    def sortProperties(self, properties):
        """
        Sort every object list of ``properties`` in place and return the
        predicates in output order: those named in ``predicateOrder``
        first, then the remaining ones in sorted order.
        """
        compare_key = cmp_to_key(_object_comparator)
        for values in properties.values():
            values.sort(key=compare_key)

        ranked = []
        emitted = set()
        for preferred in self.predicateOrder:
            if preferred in properties and preferred not in emitted:
                ranked.append(preferred)
                emitted.add(preferred)
        for candidate in sorted(properties):
            if candidate not in emitted:
                ranked.append(candidate)
                emitted.add(candidate)
        return ranked

    def subjectDone(self, subject):
        """Record that ``subject`` has been serialized."""
        self._serialized[subject] = True

    def indent(self, modifier=0):
        """Return the indent string repeated ``depth + modifier`` times."""
        levels = self.depth + modifier
        return levels * self.indentString

    def write(self, text):
        """Encode ``text`` with ``self.encoding`` and write it to the stream."""
        encoded = text.encode(self.encoding, 'replace')
        self.stream.write(encoded)
157
158
# Position of a node inside a triple; passed around as the ``position``
# argument of the TurtleSerializer path/label helpers.
SUBJECT, VERB, OBJECT = range(3)

# Whether prefixes may be generated for literal datatype URIs.
_GEN_QNAME_FOR_DT = False
# Default for the serializer's "spacious" (extra blank lines) output mode.
_SPACIOUS_OUTPUT = False
165
166
class TurtleSerializer(RecursiveSerializer):
    """
    Serializer that writes the graph held in ``store`` as Turtle.

    Subjects are written one statement at a time; blank nodes that are
    referenced at most once are inlined as ``[ ... ]`` or, when they form a
    well-formed RDF collection, as ``( ... )``.
    """

    short_name = "turtle"
    # NOTE(review): whitespace in the reviewed listing looks collapsed;
    # confirm the intended width of this indent unit against upstream.
    indentString = ' '

    def __init__(self, store):
        # The base-class constructor calls reset(), which reaches
        # addNamespace(); the rewrite table must exist before that.
        self._ns_rewrite = {}
        super(TurtleSerializer, self).__init__(store)
        # Predicates that have a dedicated Turtle keyword.
        self.keywords = {
            RDF.type: 'a'
        }
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    def addNamespace(self, prefix, namespace):
        """
        Register ``prefix`` for ``namespace`` and return the prefix that
        was actually bound.

        Turtle does not support prefixes that start with ``_``, and a
        prefix may already be bound to a different namespace.  In both
        cases the prefix is rewritten by prepending ``p`` until it is
        unused.  Namespaces are registered as triples are seen, so the
        rewrites made so far are remembered in ``self._ns_rewrite`` and
        reused when the same original prefix shows up again.
        """
        if (prefix > '' and prefix[0] == '_') \
                or self.namespaces.get(prefix, namespace) != namespace:

            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

            prefix = self._ns_rewrite.get(prefix, prefix)

        super(TurtleSerializer, self).addNamespace(prefix, namespace)
        return prefix

    def reset(self):
        """Discard all per-run state, including prefix rewrites."""
        # Clear the rewrite table *before* delegating: the base reset()
        # re-registers the store's namespaces through addNamespace(), and
        # any rewrite recorded there must stay known.  Clearing afterwards
        # would make a later addNamespace() bind the same namespace again
        # under yet another prefix.
        self._ns_rewrite = {}
        super(TurtleSerializer, self).reset()
        self._shortNames = {}
        self._started = False

    def serialize(self, stream, base=None, encoding=None,
                  spacious=None, **args):
        """
        Write the whole graph to ``stream`` as Turtle.

        ``stream`` must accept bytes.  ``base`` is stored on the
        serializer.  ``spacious``, when not None, overrides the spacious
        output flag.  ``encoding`` and extra keyword arguments are accepted
        but not used by this method; text is encoded with
        ``self.encoding`` in ``write()``.
        """
        self.reset()
        self.stream = stream
        self.base = base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        for subject in subjects_list:
            if self.isDone(subject):
                continue
            # A newline follows every statement that was written.  (The
            # previous "first time" flag was always False by the time it
            # was tested, so it never suppressed this newline.)
            if self.statement(subject):
                self.write('\n')

        self.endDocument()
        stream.write(b"\n")

    def preprocessTriple(self, triple):
        """
        Update the reference bookkeeping for ``triple`` and register the
        namespaces needed to abbreviate its nodes and literal datatypes.
        """
        super(TurtleSerializer, self).preprocessTriple(triple)
        for i, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    def getQName(self, uri, gen_prefix=True):
        """
        Return ``prefix:local`` for ``uri`` and register the namespace, or
        return None when ``uri`` is not a URIRef or cannot be abbreviated.

        ``gen_prefix`` is forwarded to the store's ``compute_qname`` as its
        ``generate`` argument.
        """
        if not isinstance(uri, URIRef):
            return None

        parts = None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.

            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is not None:
                parts = (pfx, uri, '')
            else:
                # nothing worked
                return None

        prefix, namespace, local = parts

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return '%s:%s' % (prefix, local)

    def startDocument(self):
        """Write one ``@prefix`` line per registered namespace, sorted."""
        self._started = True
        ns_list = sorted(self.namespaces.items())
        for prefix, uri in ns_list:
            self.write(self.indent() + '@prefix %s: <%s> .\n' % (prefix, uri))
        if ns_list and self._spacious:
            self.write('\n')

    def endDocument(self):
        """Write a trailing blank line in spacious mode."""
        if self._spacious:
            self.write('\n')

    def statement(self, subject):
        """Mark ``subject`` done and write it; return True if written."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        """Write ``subject`` followed by its predicate list and `` .``."""
        self.write('\n' + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(' .')
        return True

    def s_squared(self, subject):
        """
        Write an unreferenced blank-node subject as ``[]`` plus its
        predicate list.  Return False (writing nothing) for any other
        subject.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write('\n' + self.indent() + '[]')
        self.predicateList(subject)
        self.write(' .')
        return True

    def path(self, node, position, newline=False):
        """
        Write ``node`` inline if possible, otherwise by its label.

        Raises ``Error`` if neither strategy reports success.
        """
        if not (self.p_squared(node, position, newline)
                or self.p_default(node, position, newline)):
            raise Error("Cannot serialize node '%s'" % (node, ))

    def p_default(self, node, position, newline=False):
        """Write the label of ``node``, preceded by a space unless it is a
        subject or starts a new line."""
        if position != SUBJECT and not newline:
            self.write(' ')
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """Return the Turtle text for ``node`` at ``position``."""
        if node == RDF.nil:
            return '()'
        # Equality, not identity: ``is`` on an int only works by accident
        # of small-integer caching.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(
                    dt, _GEN_QNAME_FOR_DT))
        else:
            node = self.relativize(node)

            return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node, position, newline=False):
        """
        Write a blank node inline, as ``( ... )`` when it is a valid RDF
        list and as ``[ ... ]`` otherwise.

        Return False without writing anything when ``node`` is not a blank
        node, was already serialized, is referenced more than once, or is
        in subject position.
        """
        if (not isinstance(node, BNode)
                or node in self._serialized
                or self._references[node] > 1
                or position == SUBJECT):
            return False

        if not newline:
            self.write(' ')

        if self.isValidList(node):
            # this is a list
            self.write('(')
            self.depth += 1
            self.doList(node)
            self.depth -= 1
            self.write(' )')
        else:
            self.subjectDone(node)
            # The opening bracket is written two levels in; the nested
            # predicate list runs one level in.
            self.depth += 2
            self.write('[')
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(' ]')
            self.depth -= 1

        return True

    def isValidList(self, l):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other
        properties.  A chain whose rdf:rest links loop back on an earlier
        node is reported as invalid instead of being followed forever.
        """
        try:
            if self.store.value(l, RDF.first) is None:
                return False
        except Exception:
            return False
        visited = set()
        while l:
            if l in visited:
                # Cyclic rdf:rest chain: not a well-formed collection.
                return False
            visited.add(l)
            if l != RDF.nil and len(
                    list(self.store.predicate_objects(l))) != 2:
                return False
            l = self.store.value(l, RDF.rest)
        return True

    def doList(self, l):
        """Write the items of the RDF list starting at ``l`` and mark each
        list node that carried an item as done."""
        while l:
            item = self.store.value(l, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l)
            l = self.store.value(l, RDF.rest)

    def predicateList(self, subject, newline=False):
        """Write all predicates of ``subject`` with their objects,
        separated by `` ;`` and a line break."""
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if len(propList) == 0:
            return
        self.verb(propList[0], newline=newline)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(' ;\n' + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node, newline=False):
        """Write ``node`` in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write ``objects`` separated by ``,`` and a line break."""
        count = len(objects)
        if count == 0:
            return
        # NOTE(review): this used to be ``(count == 1) and 0 or 1``.  That
        # and/or idiom cannot yield its falsy middle value, so it always
        # produced 1.  The effective value is spelled out here to keep the
        # emitted indentation byte-for-byte unchanged; switch to
        # ``0 if count == 1 else 1`` only if the shallower indent for
        # single objects is really wanted.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(',\n' + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod