Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/rdflib/plugins/serializers/turtle.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
"""
Turtle RDF graph serializer for RDFLib.
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
"""

from collections import defaultdict

from rdflib.compat import cmp_to_key
from rdflib.term import BNode, Literal, URIRef
from rdflib.exceptions import Error
from rdflib.serializer import Serializer
from rdflib.namespace import RDF, RDFS

__all__ = ['RecursiveSerializer', 'TurtleSerializer']


def _object_comparator(a, b):
    """
    Three-way comparison used to sort the objects of a predicate.

    For nice clean output we sort the objects of triples. Some of them are
    literals, which are ordered by the underlying Python values; in py3 not
    all values are comparable with each other, so on ``TypeError`` this
    falls back to comparing the string representations.

    Returns 1, -1 or 0, suitable for ``cmp_to_key``.
    """
    try:
        if a > b:
            return 1
        if a < b:
            return -1
        return 0
    except TypeError:
        a = str(a)
        b = str(b)
        return (a > b) - (a < b)


class RecursiveSerializer(Serializer):
    """
    Base class for serializers that walk the graph subject by subject.

    It keeps the bookkeeping shared by such serializers: which nodes occur
    as subjects, how often each node is referenced as an object, which
    subjects have already been written, the namespace table, and the
    current nesting depth used for indentation.
    """

    # Subjects typed with one of these classes are emitted first.
    topClasses = [RDFS.Class]
    # Predicates listed here are emitted first, in this order.
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    # NOTE(review): whitespace runs are collapsed in the view this block was
    # reconstructed from, so the real indent width may be wider - confirm.
    indentString = " "

    def __init__(self, store):
        """Create a serializer for *store* and initialise all bookkeeping."""
        super(RecursiveSerializer, self).__init__(store)
        self.stream = None
        self.reset()

    def addNamespace(self, prefix, uri):
        """
        Bind *prefix* to *uri* in the serializer's namespace table.

        Raises ``Exception`` if *prefix* is already bound to a different
        namespace; re-binding to the same namespace is a no-op.
        """
        if prefix in self.namespaces and self.namespaces[prefix] != uri:
            raise Exception("Trying to override namespace prefix %s => %s, but it's already bound to %s" % (prefix, uri, self.namespaces[prefix]))
        self.namespaces[prefix] = uri

    def checkSubject(self, subject):
        """Check to see if the subject should be serialized yet"""
        if self.isDone(subject):
            return False
        if subject not in self._subjects:
            return False
        # Top-level subjects are only written at the outermost levels.
        if subject in self._topLevels and self.depth > 1:
            return False
        # Stop nesting named resources once the depth limit is reached.
        if isinstance(subject, URIRef) and self.depth >= self.maxDepth:
            return False
        return True

    def isDone(self, subject):
        """Return true if subject is serialized"""
        return subject in self._serialized

    def orderSubjects(self):
        """
        Return all subjects in the order they should be serialized.

        Members of ``topClasses`` come first (sorted within each class) and
        are recorded as top-level subjects. The remaining subjects follow,
        sorted by (is a blank node, number of references, subject).
        """
        seen = set()
        subjects = []

        for classURI in self.topClasses:
            for member in sorted(self.store.subjects(RDF.type, classURI)):
                subjects.append(member)
                self._topLevels[member] = True
                seen.add(member)

        recursable = sorted(
            (isinstance(subject, BNode), self._references[subject], subject)
            for subject in self._subjects if subject not in seen)
        subjects.extend(subject for (_isbnode, _refs, subject) in recursable)

        return subjects

    def preprocess(self):
        """Run ``preprocessTriple`` over every triple in the store."""
        for triple in self.store.triples((None, None, None)):
            self.preprocessTriple(triple)

    def preprocessTriple(self, xxx_todo_changeme):
        """
        Record one ``(s, p, o)`` triple in the bookkeeping tables.

        The object's reference count is incremented and the subject is
        remembered as something that needs to be serialized.
        """
        (s, p, o) = xxx_todo_changeme
        self._references[o] += 1
        self._subjects[s] = True

    def reset(self):
        """
        Discard all per-run state and re-register the store's namespaces.

        Namespaces are registered through ``self.addNamespace`` so that
        subclasses can adjust prefixes.
        """
        self.depth = 0
        self.lists = {}
        self.namespaces = {}
        self._references = defaultdict(int)
        self._serialized = {}
        self._subjects = {}
        self._topLevels = {}

        for prefix, ns in self.store.namespaces():
            self.addNamespace(prefix, ns)

    def buildPredicateHash(self, subject):
        """
        Build a hash key by predicate to a list of objects for the given
        subject
        """
        properties = {}
        for _s, p, o in self.store.triples((subject, None, None)):
            properties.setdefault(p, []).append(o)
        return properties

    def sortProperties(self, properties):
        """Take a hash from predicate uris to lists of values.
        Sort the lists of values.  Return a sorted list of properties."""
        # Sort object lists (in place; the caller reads them back later).
        object_key = cmp_to_key(_object_comparator)
        for objects in properties.values():
            objects.sort(key=object_key)

        # Make sorted list of properties: preferred predicates first, in the
        # order given by predicateOrder, then everything else sorted.
        propList = []
        seen = set()
        for prop in self.predicateOrder:
            if prop in properties and prop not in seen:
                propList.append(prop)
                seen.add(prop)
        for prop in sorted(properties):
            if prop not in seen:
                propList.append(prop)
                seen.add(prop)
        return propList

    def subjectDone(self, subject):
        """Mark a subject as done."""
        self._serialized[subject] = True

    def indent(self, modifier=0):
        """Returns indent string multiplied by the depth"""
        return (self.depth + modifier) * self.indentString

    def write(self, text):
        """Write text in given encoding."""
        # self.encoding is not assigned anywhere in this module; presumably
        # it is provided by the Serializer base class - confirm.
        self.stream.write(text.encode(self.encoding, 'replace'))


# Positions a node can occupy within a triple.
SUBJECT = 0
VERB = 1
OBJECT = 2

# Whether prefixes may be generated for literal datatype URIs.
_GEN_QNAME_FOR_DT = False
# Default for the ``spacious`` serialization option.
_SPACIOUS_OUTPUT = False


class TurtleSerializer(RecursiveSerializer):
    """Serializer that writes the graph in Turtle syntax."""

    short_name = "turtle"
    # NOTE(review): whitespace runs are collapsed in the view this block was
    # reconstructed from, so the real indent width may be wider - confirm.
    indentString = ' '

    def __init__(self, store):
        """Create a Turtle serializer for *store*."""
        # Must exist before the base __init__ runs: it calls reset(), which
        # registers namespaces through addNamespace().
        self._ns_rewrite = {}
        super(TurtleSerializer, self).__init__(store)
        self.keywords = {
            RDF.type: 'a'
        }
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    def addNamespace(self, prefix, namespace):
        """
        Bind *prefix* to *namespace*, rewriting the prefix if necessary.

        Returns the prefix that was actually bound, which callers must use
        in place of the one they passed in.
        """
        # Turtle does not support prefix that start with _
        # if they occur in the graph, rewrite to p_blah
        # this is more complicated since we need to make sure p_blah
        # does not already exist. And we register namespaces as we go, i.e.
        # we may first see a triple with prefix _9 - rewrite it to p_9
        # and then later find a triple with a "real" p_9 prefix

        # so we need to keep track of ns rewrites we made so far.

        if prefix.startswith('_') \
                or self.namespaces.get(prefix, namespace) != namespace:

            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

            prefix = self._ns_rewrite.get(prefix, prefix)

        super(TurtleSerializer, self).addNamespace(prefix, namespace)
        return prefix

    def reset(self):
        """Discard all per-run state, including prefix rewrites."""
        # Clear the rewrite table BEFORE the base reset re-registers the
        # store's namespaces through addNamespace(). Clearing it afterwards
        # would forget the rewrites made during registration (and let stale
        # ones from a previous run leak in), so the same namespace would
        # later be bound a second time under a longer prefix.
        self._ns_rewrite = {}
        super(TurtleSerializer, self).reset()
        self._shortNames = {}
        self._started = False

    def serialize(self, stream, base=None, encoding=None,
                  spacious=None, **args):
        """
        Write the whole graph to *stream* as Turtle.

        *stream* must accept bytes. *base* is stored on the serializer.
        *spacious*, when not ``None``, overrides the spacing option and
        stays in effect for later calls. *encoding* and any extra keyword
        arguments are accepted but not used by this method; the output
        encoding is whatever ``write`` uses.
        """
        self.reset()
        self.stream = stream
        self.base = base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        for subject in subjects_list:
            if self.isDone(subject):
                continue
            # A newline follows every statement that was written, the first
            # one included.
            if self.statement(subject):
                self.write('\n')

        self.endDocument()
        stream.write("\n".encode('ascii'))

    def preprocessTriple(self, triple):
        """
        Record *triple* and register the namespaces its nodes need.

        Calling ``getQName`` here has the side effect of binding prefixes,
        so that they are known when the ``@prefix`` header is written.
        """
        super(TurtleSerializer, self).preprocessTriple(triple)
        for i, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    def getQName(self, uri, gen_prefix=True):
        """
        Return ``prefix:local`` for *uri*, or ``None`` if it has no QName.

        ``None`` is returned when *uri* is not a ``URIRef``, when no prefix
        can be computed or found for it, or when the local part would end
        with a dot. On success the prefix is registered via
        ``addNamespace``.
        """
        if not isinstance(uri, URIRef):
            return None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # Deliberately broad (any failure means "no qname this way"),
            # but no longer swallows KeyboardInterrupt / SystemExit.

            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is None:
                # nothing worked
                return None
            parts = (pfx, uri, '')

        prefix, namespace, local = parts

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return '%s:%s' % (prefix, local)

    def startDocument(self):
        """Write one ``@prefix`` line per known namespace, sorted."""
        self._started = True
        ns_list = sorted(self.namespaces.items())
        for prefix, uri in ns_list:
            self.write(self.indent() + '@prefix %s: <%s> .\n' % (prefix, uri))
        if ns_list and self._spacious:
            self.write('\n')

    def endDocument(self):
        """Write the trailing blank line used in spacious mode."""
        if self._spacious:
            self.write('\n')

    def statement(self, subject):
        """Write all triples of *subject* as one statement; return True."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        """Write *subject* followed by its predicate list and `` .``."""
        self.write('\n' + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(' .')
        return True

    def s_squared(self, subject):
        """
        Write an unreferenced blank-node subject using ``[]``.

        Returns False without writing anything if *subject* is not a blank
        node or is referenced elsewhere.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write('\n' + self.indent() + '[]')
        self.predicateList(subject)
        self.write(' .')
        return True

    def path(self, node, position, newline=False):
        """
        Write *node* at the given triple *position*.

        Raises ``Error`` if neither the inline nor the default form
        reports success.
        """
        if not (self.p_squared(node, position, newline)
                or self.p_default(node, position, newline)):
            raise Error("Cannot serialize node '%s'" % (node, ))

    def p_default(self, node, position, newline=False):
        """Write the label of *node*, preceded by a space where needed."""
        if position != SUBJECT and not newline:
            self.write(' ')
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """
        Return the Turtle text for *node* at the given *position*.

        ``rdf:nil`` becomes ``()``, keyword predicates (``rdf:type``) use
        their keyword, literals use their N3 form, and everything else is
        a QName when one is available or the node's N3 form otherwise.
        """
        if node == RDF.nil:
            return '()'
        # '==' rather than 'is': VERB is a plain int constant.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(
                    dt, _GEN_QNAME_FOR_DT))

        # relativize() is not defined in this module; presumably it comes
        # from the Serializer base class and uses self.base - confirm.
        node = self.relativize(node)
        return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node, position, newline=False):
        """
        Write a blank node inline, as ``( ... )`` or ``[ ... ]``.

        Returns False without writing anything unless *node* is a blank
        node in non-subject position that has not been serialized yet and
        is referenced at most once.
        """
        if (not isinstance(node, BNode)
                or node in self._serialized
                or self._references[node] > 1
                or position == SUBJECT):
            return False

        if not newline:
            self.write(' ')

        if self.isValidList(node):
            # this is a list
            self.write('(')
            self.depth += 1
            self.doList(node)
            self.depth -= 1
            self.write(' )')
        else:
            self.subjectDone(node)
            # The predicate list is written one level deeper than the
            # enclosing statement; the depth is restored in two steps.
            self.depth += 2
            self.write('[')
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(' ]')
            self.depth -= 1

        return True

    def isValidList(self, l):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.
        """
        try:
            if self.store.value(l, RDF.first) is None:
                return False
        except Exception:
            # Any lookup failure means "not a list"; unlike a bare except
            # this lets KeyboardInterrupt / SystemExit propagate.
            return False
        while l:
            if l != RDF.nil and len(
                    list(self.store.predicate_objects(l))) != 2:
                return False
            l = self.store.value(l, RDF.rest)
        return True

    def doList(self, l):
        """Write the items of the RDF list starting at *l*."""
        while l:
            item = self.store.value(l, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l)
            l = self.store.value(l, RDF.rest)

    def predicateList(self, subject, newline=False):
        """
        Write every predicate of *subject* with its objects.

        Predicates are separated by `` ;`` and a newline; nothing is
        written when the subject has no properties.
        """
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if not propList:
            return
        self.verb(propList[0], newline=newline)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(' ;\n' + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node, newline=False):
        """Write *node* in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write *objects* separated by ``,`` and a newline."""
        count = len(objects)
        if count == 0:
            return
        # This used to read ``(count == 1) and 0 or 1``, which always
        # evaluates to 1 because 0 is falsy. The constant is spelled out so
        # the code says what it does; the emitted layout is unchanged.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(',\n' + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod