Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/rdflib_jsonld/serializer.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""
This serialiser will output an RDF Graph as a JSON-LD formatted document. See:

    http://json-ld.org/

Example usage::

    >>> from rdflib.plugin import register, Serializer
    >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')

    >>> from rdflib import Graph

    >>> testrdf = '''
    ... @prefix dc: <http://purl.org/dc/terms/> .
    ... <http://example.org/about>
    ...     dc:title "Someone's Homepage"@en .
    ... '''

    >>> g = Graph().parse(data=testrdf, format='n3')

    >>> print((g.serialize(format='json-ld', indent=4).decode()))
    [
        {
            "@id": "http://example.org/about",
            "http://purl.org/dc/terms/title": [
                {
                    "@language": "en",
                    "@value": "Someone's Homepage"
                }
            ]
        }
    ]

"""

# NOTE: This code writes the entire JSON object into memory before serialising,
# but we should consider streaming the output to deal with arbitrarily large
# graphs.

import warnings

from rdflib.serializer import Serializer
from rdflib.graph import Graph
from rdflib.term import URIRef, Literal, BNode
from rdflib.namespace import RDF, XSD

from .context import Context, UNDEF
from .util import json
from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG

__all__ = ['JsonLDSerializer', 'from_rdf']


# Datatypes whose literals may be emitted as native JSON values
# (see Converter.to_raw_value).
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}


class JsonLDSerializer(Serializer):
    """Serializer plugin that writes the wrapped store as a JSON-LD document."""

    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        """Convert ``self.store`` to JSON-LD and write it, encoded, to *stream*.

        :param stream: object with a ``write`` method accepting bytes.
        :param base: base IRI handed on to :func:`from_rdf`.
        :param encoding: output encoding; defaults to ``'utf-8'``. Anything
            other than ``'utf-8'`` or ``'utf-16'`` triggers a warning but is
            still used (unencodable characters are replaced).
        :param kwargs: recognised keys are ``context``, ``use_native_types``
            (default ``False``), ``use_rdf_type`` (default ``False``),
            ``auto_compact`` (default ``False``), and the ``json.dumps``
            options ``indent`` (default ``2``), ``separators`` (default
            ``(',', ': ')``), ``sort_keys`` (default ``True``) and
            ``ensure_ascii`` (default ``False``).
        :return: ``None``; the result is written to *stream*.
        """
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        # Deliberately no trailing comma here: a trailing comma would wrap the
        # value in a 1-tuple, which is always truthy and so would make the
        # option impossible to switch off.
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))


def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    """Convert *graph* into a JSON-LD structure of plain dicts and lists.

    :param graph: the graph (or context-aware graph) to convert.
    :param context_data: a :class:`Context` instance, or data from which one
        is built; may be ``None``.
    :param base: base IRI passed to :class:`Context` when one is built here.
    :param use_native_types: passed to :class:`Converter`.
    :param use_rdf_type: passed to :class:`Converter`.
    :param auto_compact: when true and no *context_data* is given, a context
        is derived from the graph's prefixed namespace bindings (the XML
        namespace is skipped).
    :param startnode: currently unused.
    :param index: currently unused.
    :return: the converted data; when the context is active the result is a
        dict that also carries the context under the ``CONTEXT`` key.
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        context_data = {
            pfx: str(ns)
            for (pfx, ns) in graph.namespaces()
            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace"
        }

    if isinstance(context_data, Context):
        context = context_data
        context_data = context.to_dict()
    else:
        context = Context(context_data, base=base)

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if converter.context.active:
        # A bare list of nodes has nowhere to hold the context, so wrap it.
        if isinstance(result, list):
            result = {context.get_key(GRAPH): result}
        result[CONTEXT] = context_data

    return result


class Converter(object):
    """Builds JSON-LD node structures from the triples of a graph."""

    def __init__(self, context, use_native_types, use_rdf_type):
        """Store the conversion settings.

        Native types are always used when *context* is active, regardless of
        *use_native_types*.
        """
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        """Convert *graph* and return a list of node dicts or a single dict.

        For a context-aware graph, contexts identified by a ``URIRef`` become
        named graphs; all others are merged into one default graph.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                # A lone node of an unnamed graph is inlined.
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            # Merge into the first object when it carries the same id.
            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            # Unwrap a sole graph container down to its list of nodes.
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
        """Return the list of node dicts for the subjects of *graph*."""
        nodemap = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
        """Create the node dict for subject *s* and register it in *nodemap*.

        :return: the new node dict, or ``None`` when a node with the same id
            is already present in *nodemap*.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        #used_as_object = any(graph.subjects(None, s))
        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Registered before descending so that re-entry for the same id
        # (via to_raw_value) returns early above.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
        """Add the value of triple (*s*, *p*, *o*) to the node dict *s_node*.

        A matching context term, if any, decides the key and how the value is
        coerced; otherwise the predicate is symbolised via the context and the
        value produced by :meth:`to_raw_value`. Repeated keys accumulate into
        a list.
        """
        context = self.context

        # Bound up front so the language-container check below never reads an
        # unbound name when ``o`` is not a Literal.
        language = None

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            # Test for presence, not truthiness: the first list member may be
            # a value that evaluates as false.
            if graph.value(o, RDF.first) is not None:
                containers = [LIST, None]
            else:
                containers = [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif term.language and o.language == term.language:
                node = str(o)
            elif context.language and (
                    term.language is None and o.language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                members = self.to_collection(graph, o)
                # ``None`` means ``o`` is not a well-formed list; in that case
                # fall through to the generic value handling below instead of
                # failing on iteration.
                if members is not None:
                    node = [self.type_coerce(v, term.type)
                            or self.to_raw_value(graph, s, v, nodemap)
                            for v in members]
            elif term.container == LANG and language:
                # Language container: values are grouped per language tag.
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                if values:
                    if not isinstance(values, list):
                        value[language] = values = [values]
                    values.append(node)
                else:
                    value[language] = node
                return

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        if value:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
        """Return *o* coerced according to *coerce_type*, or ``None``.

        ``None`` signals that the coercion does not apply to *o*.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
        """Return the JSON-LD value for object *o* of subject *s*.

        Well-formed RDF lists become list objects, blank nodes and IRIs become
        id references (blank nodes are also processed as subjects into
        *nodemap*), and literals become value objects or plain values
        depending on datatype, language and context.
        """
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            # Reuse the members already collected rather than walking the
            # list a second time.
            coll = [self.to_raw_value(graph, s, lo, nodemap)
                    for lo in coll]
            return {context.list_key: coll}
        elif isinstance(o, BNode):
            embed = False # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or context.language and not o.language:
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph, l):
        """Return the members of the RDF list starting at *l*, or ``None``.

        ``None`` is returned when *l* is not the head of a well-formed list:
        it has no ``rdf:first`` (and is not ``rdf:nil``), a list node is an
        IRI other than ``rdf:nil``, a list node carries a predicate other than
        one ``rdf:first``, one ``rdf:rest`` and ``rdf:type rdf:List``, the
        chain loops back on itself, or it ends without reaching ``rdf:nil``.
        """
        # Presence checks use ``is None`` throughout: a list member may be a
        # value that evaluates as false, which must not be mistaken for a
        # missing ``rdf:first``.
        if l != RDF.nil and graph.value(l, RDF.first) is None:
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if first is None and p == RDF.first:
                    first = o
                elif rest is None and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            l = rest
            # Guard against cyclic rdf:rest chains.
            if l in chain:
                return None
            chain.add(l)