env/lib/python3.7/site-packages/rdflib_jsonld/serializer.py @ 3:758bc20232e8 (draft)
"planemo upload commit 2a0fe2cc28b09e101d37293e53e82f61762262ec"

author   | shellac
date     | Thu, 14 May 2020 16:20:52 -0400
parents  | 26e78fe6e8c4
children |
# -*- coding: utf-8 -*-
"""
This serialiser will output an RDF Graph as a JSON-LD formatted document. See:

    http://json-ld.org/

Example usage::

    >>> from rdflib.plugin import register, Serializer
    >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')

    >>> from rdflib import Graph

    >>> testrdf = '''
    ... @prefix dc: <http://purl.org/dc/terms/> .
    ... <http://example.org/about>
    ...     dc:title "Someone's Homepage"@en .
    ... '''

    >>> g = Graph().parse(data=testrdf, format='n3')

    >>> print((g.serialize(format='json-ld', indent=4).decode()))
    [
        {
            "@id": "http://example.org/about",
            "http://purl.org/dc/terms/title": [
                {
                    "@language": "en",
                    "@value": "Someone's Homepage"
                }
            ]
        }
    ]

"""
# NOTE: This code writes the entire JSON object into memory before serialising,
# but we should consider streaming the output to deal with arbitrarily large
# graphs.

import warnings

from rdflib.serializer import Serializer
from rdflib.graph import Graph
from rdflib.term import URIRef, Literal, BNode
from rdflib.namespace import RDF, XSD

from .context import Context, UNDEF
from .util import json
from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG

__all__ = ['JsonLDSerializer', 'from_rdf']


PLAIN_LITERAL_TYPES = set([XSD.boolean, XSD.integer, XSD.double, XSD.string])


class JsonLDSerializer(Serializer):
    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        # TODO: docstring w. args and return value
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))


def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    # TODO: docstring w. args and return value
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        context_data = dict(
            (pfx, str(ns))
            for (pfx, ns) in graph.namespaces()
            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace")

    if isinstance(context_data, Context):
        context = context_data
        context_data = context.to_dict()
    else:
        context = Context(context_data, base=base)

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if converter.context.active:
        if isinstance(result, list):
            result = {context.get_key(GRAPH): result}
        result[CONTEXT] = context_data

    return result


class Converter(object):
    def __init__(self, context, use_native_types, use_rdf_type):
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
        nodemap = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        #used_as_object = any(graph.subjects(None, s))
        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
        context = self.context

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            containers = [LIST, None] if graph.value(o, RDF.first) else [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif term.language and o.language == term.language:
                node = str(o)
            elif context.language and (
                    term.language is None and o.language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                node = [self.type_coerce(v, term.type) or
                        self.to_raw_value(graph, s, v, nodemap)
                        for v in self.to_collection(graph, o)]
            elif term.container == LANG and language:
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                if values:
                    if not isinstance(values, list):
                        value[language] = values = [values]
                    values.append(node)
                else:
                    value[language] = node
                return
        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        if value:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            coll = [self.to_raw_value(graph, s, lo, nodemap)
                    for lo in self.to_collection(graph, o)]
            return {context.list_key: coll}
        elif isinstance(o, BNode):
            embed = False  # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(
                        s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or context.language and not o.language:
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph, l):
        if l != RDF.nil and not graph.value(l, RDF.first):
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if not first and p == RDF.first:
                    first = o
                elif not rest and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            l = rest
            if l in chain:
                return None
            chain.add(l)
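Besides registering the serializer plugin as shown in the module docstring, the module-level from_rdf helper can be called directly when a plain Python data structure is wanted instead of the serialised byte stream. The following is a minimal sketch of that usage, assuming rdflib and this package are importable; the sample triples and prefix mapping are illustrative only and not part of the file above.

    from rdflib import Graph
    from rdflib_jsonld.serializer import from_rdf

    testrdf = '''
    @prefix dc: <http://purl.org/dc/terms/> .
    <http://example.org/about> dc:title "Someone's Homepage"@en .
    '''
    g = Graph().parse(data=testrdf, format='n3')

    # Without a context the result is expanded JSON-LD: a list of node dicts
    # keyed by full predicate IRIs.
    expanded = from_rdf(g)

    # With context_data the Converter compacts predicate IRIs using the given
    # prefixes and the returned dict carries an "@context" key.
    compacted = from_rdf(g, context_data={'dc': 'http://purl.org/dc/terms/'})

Running json.dumps over either result gives the same document that JsonLDSerializer.serialize writes to its stream, subject to the indent, separators, sort_keys and ensure_ascii keyword arguments handled in that method.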