Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib_jsonld/serializer.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 This serialiser will output an RDF Graph as a JSON-LD formatted document. See: | |
| 4 | |
| 5 http://json-ld.org/ | |
| 6 | |
| 7 Example usage:: | |
| 8 | |
| 9 >>> from rdflib.plugin import register, Serializer | |
| 10 >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer') | |
| 11 | |
| 12 >>> from rdflib import Graph | |
| 13 | |
| 14 >>> testrdf = ''' | |
| 15 ... @prefix dc: <http://purl.org/dc/terms/> . | |
| 16 ... <http://example.org/about> | |
| 17 ... dc:title "Someone's Homepage"@en . | |
| 18 ... ''' | |
| 19 | |
| 20 >>> g = Graph().parse(data=testrdf, format='n3') | |
| 21 | |
| 22 >>> print((g.serialize(format='json-ld', indent=4).decode())) | |
| 23 [ | |
| 24 { | |
| 25 "@id": "http://example.org/about", | |
| 26 "http://purl.org/dc/terms/title": [ | |
| 27 { | |
| 28 "@language": "en", | |
| 29 "@value": "Someone's Homepage" | |
| 30 } | |
| 31 ] | |
| 32 } | |
| 33 ] | |
| 34 | |
| 35 """ | |
| 36 | |
| 37 # NOTE: This code writes the entire JSON object into memory before serialising, | |
| 38 # but we should consider streaming the output to deal with arbitrarily large | |
| 39 # graphs. | |
| 40 | |
| 41 import warnings | |
| 42 | |
| 43 from rdflib.serializer import Serializer | |
| 44 from rdflib.graph import Graph | |
| 45 from rdflib.term import URIRef, Literal, BNode | |
| 46 from rdflib.namespace import RDF, XSD | |
| 47 | |
| 48 from ._compat import str | |
| 49 from .context import Context, UNDEF | |
| 50 from .util import json | |
| 51 from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG | |
| 52 | |
| 53 __all__ = ['JsonLDSerializer', 'from_rdf'] | |
| 54 | |
| 55 | |
# Datatypes whose literals Converter.to_raw_value may emit as native Python
# values (via Literal.toPython) when native-type output is enabled.
PLAIN_LITERAL_TYPES = set([XSD.boolean, XSD.integer, XSD.double, XSD.string])
| 57 | |
| 58 | |
class JsonLDSerializer(Serializer):
    """Serializer plugin that writes the wrapped store as a JSON-LD document."""

    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        """Convert ``self.store`` with ``from_rdf`` and write it to *stream*.

        :param stream: object with a ``write`` method; it receives the JSON
            text encoded to bytes.
        :param base: base IRI, handed on to ``from_rdf``.
        :param encoding: output encoding, ``'utf-8'`` when omitted. Anything
            other than ``'utf-8'``/``'utf-16'`` triggers a warning but is
            still used (unencodable characters are replaced).
        :param kwargs: ``context``, ``use_native_types``, ``use_rdf_type``
            and ``auto_compact`` are forwarded to ``from_rdf``; ``indent``,
            ``separators``, ``sort_keys`` and ``ensure_ascii`` are forwarded
            to ``json.dumps``.
        """
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        # No trailing comma here: a one-element tuple would always be truthy
        # and silently force native types on.
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))
| 88 | |
| 89 | |
def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    """Build the JSON-LD data structure for *graph*.

    :param graph: graph to convert; its ``namespaces()`` are consulted when
        *auto_compact* is set.
    :param context_data: either a ``Context`` instance or the raw data to
        construct one from.
    :param base: base IRI passed to ``Context`` when one is constructed here.
    :param use_native_types: forwarded to ``Converter``.
    :param use_rdf_type: forwarded to ``Converter``.
    :param auto_compact: when no context data was given, derive a context
        from the graph's prefix bindings.
    :param startnode: currently unused.
    :param index: currently unused.
    :return: whatever ``Converter.convert`` produced; when the context is
        active the result is a dict that also carries the context data.
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        context_data = {}
        for prefix, namespace in graph.namespaces():
            if not prefix:
                continue
            # The XML namespace binding is never exported as a prefix.
            if str(namespace) == "http://www.w3.org/XML/1998/namespace":
                continue
            context_data[prefix] = str(namespace)

    if not isinstance(context_data, Context):
        context = Context(context_data, base=base)
    else:
        # A ready-made Context: use it and keep its dict form for the output.
        context = context_data
        context_data = context.to_dict()

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if not converter.context.active:
        return result

    # With an active context the output must be a single object so that the
    # context itself can be attached to it.
    if isinstance(result, list):
        result = {context.get_key(GRAPH): result}
    result[CONTEXT] = context_data
    return result
| 117 | |
| 118 | |
class Converter(object):
    """Turn an rdflib graph into plain JSON-LD structures (dicts and lists).

    The supplied context decides key names and how IRIs are shortened;
    nothing in this class serialises to text.
    """

    def __init__(self, context, use_native_types, use_rdf_type):
        """
        :param context: context object used to shrink IRIs, look up terms
            and name the JSON-LD keywords.
        :param use_native_types: emit native values for literals typed with
            one of ``PLAIN_LITERAL_TYPES``; forced on when the context is
            active.
        :param use_rdf_type: keep ``rdf:type`` as an ordinary property
            instead of mapping it to the context's type key.
        """
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        """Return the JSON-LD data for *graph*.

        The result is a list of objects, except when the context is active
        and exactly one top-level object was produced, in which case that
        object (a dict) is returned directly.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            # Merge every graph that is not named by an IRI into one default
            # graph; IRI-named graphs are kept apart.
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                # A lone node of an unnamed graph becomes the object itself.
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            # Without a context, an object holding nothing but the graph key
            # is unwrapped to the bare list of nodes.
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
        """Return the list of top-level node objects found in *graph*."""
        nodemap = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
        """Create the node object for subject *s* and register it in *nodemap*.

        :return: the new node dict, or ``None`` when a node with the same id
            is already present in *nodemap*.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        #used_as_object = any(graph.subjects(None, s))
        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Registered before the properties are walked, so that a reference
        # back to this subject hits the "already present" check above.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
        """Add the value *o* of predicate *p* to the node object *s_node*.

        A matching context term, if any, decides the key and how the value
        is coerced; otherwise the key comes from ``context.to_symbol`` and
        the value from ``to_raw_value``. A second value for the same key
        turns the stored value into a list.
        """
        context = self.context

        # Bound up front: it is read further down even when *o* is not a
        # literal.
        language = None

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            # "is not None" rather than truthiness: the first list member
            # may be a falsy literal such as "".
            if graph.value(o, RDF.first) is not None:
                containers = [LIST, None]
            else:
                containers = [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif term.language and o.language == term.language:
                node = str(o)
            elif context.language and (
                    term.language is None and o.language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                # NOTE(review): to_collection() returns None for a malformed
                # list, in which case this iteration raises TypeError.
                node = []
                for v in self.to_collection(graph, o):
                    item = self.type_coerce(v, term.type)
                    # Only fall back when coercion did not apply; a falsy
                    # coerced literal is still a valid result.
                    if item is None:
                        item = self.to_raw_value(graph, s, v, nodemap)
                    node.append(item)
            elif term.container == LANG and language:
                # Language map: {language: value or list of values}.
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                # "is not None" so that an already stored empty string is
                # accumulated instead of being overwritten.
                if values is not None:
                    if not isinstance(values, list):
                        value[language] = values = [values]
                    values.append(node)
                else:
                    value[language] = node
                return

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        # "is not None" so that an earlier falsy value (0, False, "") is kept
        # and accumulated rather than replaced by the new one.
        if value is not None:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
        """Return *o* in the compact form implied by *coerce_type*.

        :return: a shrunk IRI or blank node label for ``ID``, a symbol for
            ``VOCAB`` IRIs, the literal itself when its datatype equals
            *coerce_type*, otherwise ``None``.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
        """Return the uncoerced JSON-LD value for object *o* of subject *s*.

        Collections become a list object, blank nodes and IRIs become id
        references (blank nodes are also registered in *nodemap*), and
        literals become value objects or plain/native values depending on
        the context and ``use_native_types``.
        """
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            # Reuse the members collected above instead of walking the same
            # list a second time.
            return {context.list_key: [
                self.to_raw_value(graph, s, lo, nodemap) for lo in coll]}
        elif isinstance(o, BNode):
            embed = False  # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or context.language and not o.language:
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph, l):
        """Return the members of the RDF list starting at *l*.

        :return: the list of members (empty for ``rdf:nil``), or ``None``
            when *l* is not the head of a well-formed list: no ``rdf:first``,
            an IRI-named list node, extra properties on a list node, a cycle
            or a missing ``rdf:rest``.
        """
        # Compare against None: a falsy first member such as "" must not
        # make the node look like a non-list.
        if l != RDF.nil and graph.value(l, RDF.first) is None:
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if first is None and p == RDF.first:
                    first = o
                elif rest is None and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    # Anything beyond first/rest/"a rdf:List" (including a
                    # duplicate first or rest) disqualifies the list.
                    return None
            list_nodes.append(first)
            l = rest
            if l in chain:
                # Cycle in the rdf:rest chain.
                return None
            chain.add(l)
        # The chain ended without reaching rdf:nil.
        return None
| 360 |
