Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib_jsonld/serializer.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 This serialiser will output an RDF Graph as a JSON-LD formatted document. See: | |
4 | |
5 http://json-ld.org/ | |
6 | |
7 Example usage:: | |
8 | |
9 >>> from rdflib.plugin import register, Serializer | |
10 >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer') | |
11 | |
12 >>> from rdflib import Graph | |
13 | |
14 >>> testrdf = ''' | |
15 ... @prefix dc: <http://purl.org/dc/terms/> . | |
16 ... <http://example.org/about> | |
17 ... dc:title "Someone's Homepage"@en . | |
18 ... ''' | |
19 | |
20 >>> g = Graph().parse(data=testrdf, format='n3') | |
21 | |
22 >>> print((g.serialize(format='json-ld', indent=4).decode())) | |
23 [ | |
24 { | |
25 "@id": "http://example.org/about", | |
26 "http://purl.org/dc/terms/title": [ | |
27 { | |
28 "@language": "en", | |
29 "@value": "Someone's Homepage" | |
30 } | |
31 ] | |
32 } | |
33 ] | |
34 | |
35 """ | |
36 | |
37 # NOTE: This code writes the entire JSON object into memory before serialising, | |
38 # but we should consider streaming the output to deal with arbitrarily large | |
39 # graphs. | |
40 | |
41 import warnings | |
42 | |
43 from rdflib.serializer import Serializer | |
44 from rdflib.graph import Graph | |
45 from rdflib.term import URIRef, Literal, BNode | |
46 from rdflib.namespace import RDF, XSD | |
47 | |
48 from ._compat import str | |
49 from .context import Context, UNDEF | |
50 from .util import json | |
51 from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG | |
52 | |
# Public names of this module.
__all__ = ["JsonLDSerializer", "from_rdf"]


# Literal datatypes that Converter.to_raw_value may emit as native Python
# values (through Literal.toPython()) rather than as their string form.
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
57 | |
58 | |
class JsonLDSerializer(Serializer):
    """Serializer plugin that writes ``self.store`` as a JSON-LD document."""

    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        """Convert the store to JSON-LD and write the encoded text to *stream*.

        :param stream: object whose ``write`` method accepts the encoded
            bytes.
        :param base: forwarded to :func:`from_rdf` as ``base``.
        :param encoding: output encoding, ``'utf-8'`` when not given. A
            warning is issued for anything other than ``'utf-8'`` or
            ``'utf-16'``; the value is still used, and characters it cannot
            represent are substituted (``errors='replace'``).

        Recognised keyword arguments:

        * ``context``, ``use_native_types`` (default ``False``),
          ``use_rdf_type`` (default ``False``), ``auto_compact`` (default
          ``False``) -- forwarded to :func:`from_rdf`.
        * ``indent`` (default ``2``), ``separators`` (default
          ``(',', ': ')``), ``sort_keys`` (default ``True``),
          ``ensure_ascii`` (default ``False``) -- forwarded to
          ``json.dumps``.
        """
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        # NOTE: this used to end with a trailing comma, which turned the value
        # into a one-element tuple. A tuple is always truthy, so the option
        # could never be switched off.
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))
88 | |
89 | |
def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    """Convert *graph* into a JSON-LD structure made of dicts and lists.

    :param graph: the graph to convert; handed to ``Converter.convert``.
    :param context_data: either a ``Context`` instance or the data used to
        construct one. When it is empty and *auto_compact* is set, a mapping
        is built from the graph's namespace bindings instead.
    :param base: passed to ``Context`` when one is constructed here.
    :param use_native_types: forwarded to ``Converter``.
    :param use_rdf_type: forwarded to ``Converter``.
    :param auto_compact: derive the context from ``graph.namespaces()`` when
        no context data was supplied.
    :param startnode: currently unused.
    :param index: currently unused.
    :returns: the converter's result. When the context is active the result
        is a dict carrying the context data under the ``CONTEXT`` key; a list
        result is first wrapped under the context's graph key.
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        # Skip the unnamed (empty-prefix) binding and the XML namespace.
        context_data = {
            prefix: str(namespace)
            for prefix, namespace in graph.namespaces()
            if prefix and
            str(namespace) != "http://www.w3.org/XML/1998/namespace"
        }

    if not isinstance(context_data, Context):
        context = Context(context_data, base=base)
    else:
        context = context_data
        context_data = context.to_dict()

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if not converter.context.active:
        return result

    if isinstance(result, list):
        result = {context.get_key(GRAPH): result}
    result[CONTEXT] = context_data
    return result
117 | |
118 | |
class Converter(object):
    """Turn an RDF graph into JSON-LD shaped dicts and lists.

    Key names and IRI shortening are delegated to the ``context`` object
    given to the constructor.
    """

    def __init__(self, context, use_native_types, use_rdf_type):
        """Store the conversion settings.

        :param context: context object used for key names, term lookup and
            IRI compaction.
        :param use_native_types: emit literals whose datatype is in
            ``PLAIN_LITERAL_TYPES`` as native values. Always enabled when
            ``context.active`` is truthy.
        :param use_rdf_type: when true, ``rdf:type`` statements are kept as
            ordinary properties instead of being mapped to the type key.
        """
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        """Convert *graph* (plain or context-aware) and return the result.

        For a context-aware graph, every sub-graph whose identifier is not a
        ``URIRef`` is merged into one default graph; the remaining ones are
        emitted as named graphs.

        :returns: a list of node/graph objects, or a single dict when the
            context is active and exactly one object was produced.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                # A lone node of an unnamed graph is inlined directly.
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            # Merge into the first object when its id equals this graph name.
            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            # Unwrap a single object that holds nothing but its graph items.
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
        """Return the list of node dicts built from the subjects of *graph*."""
        nodemap = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
        """Build the node dict for subject *s* and register it in *nodemap*.

        :returns: the new node dict, or ``None`` when a node with the same id
            is already present in *nodemap*.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Register before walking the properties so that recursive calls made
        # through to_raw_value see this subject as already handled.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
        """Add the statement ``(s, p, o)`` to the node dict *s_node*.

        The key and the value form are chosen from the context term matching
        the predicate, if any; otherwise the predicate is compacted with
        ``context.to_symbol``. Repeated keys are collected into a list.
        """
        context = self.context

        # Bound up front: it is read below on every path, not only for
        # literals.
        language = None

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            # Compare with None: a list whose first item is a falsy literal
            # (0, false, "") still has an rdf:first value.
            if graph.value(o, RDF.first) is not None:
                containers = [LIST, None]
            else:
                containers = [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif term.language and o.language == term.language:
                node = str(o)
            elif context.language and (
                    term.language is None and o.language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                node = []
                for v in self.to_collection(graph, o):
                    item = self.type_coerce(v, term.type)
                    # Same "is None" test as the scalar path below, so a
                    # falsy coerced value is kept rather than re-converted.
                    if item is None:
                        item = self.to_raw_value(graph, s, v, nodemap)
                    node.append(item)
            elif term.container == LANG and language:
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                # "is not None" so that an already stored empty string is
                # extended instead of being overwritten.
                if values is not None:
                    if not isinstance(values, list):
                        value[language] = values = [values]
                    values.append(node)
                else:
                    value[language] = node
                return

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        # "is not None" so that a stored falsy value (0, False, "") is kept
        # and extended rather than silently replaced.
        if value is not None:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
        """Return the compact form of *o* under *coerce_type*, else ``None``.

        * ``ID``: shortened IRI for a ``URIRef``, ``n3()`` form for a
          ``BNode``, any other object unchanged.
        * ``VOCAB``: ``context.to_symbol(o)`` for a ``URIRef``.
        * otherwise: *o* itself when it is a ``Literal`` whose datatype
          string equals *coerce_type*.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
        """Return the uncoerced JSON-LD value for object *o*.

        Collections become ``{list_key: [...]}``; blank nodes and IRIs become
        ``{id_key: ...}`` references (a blank node is also processed as a
        subject so that its node ends up in *nodemap*); literals become a
        value object or, in some cases, a bare value.
        """
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            # Reuse the members collected above instead of walking the
            # rdf:first/rdf:rest chain a second time.
            return {context.list_key: [
                self.to_raw_value(graph, s, lo, nodemap) for lo in coll]}
        elif isinstance(o, BNode):
            embed = False  # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or context.language and not o.language:
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph, l):
        """Return the members of the RDF list headed by *l*, or ``None``.

        ``None`` is returned when *l* is not a well-formed list head: it has
        no ``rdf:first`` (and is not ``rdf:nil``), a list node is a
        ``URIRef``, a list node carries statements other than ``rdf:first``,
        ``rdf:rest`` and ``rdf:type rdf:List``, the chain is cyclic, or the
        chain ends without reaching ``rdf:nil``.
        """
        # "is None" rather than truthiness: the first member may be a falsy
        # literal such as 0, false or "".
        if l != RDF.nil and graph.value(l, RDF.first) is None:
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if first is None and p == RDF.first:
                    first = o
                elif rest is None and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    # Duplicate rdf:first/rdf:rest or any foreign statement
                    # means this node is not a plain list cell.
                    return None
            list_nodes.append(first)
            l = rest
            if l in chain:
                return None
            chain.add(l)
360 |