comparison: env/lib/python3.7/site-packages/rdflib_jsonld/serializer.py @ 5:9b1c78e6ba9c (draft, default, tip)
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author:   shellac
date:     Mon, 01 Jun 2020 08:59:25 -0400
parents:  79f47841a781
children: (none)
comparing 4:79f47841a781 with 5:9b1c78e6ba9c
# -*- coding: utf-8 -*-
"""
This serialiser will output an RDF Graph as a JSON-LD formatted document. See:

    http://json-ld.org/

Example usage::

    >>> from rdflib.plugin import register, Serializer
    >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')

    >>> from rdflib import Graph

    >>> testrdf = '''
    ... @prefix dc: <http://purl.org/dc/terms/> .
    ... <http://example.org/about>
    ...     dc:title "Someone's Homepage"@en .
    ... '''

    >>> g = Graph().parse(data=testrdf, format='n3')

    >>> print((g.serialize(format='json-ld', indent=4).decode()))
    [
        {
            "@id": "http://example.org/about",
            "http://purl.org/dc/terms/title": [
                {
                    "@language": "en",
                    "@value": "Someone's Homepage"
                }
            ]
        }
    ]

"""

# NOTE: This code writes the entire JSON object into memory before serialising,
# but we should consider streaming the output to deal with arbitrarily large
# graphs.

import warnings

from rdflib.serializer import Serializer
from rdflib.graph import Graph
from rdflib.term import URIRef, Literal, BNode
from rdflib.namespace import RDF, XSD

from .context import Context, UNDEF
from .util import json
from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG

__all__ = ['JsonLDSerializer', 'from_rdf']


PLAIN_LITERAL_TYPES = set([XSD.boolean, XSD.integer, XSD.double, XSD.string])


class JsonLDSerializer(Serializer):
    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        # TODO: docstring w. args and return value
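        # Recognised keyword arguments (all optional): 'context' (the data to
        # use as the JSON-LD @context, e.g. a dict or a Context instance),
        # 'use_native_types', 'use_rdf_type' and 'auto_compact', which are
        # passed on to from_rdf below, plus 'indent', 'separators', 'sort_keys'
        # and 'ensure_ascii', which are forwarded to json.dumps.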
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))


def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    # TODO: docstring w. args and return value
    # TODO: support for index and startnode

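    # Convert the triples in `graph` into a plain Python structure that
    # mirrors the JSON-LD document form: a list of node objects, or a dict
    # wrapping them under @context (and @graph) when a context is supplied or
    # auto_compact derives one from the graph's namespace bindings.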
    if not context_data and auto_compact:
        context_data = dict(
            (pfx, str(ns))
            for (pfx, ns) in graph.namespaces() if pfx and
            str(ns) != "http://www.w3.org/XML/1998/namespace")

    if isinstance(context_data, Context):
        context = context_data
        context_data = context.to_dict()
    else:
        context = Context(context_data, base=base)

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if converter.context.active:
        if isinstance(result, list):
            result = {context.get_key(GRAPH): result}
        result[CONTEXT] = context_data

    return result


class Converter(object):

    def __init__(self, context, use_native_types, use_rdf_type):
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
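        # For context-aware stores, serialise the default graph plus one
        # object per named graph; a plain Graph is serialised directly.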
        if graph.context_aware:
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
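        # Build one JSON object per top-level subject and return them as a
        # list; nested blank nodes are reached from their referencing nodes.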
        nodemap = {}

        for s in set(graph.subjects()):
            # only IRIs and unreferenced blank nodes (the rest will be
            # promoted to the top level if needed)
            if isinstance(s, URIRef) or (isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
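        # Create the JSON object for subject s (keyed by its shortened IRI or
        # blank node label), register it in nodemap, and add one entry per
        # predicate/object pair.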
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        #used_as_object = any(graph.subjects(None, s))
        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
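        # Add a single statement (s, p, o) to the JSON object s_node, using a
        # matching context term (if any) to pick the key and to decide how the
        # object is rendered (type coercion, @list, @set or a language map).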
        context = self.context

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            containers = [LIST, None] if graph.value(o, RDF.first) else [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif term.language and o.language == term.language:
                node = str(o)
            elif context.language and (
                    term.language is None and o.language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                node = [self.type_coerce(v, term.type) or
                        self.to_raw_value(graph, s, v, nodemap)
                        for v in self.to_collection(graph, o)]
            elif term.container == LANG and language:
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                if values:
                    if not isinstance(values, list):
                        value[language] = values = [values]
                    values.append(node)
                else:
                    value[language] = node
                return

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        if value:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
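        # Return the coerced form of o for the given term type (ID, VOCAB or a
        # datatype IRI), or None when the coercion does not apply.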
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
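        # Convert an object term into its JSON-LD value form: an @list for RDF
        # collections, an @id reference for IRIs and blank nodes, and an
        # @value object (or a bare native/plain value) for literals.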
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            coll = [self.to_raw_value(graph, s, lo, nodemap)
                    for lo in self.to_collection(graph, o)]
            return {context.list_key: coll}
        elif isinstance(o, BNode):
            embed = False  # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or context.language and not o.language:
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph, l):
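        # Walk an rdf:first/rdf:rest chain starting at l and return its members
        # as a Python list, or None if l is not a well-formed, cycle-free
        # collection.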
        if l != RDF.nil and not graph.value(l, RDF.first):
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if not first and p == RDF.first:
                    first = o
                elif not rest and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            l = rest
            if l in chain:
                return None
            chain.add(l)