comparison env/lib/python3.9/site-packages/rdflib_jsonld/serializer.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """
3 This serialiser will output an RDF Graph as a JSON-LD formatted document. See:
4
5 http://json-ld.org/
6
7 Example usage::
8
9 >>> from rdflib.plugin import register, Serializer
10 >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')
11
12 >>> from rdflib import Graph
13
14 >>> testrdf = '''
15 ... @prefix dc: <http://purl.org/dc/terms/> .
16 ... <http://example.org/about>
17 ... dc:title "Someone's Homepage"@en .
18 ... '''
19
20 >>> g = Graph().parse(data=testrdf, format='n3')
21
22 >>> print((g.serialize(format='json-ld', indent=4).decode()))
23 [
24 {
25 "@id": "http://example.org/about",
26 "http://purl.org/dc/terms/title": [
27 {
28 "@language": "en",
29 "@value": "Someone's Homepage"
30 }
31 ]
32 }
33 ]
34
35 """
36
37 # NOTE: This code writes the entire JSON object into memory before serialising,
38 # but we should consider streaming the output to deal with arbitrarily large
39 # graphs.
40
41 import warnings
42
43 from rdflib.serializer import Serializer
44 from rdflib.graph import Graph
45 from rdflib.term import URIRef, Literal, BNode
46 from rdflib.namespace import RDF, XSD
47
48 from ._compat import str
49 from .context import Context, UNDEF
50 from .util import json
51 from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG
52
# Public API of this module.
__all__ = ['JsonLDSerializer', 'from_rdf']


# XSD datatypes whose literals may be emitted as native JSON values
# (boolean, number, string) when native types are enabled.
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
57
58
class JsonLDSerializer(Serializer):
    """rdflib serializer plugin that writes the store as a JSON-LD document."""

    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        """Serialize ``self.store`` as JSON-LD and write the bytes to *stream*.

        :param stream: object with a ``write`` method accepting encoded bytes.
        :param base: base IRI, forwarded to :func:`from_rdf`.
        :param encoding: output encoding; defaults to ``'utf-8'``. A warning
            is issued for anything other than ``'utf-8'`` or ``'utf-16'``.

        Recognised keyword arguments:

        * ``context`` -- context data forwarded to :func:`from_rdf`.
        * ``use_native_types`` (default ``False``), ``use_rdf_type``
          (default ``False``), ``auto_compact`` (default ``False``) --
          conversion options forwarded to :func:`from_rdf`.
        * ``indent`` (default ``2``), ``separators`` (default
          ``(',', ': ')``), ``sort_keys`` (default ``True``),
          ``ensure_ascii`` (default ``False``) -- forwarded to ``json.dumps``.

        Characters that cannot be represented in *encoding* are replaced
        (``'replace'`` error handler) rather than raising.
        """
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        # NOTE: this used to end with a stray trailing comma, which turned the
        # value into a one-element tuple. A non-empty tuple is always truthy,
        # so native types could never be switched off by the caller.
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))
88
89
def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    """Convert *graph* into a JSON-LD structure of plain dicts and lists.

    :param graph: the rdflib graph to convert.
    :param context_data: either a ``Context`` instance or raw context data
        from which a ``Context`` is built.
    :param base: base IRI handed to ``Context`` when one has to be built.
    :param use_native_types: forwarded to ``Converter``.
    :param use_rdf_type: forwarded to ``Converter``.
    :param auto_compact: when true and no context data was supplied, build
        a context from the graph's own prefix bindings (skipping the empty
        prefix and the XML namespace).
    :param startnode: accepted but currently unused.
    :param index: accepted but currently unused.
    :returns: the converter's result; when the context is active the result
        is a dict carrying the context data under the context keyword, with
        a list result first wrapped under the graph key.
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        context_data = {
            prefix: str(namespace)
            for prefix, namespace in graph.namespaces()
            if prefix and
            str(namespace) != "http://www.w3.org/XML/1998/namespace"
        }

    if isinstance(context_data, Context):
        # Keep the ready-made context and derive its dict form for output.
        context, context_data = context_data, context_data.to_dict()
    else:
        context = Context(context_data, base=base)

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if not converter.context.active:
        return result

    if isinstance(result, list):
        result = {context.get_key(GRAPH): result}
    result[CONTEXT] = context_data
    return result
117
118
class Converter(object):
    """Turn an rdflib graph into a JSON-LD structure of dicts and lists.

    The supplied context drives the output: its keyword keys (``id_key``,
    ``type_key``, ``graph_key``, ...), its term lookup (``find_term``) and
    its IRI shortening (``shrink_iri`` / ``to_symbol``) decide how nodes,
    properties and values are written.
    """

    def __init__(self, context, use_native_types, use_rdf_type):
        """
        :param context: the context object used for all key/term decisions.
        :param use_native_types: emit plain XSD literals as native JSON
            values; always treated as enabled when the context is active.
        :param use_rdf_type: when true, ``rdf:type`` is not rewritten to the
            context's type key.
        """
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        """Convert *graph* (optionally context-aware) to JSON-LD objects.

        For a context-aware graph every context identified by a ``URIRef``
        becomes its own entry; all other contexts are merged into a single
        default graph.

        :returns: a list of objects, or a single dict when the context is
            active and exactly one object was produced.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                # A lone node in an unnamed graph is inlined directly.
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            # Merge into the first object when it describes the same id.
            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            # Without an active context, unwrap a bare graph wrapper so the
            # result is the plain list of nodes.
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
        """Return the list of node objects for all subjects in *graph*."""
        nodemap = {}

        for s in set(graph.subjects()):
            # only IRIs and unreferenced blank nodes start a traversal
            # (the rest will be promoted to top level if needed)
            if isinstance(s, URIRef) or (
                    isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
        """Build the node object for subject *s* and register it in *nodemap*.

        :returns: the new node dict, or ``None`` when a node with the same
            id has already been registered.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Register before descending so that cyclic references terminate.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
        """Add the value *o* of predicate *p* to the node dict *s_node*.

        The key and the shape of the value are chosen from the matching
        context term when there is one; otherwise the predicate is written
        via ``context.to_symbol`` and the value in its raw form.
        """
        context = self.context

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            # Non-literals carry no language; bind the name so the language
            # container check below can never hit an unbound local.
            language = None
            # Compare against None: the first list item may be a falsy
            # literal (0, false, ""), which must still count as a list head.
            is_list_head = graph.value(o, RDF.first) is not None
            containers = [LIST, None] if is_list_head else [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif term.language and o.language == term.language:
                node = str(o)
            elif context.language and (
                    term.language is None and o.language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                node = [self.type_coerce(v, term.type)
                        or self.to_raw_value(graph, s, v, nodemap)
                        for v in self.to_collection(graph, o)]
            elif term.container == LANG and language:
                # Language map: {lang: value} or {lang: [values...]}.
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                # "is not None" so that an already stored empty string is
                # kept instead of being overwritten by the next value.
                if values is not None:
                    if not isinstance(values, list):
                        value[language] = values = [values]
                    values.append(node)
                else:
                    value[language] = node
                return

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        # Test for key presence rather than truthiness: a previously stored
        # falsy value (native 0 / False, empty string) must be kept and
        # combined with the new one, not silently replaced by it.
        if p_key in s_node:
            value = s_node[p_key]
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
        """Return *o* in the compact form implied by *coerce_type*.

        :returns: the coerced value, or ``None`` when *o* does not fit the
            requested coercion.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
        """Return the uncoerced JSON-LD representation of object *o*.

        Collections become list objects, blank nodes and IRIs become id
        references (a blank node's own description is registered in
        *nodemap*), and literals become value objects or plain values
        depending on datatype, language and whether the context is active.
        """
        context = self.context
        members = self.to_collection(graph, o)
        if members is not None:
            # Reuse the members already collected instead of walking the
            # same rdf:first/rdf:rest chain a second time.
            return {context.list_key: [
                self.to_raw_value(graph, s, member, nodemap)
                for member in members]}
        elif isinstance(o, BNode):
            embed = False  # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(
                        s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or (context.language and not o.language):
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph, l):
        """Return the members of the RDF collection starting at *l*.

        :returns: the list of members (empty for ``rdf:nil``), or ``None``
            when *l* is not the head of a well-formed collection: no
            ``rdf:first``, an IRI-named list node, a list node with
            predicates other than ``rdf:first`` / ``rdf:rest`` /
            ``rdf:type rdf:List``, a cyclic chain, or a chain that does not
            end in ``rdf:nil``.
        """
        # Compare against None instead of using truthiness: a first member
        # that is a falsy literal (0, false, "") is still a valid member.
        if l != RDF.nil and graph.value(l, RDF.first) is None:
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if first is None and p == RDF.first:
                    first = o
                elif rest is None and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            l = rest
            # Guard against cyclic rdf:rest chains.
            if l in chain:
                return None
            chain.add(l)
        # The chain ended without reaching rdf:nil.
        return None
350 first = o
351 elif not rest and p == RDF.rest:
352 rest = o
353 elif p != RDF.type or o != RDF.List:
354 return None
355 list_nodes.append(first)
356 l = rest
357 if l in chain:
358 return None
359 chain.add(l)
360