comparison env/lib/python3.7/site-packages/rdflib_jsonld/serializer.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1 # -*- coding: utf-8 -*-
2 """
3 This serialiser will output an RDF Graph as a JSON-LD formatted document. See:
4
5 http://json-ld.org/
6
7 Example usage::
8
9 >>> from rdflib.plugin import register, Serializer
10 >>> register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')
11
12 >>> from rdflib import Graph
13
14 >>> testrdf = '''
15 ... @prefix dc: <http://purl.org/dc/terms/> .
16 ... <http://example.org/about>
17 ... dc:title "Someone's Homepage"@en .
18 ... '''
19
20 >>> g = Graph().parse(data=testrdf, format='n3')
21
22 >>> print((g.serialize(format='json-ld', indent=4).decode()))
23 [
24 {
25 "@id": "http://example.org/about",
26 "http://purl.org/dc/terms/title": [
27 {
28 "@language": "en",
29 "@value": "Someone's Homepage"
30 }
31 ]
32 }
33 ]
34
35 """
36
37 # NOTE: This code writes the entire JSON object into memory before serialising,
38 # but we should consider streaming the output to deal with arbitrarily large
39 # graphs.
40
41 import warnings
42
43 from rdflib.serializer import Serializer
44 from rdflib.graph import Graph
45 from rdflib.term import URIRef, Literal, BNode
46 from rdflib.namespace import RDF, XSD
47
48 from .context import Context, UNDEF
49 from .util import json
50 from .keys import CONTEXT, GRAPH, ID, VOCAB, LIST, SET, LANG
51
__all__ = ['JsonLDSerializer', 'from_rdf']


# Literal datatypes that Converter.to_raw_value may emit as native Python
# (and hence native JSON) values when native types are enabled.
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
56
57
class JsonLDSerializer(Serializer):
    """Serializer plugin that writes the wrapped store as a JSON-LD document."""

    def __init__(self, store):
        super(JsonLDSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        """Convert ``self.store`` to JSON-LD and write the encoded bytes to *stream*.

        :param stream: object with a ``write`` method accepting ``bytes``.
        :param base: base IRI handed to ``from_rdf``.
        :param encoding: output encoding; defaults to ``'utf-8'``. Anything
            other than ``'utf-8'`` / ``'utf-16'`` triggers a warning but is
            still used (unencodable characters are replaced).
        :param kwargs: optional settings:

            * ``context`` -- context data passed to ``from_rdf``.
            * ``use_native_types`` (default ``False``), ``use_rdf_type``
              (default ``False``), ``auto_compact`` (default ``False``) --
              forwarded to ``from_rdf``.
            * ``indent`` (default ``2``), ``separators`` (default
              ``(',', ': ')``), ``sort_keys`` (default ``True``),
              ``ensure_ascii`` (default ``False``) -- forwarded to
              ``json.dumps``.

        :returns: ``None``; the document is written to *stream*.
        """
        encoding = encoding or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        context_data = kwargs.get('context')
        # NOTE: this line used to end with a stray comma, which turned the
        # value into a one-element tuple. A non-empty tuple is always truthy,
        # so native types were enabled no matter what the caller passed.
        # With the comma gone the documented default (False) and explicit
        # caller values are honoured.
        use_native_types = kwargs.get('use_native_types', False)
        use_rdf_type = kwargs.get('use_rdf_type', False)
        auto_compact = kwargs.get('auto_compact', False)

        indent = kwargs.get('indent', 2)
        separators = kwargs.get('separators', (',', ': '))
        sort_keys = kwargs.get('sort_keys', True)
        ensure_ascii = kwargs.get('ensure_ascii', False)

        obj = from_rdf(self.store, context_data, base,
                       use_native_types, use_rdf_type,
                       auto_compact=auto_compact)

        data = json.dumps(obj, indent=indent, separators=separators,
                          sort_keys=sort_keys, ensure_ascii=ensure_ascii)

        stream.write(data.encode(encoding, 'replace'))
87
88
def from_rdf(graph, context_data=None, base=None,
             use_native_types=False, use_rdf_type=False,
             auto_compact=False, startnode=None, index=False):
    """Convert *graph* into a JSON-LD compatible Python structure.

    :param graph: the graph to convert; its ``namespaces()`` are read when
        *auto_compact* is used, and it is handed to ``Converter.convert``.
    :param context_data: either a ``Context`` instance or raw context data
        from which a ``Context`` is built.
    :param base: base IRI used only when a new ``Context`` is built.
    :param use_native_types: forwarded to ``Converter``.
    :param use_rdf_type: forwarded to ``Converter``.
    :param auto_compact: when true and no context data was given, derive a
        context from the graph's namespace bindings.
    :param startnode: currently unused.
    :param index: currently unused.
    :returns: whatever ``Converter.convert`` produced; when the context is
        active the result is a dict that also carries the context data under
        the ``CONTEXT`` key (a list result is first wrapped under the graph
        key).
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        # Bindings with an empty prefix and the XML namespace are left out.
        context_data = {
            prefix: str(namespace)
            for prefix, namespace in graph.namespaces()
            if prefix and
            str(namespace) != "http://www.w3.org/XML/1998/namespace"
        }

    if isinstance(context_data, Context):
        context = context_data
        context_data = context.to_dict()
    else:
        context = Context(context_data, base=base)

    conv = Converter(context, use_native_types, use_rdf_type)
    data = conv.convert(graph)

    if not conv.context.active:
        return data

    if isinstance(data, list):
        data = {context.get_key(GRAPH): data}
    data[CONTEXT] = context_data
    return data
116
117
class Converter(object):
    """Turn the triples of a graph into JSON-LD style dicts and lists.

    The supplied context object is consulted for term lookup
    (``find_term``), IRI compaction (``shrink_iri`` / ``to_symbol``) and for
    the keyword aliases (``id_key``, ``type_key``, ``value_key``, ...) that
    are used as dictionary keys in the output.
    """

    def __init__(self, context, use_native_types, use_rdf_type):
        """Store the conversion settings.

        :param context: context object driving compaction.
        :param use_native_types: emit literals whose datatype is in
            ``PLAIN_LITERAL_TYPES`` as native values; forced on whenever
            ``context.active`` is truthy.
        :param use_rdf_type: when true, ``rdf:type`` is treated like any
            other predicate instead of being mapped to the type keyword.
        """
        self.context = context
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph):
        """Convert *graph* (and, if context aware, its named graphs).

        :returns: a list of top-level objects, or a single dict when the
            context is active and exactly one top-level object was produced.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            # Graphs without an IRI identifier are merged into one default
            # graph; IRI-named graphs are kept separate.
            default_graph = Graph()
            graphs = [default_graph]
            for g in graph.contexts():
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    default_graph += g
        else:
            graphs = [graph]

        context = self.context

        objs = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                graphname = context.shrink_iri(g.identifier)
                obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                # A single node in an unnamed graph is inlined directly.
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            # Merge into the first object when it carries the same id as
            # this graph's name, otherwise add a new top-level object.
            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            # Without an active context, unwrap an object that consists
            # solely of a graph entry into the plain list of its nodes.
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph):
        """Return the list of node dicts built from the subjects of *graph*."""
        nodemap = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (
                    isinstance(s, BNode)
                    and not any(graph.subjects(None, s))):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph, s, nodemap):
        """Build the node dict for subject *s* and register it in *nodemap*.

        :returns: the new node dict, or ``None`` when a node with the same
            id is already present in *nodemap*.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            node_id = None

        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Registered before the properties are walked, so a reference back
        # to this subject hits the "already present" check above.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            self.add_to_node(graph, s, p, o, node, nodemap)

        return node

    def add_to_node(self, graph, s, p, o, s_node, nodemap):
        """Add the value of the triple ``(s, p, o)`` to the dict *s_node*.

        The key is the matching context term's name when one is found,
        otherwise the compacted predicate (or the type keyword for
        ``rdf:type``). Repeated keys accumulate their values in a list.
        """
        context = self.context
        is_literal = isinstance(o, Literal)
        # Bound up front so the language checks below never hit an unbound
        # name (or a missing ``language`` attribute) for non-literal objects.
        language = None
        term = None

        if is_literal:
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            # ``is not None`` rather than truthiness: a list whose first
            # member is a falsy literal ("" / 0 / false) is still a list.
            if graph.value(o, RDF.first) is not None:
                containers = [LIST, None]
            else:
                containers = [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    term = context.find_term(str(p), coercion, container)
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            elif is_literal and term.language and language == term.language:
                node = str(o)
            elif is_literal and context.language and (
                    term.language is None and language is None):
                node = str(o)

            if term.container == SET:
                use_set = True
            elif term.container == LIST:
                members = self.to_collection(graph, o)
                # A malformed list yields None; in that case fall back to
                # the generic value handling below instead of crashing.
                if members is not None:
                    node = [self.type_coerce(v, term.type)
                            or self.to_raw_value(graph, s, v, nodemap)
                            for v in members]
            elif term.container == LANG and language:
                by_language = s_node.setdefault(p_key, {})
                text = str(o)
                # Key presence, not truthiness, decides whether to append:
                # an earlier empty string must not be overwritten.
                if language in by_language:
                    existing = by_language[language]
                    if not isinstance(existing, list):
                        by_language[language] = existing = [existing]
                    existing.append(text)
                else:
                    by_language[language] = text
                return

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        # Key presence, not truthiness: a previously stored falsy value
        # (0, False, "") must be kept when a second value arrives.
        if p_key in s_node:
            value = s_node[p_key]
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o, coerce_type):
        """Return *o* coerced according to *coerce_type*, or ``None``.

        ``None`` means no coercion applies and the caller should fall back
        to ``to_raw_value``.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(self, graph, s, o, nodemap):
        """Return the uncoerced JSON-LD representation of object *o*.

        Collections become ``{list_key: [...]}``, blank nodes and IRIs become
        ``{id_key: ...}`` references, and literals become either a bare value
        or a value object depending on datatype, language and the context.
        Other kinds of object yield ``None``.
        """
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            # Reuse the collection just computed rather than walking the
            # list a second time.
            return {context.list_key: [self.to_raw_value(graph, s, lo, nodemap)
                                       for lo in coll]}
        elif isinstance(o, BNode):
            embed = False  # TODO: self.context.active or using startnode and only one ref
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native:
                    if self.context.active:
                        return v
                    else:
                        return {context.value_key: v}
                return {context.type_key: context.to_symbol(o.datatype),
                        context.value_key: v}
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language,
                        context.value_key: v}
            elif not context.active or context.language and not o.language:
                return {context.value_key: v}
            else:
                return v
        return None

    def to_collection(self, graph, l):
        """Return the members of the RDF list starting at *l*.

        :returns: a Python list of the members (empty for ``rdf:nil``), or
            ``None`` when *l* is not the head of a well-formed list: it has
            no ``rdf:first``, a list node is an IRI, a list node carries
            properties other than ``rdf:first`` / ``rdf:rest`` /
            ``rdf:type rdf:List``, the chain is cyclic, or it does not end
            in ``rdf:nil``.
        """
        # ``is None`` rather than truthiness, so a list whose first member
        # is a falsy literal ("" / 0 / false) is still recognised.
        if l != RDF.nil and graph.value(l, RDF.first) is None:
            return None
        list_nodes = []
        chain = set([l])
        while l:
            if l == RDF.nil:
                return list_nodes
            if isinstance(l, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l):
                if first is None and p == RDF.first:
                    first = o
                elif rest is None and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            l = rest
            # Seeing a node twice means the rest-chain loops.
            if l in chain:
                return None
            chain.add(l)
        return None
349 first = o
350 elif not rest and p == RDF.rest:
351 rest = o
352 elif p != RDF.type or o != RDF.List:
353 return None
354 list_nodes.append(first)
355 l = rest
356 if l in chain:
357 return None
358 chain.add(l)
359