comparison env/lib/python3.7/site-packages/rdflib_jsonld/parser.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # -*- coding: utf-8 -*-
2 """
3 This parser will interpret a JSON-LD document as an RDF Graph. See:
4
5 http://json-ld.org/
6
7 Example usage::
8
9 >>> from rdflib.plugin import register, Parser
10 >>> register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
11
12 >>> from rdflib import Graph, URIRef, Literal
13 >>> test_json = '''
14 ... {
15 ... "@context": {
16 ... "dc": "http://purl.org/dc/terms/",
17 ... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
18 ... "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
19 ... },
20 ... "@id": "http://example.org/about",
21 ... "dc:title": {
22 ... "@language": "en",
23 ... "@value": "Someone's Homepage"
24 ... }
25 ... }
26 ... '''
27 >>> g = Graph().parse(data=test_json, format='json-ld')
28 >>> list(g) == [(URIRef('http://example.org/about'),
29 ... URIRef('http://purl.org/dc/terms/title'),
30 ... Literal("Someone's Homepage", lang='en'))]
31 True
32
33 """
34 # NOTE: This code reads the entire JSON object into memory before parsing, but
35 # we should consider streaming the input to deal with arbitrarily large graphs.
36
37 import warnings
38 from rdflib.graph import ConjunctiveGraph
39 from rdflib.parser import Parser, URLInputSource
40 from rdflib.namespace import RDF, XSD
41 from rdflib.term import URIRef, BNode, Literal
42
43 from .context import Context, Term, UNDEF
44 from .util import source_to_json, VOCAB_DELIMS, context_from_urlinputsource
45 from .keys import CONTEXT, GRAPH, ID, INDEX, LANG, LIST, REV, SET, TYPE, VALUE, VOCAB
46
# Names exported by a star-import of this module.
__all__ = ['JsonLDParser', 'to_rdf']


# Term that Parser._key_to_graph substitutes whenever a key (or the id of the
# term it maps to) equals TYPE, so type values are emitted using this term.
# NOTE(review): presumably Term's positional fields are (id, name, type), which
# would make str(RDF.type) the predicate IRI - confirm against .context.Term.
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB)

# Fallback used by Parser.__init__ when allow_lists_of_lists is None: when
# false, Parser._to_object drops a list found directly inside another list.
ALLOW_LISTS_OF_LISTS = True # NOTE: Not allowed in JSON-LD 1.0
53
54
class JsonLDParser(Parser):
    """Parser plugin that loads a JSON-LD document into an rdflib graph.

    The base class is the ``Parser`` imported from ``rdflib.parser``; the
    module-level name ``Parser`` is rebound further down to the JSON-LD
    conversion class used by :func:`to_rdf`.
    """

    def __init__(self):
        super(JsonLDParser, self).__init__()

    def parse(self, source, sink, **kwargs):
        """Read JSON-LD from ``source`` and add the resulting triples to ``sink``.

        :param source: input source; its public/system id supplies the default
            base IRI, and it is decoded with ``source_to_json``.
        :param sink: target graph. Triples are written through a
            ``ConjunctiveGraph`` sharing the sink's store and identifier.
        :param kwargs: optional settings:

            ``encoding``
                only checked; anything other than ``'utf-8'`` or ``'utf-16'``
                triggers a warning.
            ``base``
                base IRI overriding the one derived from ``source``.
            ``context``
                external context data; when absent and ``source`` is a
                ``URLInputSource``, ``context_from_urlinputsource`` is asked
                for one.
            ``produce_generalized_rdf``
                forwarded to :func:`to_rdf` (default ``False``).
        :returns: ``None``; the result is the content added to ``sink``.
        """
        encoding = kwargs.get('encoding') or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        base = kwargs.get('base') or sink.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        context_data = kwargs.get('context')
        if not context_data and isinstance(source, URLInputSource):
            context_data = context_from_urlinputsource(source)
        produce_generalized_rdf = kwargs.get('produce_generalized_rdf', False)

        data = source_to_json(source)
        conj_sink = ConjunctiveGraph(
            store=sink.store, identifier=sink.identifier)
        # The option used to be read from kwargs and then dropped, so callers
        # could never enable generalized RDF through Graph.parse().
        to_rdf(data, conj_sink, base, context_data,
               produce_generalized_rdf=produce_generalized_rdf)
78
def to_rdf(data, graph, base=None, context_data=None,
        produce_generalized_rdf=False,
        allow_lists_of_lists=None):
    """Convert decoded JSON-LD ``data`` into triples added to ``graph``.

    A fresh ``Context`` is created for ``base``; ``context_data``, when
    truthy, is loaded into it before conversion starts.

    :param data: the decoded JSON-LD document.
    :param graph: graph that receives the triples.
    :param base: base IRI handed to the ``Context``.
    :param context_data: optional external context to preload.
    :param produce_generalized_rdf: passed to ``Parser`` as
        ``generalized_rdf``.
    :param allow_lists_of_lists: passed to ``Parser``; ``None`` selects the
        module default.
    :returns: whatever ``Parser.parse`` returns (the graph it was given).
    """
    ctx = Context(base=base)
    if context_data:
        ctx.load(context_data)
    return Parser(
        generalized_rdf=produce_generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
    ).parse(data, ctx, graph)
89
90
class Parser(object):
    """Walks a decoded JSON-LD structure and adds the triples to a graph."""

    def __init__(self, generalized_rdf=False, allow_lists_of_lists=None):
        """Store the conversion options.

        :param generalized_rdf: when true, blank-node predicates and ids that
            resolve to a string without ``':'`` are kept instead of dropped.
        :param allow_lists_of_lists: whether a list directly inside a list is
            converted; ``None`` falls back to ``ALLOW_LISTS_OF_LISTS``.
        """
        self.generalized_rdf = generalized_rdf
        self.allow_lists_of_lists = (allow_lists_of_lists
                if allow_lists_of_lists is not None else ALLOW_LISTS_OF_LISTS)

    def parse(self, data, context, graph):
        """Convert ``data`` and add the result to ``graph``.

        :param data: decoded JSON-LD: a list of nodes or a single dict. Any
            other top-level value produces no triples.
        :param context: active context; a top-level ``CONTEXT`` entry of a
            dict document is loaded into it.
        :param graph: graph (also used as the dataset) receiving the triples
            and the prefix bindings derived from the context.
        :returns: ``graph``.
        """
        topcontext = False

        if isinstance(data, list):
            resources = data
        elif isinstance(data, dict):
            l_ctx = data.get(CONTEXT)
            if l_ctx:
                context.load(l_ctx, context.base)
                # Tells _add_to_graph not to apply this context a second time.
                topcontext = True
            resources = [data]
        else:
            # A bare scalar carries no nodes. Previously ``resources`` stayed
            # unbound here and the loop below raised UnboundLocalError.
            resources = []

        if context.vocab:
            graph.bind(None, context.vocab)
        for name, term in context.terms.items():
            if term.id and term.id.endswith(VOCAB_DELIMS):
                graph.bind(name, term.id)

        for node in resources:
            self._add_to_graph(graph, graph, context, node, topcontext)

        return graph

    def _add_to_graph(self, dataset, graph, context, node, topcontext=False):
        """Add one node object to ``graph`` and return its subject.

        :param dataset: object asked for named graphs via ``get_context``.
        :param graph: graph receiving this node's triples.
        :param context: active context; replaced locally when ``node`` carries
            its own ``CONTEXT`` entry and ``topcontext`` is false.
        :param node: candidate node; non-dicts and dicts for which
            ``context.get_value`` is truthy are ignored.
        :param topcontext: true when the node's ``CONTEXT`` entry was already
            loaded by :meth:`parse`.
        :returns: the subject term, or ``None`` when nothing was added.
        """
        if not isinstance(node, dict) or context.get_value(node):
            return

        if CONTEXT in node and not topcontext:
            l_ctx = node.get(CONTEXT)
            if l_ctx:
                context = context.subcontext(l_ctx)
            else:
                # A falsy local context starts over from a fresh Context.
                context = Context(base=context.doc_base)

        id_val = context.get_id(node)
        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()

        if subj is None:
            return None

        for key, obj in node.items():
            if key in (CONTEXT, ID, context.get_key(ID)):
                continue
            if key in (REV, context.get_key(REV)):
                # Every entry of a reverse map is emitted with subject and
                # object swapped.
                for rkey, robj in obj.items():
                    self._key_to_graph(dataset, graph, context, subj, rkey, robj, True)
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj)

        return subj

    def _key_to_graph(self, dataset, graph, context, subj, key, obj, reverse=False):
        """Emit the triples for one ``key``/``obj`` entry of a node.

        :param dataset: object asked for named graphs via ``get_context``.
        :param graph: graph receiving the triples.
        :param context: active context used to look up and expand ``key``.
        :param subj: subject term of the enclosing node.
        :param key: the entry's key as written in the document.
        :param obj: the entry's value (single value, list or dict).
        :param reverse: when true the triple is written as
            ``(object, predicate, subj)``; a term whose ``reverse`` attribute
            is truthy flips this once more.
        :returns: ``None``.
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.container == LIST:
                obj_nodes = [{LIST: obj_nodes}]
            elif isinstance(obj, dict):
                if term.container == INDEX:
                    # Index map: the keys are discarded, the values collected.
                    obj_nodes = []
                    for values in obj.values():
                        if not isinstance(values, list):
                            obj_nodes.append(values)
                        else:
                            obj_nodes += values
                elif term.container == LANG:
                    # Language map: each value is paired with its key so that
                    # _to_object can build a language-tagged literal.
                    obj_nodes = []
                    for lang, values in obj.items():
                        if not isinstance(values, list):
                            values = [values]
                        for v in values:
                            obj_nodes.append((v, lang))
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM
        elif GRAPH in (key, term_id):
            # NOTE: relies on ``dataset`` offering get_context().
            subgraph = dataset.get_context(subj)
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return
        elif SET in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        pred_uri = term.id if term else context.expand(key)

        # Replace set objects by their members and splice lists in place.
        flattened = []
        for obj in obj_nodes:
            if isinstance(obj, dict):
                objs = context.get_set(obj)
                if objs is not None:
                    obj = objs
            if isinstance(obj, list):
                flattened += obj
                continue
            flattened.append(obj)
        obj_nodes = flattened

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        bid = self._get_bnodeid(pred_uri)
        if bid:
            # Blank-node predicates are only emitted for generalized RDF.
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
        else:
            pred = URIRef(pred_uri)
        for obj_node in obj_nodes:
            obj = self._to_object(dataset, graph, context, term, obj_node)
            if obj is None:
                continue
            if reverse:
                graph.add((obj, pred, subj))
            else:
                graph.add((subj, pred, obj))

    def _to_object(self, dataset, graph, context, term, node, inlist=False):
        """Turn one JSON value into an RDF term usable as a triple object.

        :param dataset: passed through to nested conversions.
        :param graph: graph receiving triples of nested nodes and lists.
        :param context: active context.
        :param term: term of the key the value belongs to, or a falsy value.
        :param node: the value: ``None``, a ``(value, lang)`` tuple produced
            for language maps, a dict, or a scalar.
        :param inlist: true when called for a list member.
        :returns: a ``Literal``, the head of a converted list, the subject of
            a nested node, or ``None`` when the value is to be skipped.
        """
        if node is None:
            return

        if isinstance(node, tuple):
            value, lang = node
            if value is None:
                return
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    return
                listref = self._add_list(dataset, graph, context, term, node_list)
                if listref:
                    return listref

        else: # expand..
            if not term or not term.type:
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)
                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)
            else:
                # Typed term: wrap the scalar in a dict so the code below
                # treats it like an expanded object.
                if term.type == ID:
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB:
                    node = {ID: context.expand(node) or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type,
                            VALUE: node}

        lang = context.get_language(node)
        if lang or context.get_key(VALUE) in node or VALUE in node:
            value = context.get_value(node)
            if value is None:
                return None
            if lang:
                return Literal(value, lang=lang)
            # The datatype is only consulted when there is no language tag.
            datatype = context.get_type(node)
            if datatype:
                return Literal(value, datatype=context.expand(datatype))
            return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context, id_val):
        """Return the RDF term for the id string ``id_val``.

        :returns: a ``BNode`` for ``_:label`` ids, otherwise a ``URIRef`` of
            ``context.resolve(id_val)``; ``None`` when the resolved string has
            no ``':'`` and generalized RDF is off.
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            return BNode(bid)
        else:
            uri = context.resolve(id_val)
            if not self.generalized_rdf and ':' not in uri:
                return None
            return URIRef(uri)

    def _get_bnodeid(self, ref):
        """Return the label of a ``_:label`` reference.

        :returns: the text after the ``_:`` prefix, or ``None`` when ``ref``
            lacks the prefix or the label is empty.
        """
        if not ref.startswith('_:'):
            return
        return ref[2:] or None

    def _add_list(self, dataset, graph, context, term, node_list):
        """Write ``node_list`` as an ``rdf:first``/``rdf:rest`` chain.

        Members that are ``None`` or that :meth:`_to_object` turns into
        ``None`` are left out of the chain.

        :param node_list: the members; a non-list is treated as one member.
        :returns: the blank node heading the chain, or ``RDF.nil`` when no
            member produced an object.
        """
        if not isinstance(node_list, list):
            node_list = [node_list]
        first_subj = BNode()
        subj, rest = first_subj, None
        for node in node_list:
            if node is None:
                continue
            obj = self._to_object(dataset, graph, context, term, node, inlist=True)
            if obj is None:
                continue
            # Only link the next cell once there is an object for it. Linking
            # before the conversion made a skipped member leave ``rest``
            # pointing at the current cell, so the next member added a
            # (cell, rdf:rest, cell) loop.
            if rest:
                graph.add((subj, RDF.rest, rest))
                subj = rest
            graph.add((subj, RDF.first, obj))
            rest = BNode()
        if rest:
            graph.add((subj, RDF.rest, RDF.nil))
            return first_subj
        else:
            return RDF.nil