Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib_jsonld/parser.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 This parser will interpret a JSON-LD document as an RDF Graph. See: | |
4 | |
5 http://json-ld.org/ | |
6 | |
7 Example usage:: | |
8 | |
9 >>> from rdflib.plugin import register, Parser | |
10 >>> register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') | |
11 | |
12 >>> from rdflib import Graph, URIRef, Literal | |
13 >>> test_json = ''' | |
14 ... { | |
15 ... "@context": { | |
16 ... "dc": "http://purl.org/dc/terms/", | |
17 ... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", | |
18 ... "rdfs": "http://www.w3.org/2000/01/rdf-schema#" | |
19 ... }, | |
20 ... "@id": "http://example.org/about", | |
21 ... "dc:title": { | |
22 ... "@language": "en", | |
23 ... "@value": "Someone's Homepage" | |
24 ... } | |
25 ... } | |
26 ... ''' | |
27 >>> g = Graph().parse(data=test_json, format='json-ld') | |
28 >>> list(g) == [(URIRef('http://example.org/about'), | |
29 ... URIRef('http://purl.org/dc/terms/title'), | |
30 ... Literal("Someone's Homepage", lang='en'))] | |
31 True | |
32 | |
33 """ | |
34 # NOTE: This code reads the entire JSON object into memory before parsing, but | |
35 # we should consider streaming the input to deal with arbitrarily large graphs. | |
36 | |
37 import warnings | |
38 from rdflib.graph import ConjunctiveGraph | |
39 from rdflib.parser import Parser, URLInputSource | |
40 from rdflib.namespace import RDF, XSD | |
41 from rdflib.term import URIRef, BNode, Literal | |
42 | |
43 from ._compat import str, str | |
44 from .context import Context, Term, UNDEF | |
45 from .util import source_to_json, VOCAB_DELIMS, context_from_urlinputsource | |
46 from .keys import CONTEXT, GRAPH, ID, INDEX, LANG, LIST, REV, SET, TYPE, VALUE, VOCAB | |
47 | |
__all__ = ['JsonLDParser', 'to_rdf']


# Register the 'jsonld' file suffix so RDFLib can guess the format from a
# file name. An existing mapping is left alone, and an RDFLib without
# SUFFIX_FORMAT_MAP is tolerated silently.
try:
    from rdflib.util import SUFFIX_FORMAT_MAP
except ImportError:
    pass
else:
    SUFFIX_FORMAT_MAP.setdefault('jsonld', 'application/ld+json')


# Term that Parser._key_to_graph substitutes whenever a key is (or maps to)
# the TYPE keyword, so those values are emitted under rdf:type.
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB)

# Module-wide default used by Parser when allow_lists_of_lists is not given.
ALLOW_LISTS_OF_LISTS = True  # NOTE: Not allowed in JSON-LD 1.0
63 | |
64 | |
class JsonLDParser(Parser):
    """RDFLib parser plugin that loads a JSON-LD document into a graph.

    The base class is the ``Parser`` imported from ``rdflib.parser``; the
    module-level name ``Parser`` is rebound further down this file by the
    internal JSON-LD-to-RDF converter class.
    """

    def __init__(self):
        super(JsonLDParser, self).__init__()

    def parse(self, source, sink, **kwargs):
        """Read JSON-LD from ``source`` and add the resulting triples to ``sink``.

        Recognised keyword arguments:

        ``encoding``
            Only validated: anything other than ``'utf-8'`` or ``'utf-16'``
            triggers a warning. Defaults to ``'utf-8'``.
        ``base``
            Base IRI. Defaults to the sink's absolutized form of the source's
            public id, system id, or the empty string.
        ``context``
            Context data handed to ``to_rdf``. When absent and ``source`` is
            a ``URLInputSource``, it is taken from
            ``context_from_urlinputsource(source)``.
        ``produce_generalized_rdf``
            Forwarded to ``to_rdf``; defaults to ``False``.

        Returns ``None``; the result is the triples added to ``sink``.
        """
        encoding = kwargs.get('encoding') or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        base = kwargs.get('base') or sink.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        context_data = kwargs.get('context')
        if not context_data and isinstance(source, URLInputSource):
            context_data = context_from_urlinputsource(source)
        produce_generalized_rdf = kwargs.get('produce_generalized_rdf', False)

        data = source_to_json(source)

        # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
        # context_aware. Keeping this check in case RDFLib is changed, or
        # someone passes something context_aware to this parser directly.
        if not sink.context_aware:
            conj_sink = ConjunctiveGraph(
                store=sink.store,
                identifier=sink.identifier)
        else:
            conj_sink = sink

        # The option used to be read above and then dropped, so callers could
        # never enable generalized RDF through Graph.parse(); pass it on.
        to_rdf(data, conj_sink, base, context_data,
               produce_generalized_rdf=produce_generalized_rdf)
96 | |
97 | |
def to_rdf(data, dataset, base=None, context_data=None,
           produce_generalized_rdf=False,
           allow_lists_of_lists=None):
    """Convert decoded JSON-LD ``data`` to RDF inside ``dataset``.

    A fresh ``Context`` is created with ``base``; if ``context_data`` is
    truthy it is loaded into that context before parsing starts.
    ``produce_generalized_rdf`` and ``allow_lists_of_lists`` are passed on to
    the internal ``Parser``.

    Returns whatever ``Parser.parse`` returns (the graph it wrote into).
    """
    ctx = Context(base=base)
    if context_data:
        ctx.load(context_data)
    converter = Parser(
        generalized_rdf=produce_generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
    )
    return converter.parse(data, ctx, dataset)
108 | |
109 | |
class Parser(object):
    """Walks decoded JSON-LD data and adds the corresponding triples to a graph.

    ``generalized_rdf`` controls whether blank-node predicates and subject
    identifiers without a ``':'`` are emitted instead of being dropped.
    ``allow_lists_of_lists`` controls whether a list nested directly inside
    another list is converted; ``None`` selects ``ALLOW_LISTS_OF_LISTS``.
    """

    def __init__(self, generalized_rdf=False, allow_lists_of_lists=None):
        self.generalized_rdf = generalized_rdf
        self.allow_lists_of_lists = (
            allow_lists_of_lists
            if allow_lists_of_lists is not None else ALLOW_LISTS_OF_LISTS)

    def parse(self, data, context, dataset):
        """Add every node found in ``data`` to ``dataset``.

        ``data`` is the decoded JSON: a list of nodes or a single node dict.
        A top-level dict may carry a context, which is loaded into
        ``context``. Any other top-level value contributes no nodes.

        Namespace prefixes are bound on ``dataset`` for the context's vocab
        and for every term whose id ends with one of ``VOCAB_DELIMS``.

        Returns the graph written to: ``dataset.default_context`` when the
        dataset is context aware, otherwise ``dataset`` itself.
        """
        topcontext = False

        if isinstance(data, list):
            resources = data
        elif isinstance(data, dict):
            l_ctx = data.get(CONTEXT)
            if l_ctx:
                context.load(l_ctx, context.base)
                topcontext = True
            resources = [data]
        else:
            # A scalar top-level value (string, number, null, ...) used to
            # leave ``resources`` unbound and crash with UnboundLocalError.
            # Non-dict nodes are ignored everywhere else, so do the same here.
            resources = []

        if context.vocab:
            dataset.bind(None, context.vocab)
        for name, term in list(context.terms.items()):
            if term.id and term.id.endswith(VOCAB_DELIMS):
                dataset.bind(name, term.id)

        graph = dataset.default_context if dataset.context_aware else dataset

        for node in resources:
            self._add_to_graph(dataset, graph, context, node, topcontext)

        return graph

    def _add_to_graph(self, dataset, graph, context, node, topcontext=False):
        """Add one node object and everything reachable from it to ``graph``.

        Returns the subject (URIRef or BNode) created for ``node``, or
        ``None`` when ``node`` is not a dict, is a value object, or has an id
        that ``_to_rdf_id`` rejects.

        ``topcontext`` is true when the node's own context was already loaded
        by ``parse`` and must not be applied a second time.
        """
        if not isinstance(node, dict) or context.get_value(node):
            return

        if CONTEXT in node and not topcontext:
            l_ctx = node.get(CONTEXT)
            if l_ctx:
                context = context.subcontext(l_ctx)
            else:
                # An empty/null local context resets to a fresh context.
                context = Context(base=context.doc_base)

        id_val = context.get_id(node)
        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()

        if subj is None:
            return None

        # NOTE: crude way to signify that this node might represent a named graph
        no_id = id_val is None

        for key, obj in list(node.items()):
            if key in (CONTEXT, ID) or key in context.get_keys(ID):
                continue
            if key == REV or key in context.get_keys(REV):
                # The reverse-property value has to be a map of properties;
                # anything else used to raise AttributeError on ``.items()``.
                # Skip it, like other malformed nodes.
                if not isinstance(obj, dict):
                    continue
                for rkey, robj in list(obj.items()):
                    self._key_to_graph(dataset, graph, context, subj, rkey,
                                       robj, reverse=True, no_id=no_id)
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj,
                                   no_id=no_id)

        return subj

    def _key_to_graph(self, dataset, graph, context, subj, key, obj,
                      reverse=False, no_id=False):
        """Emit the triples for a single ``key``/``obj`` member of a node.

        ``reverse`` swaps subject and object in the emitted triples; a term
        that is itself declared as reverse flips the flag once more.
        ``no_id`` is true when the enclosing node had no id, in which case a
        graph-valued key is written into the current graph rather than into
        a named graph.
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.container == LIST:
                # Wrap the values so they are converted as one RDF list.
                obj_nodes = [{LIST: obj_nodes}]
            elif isinstance(obj, dict):
                if term.container == INDEX:
                    # Index map: the keys are dropped, the values merged.
                    obj_nodes = []
                    for values in list(obj.values()):
                        if not isinstance(values, list):
                            obj_nodes.append(values)
                        else:
                            obj_nodes += values
                elif term.container == LANG:
                    # Language map: keep (value, language) pairs, which
                    # _to_object turns into language-tagged literals.
                    obj_nodes = []
                    for lang, values in list(obj.items()):
                        if not isinstance(values, list):
                            values = [values]
                        for v in values:
                            obj_nodes.append((v, lang))
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM
        elif GRAPH in (key, term_id):
            if dataset.context_aware and not no_id:
                subgraph = dataset.get_context(subj)
            else:
                subgraph = graph
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return
        elif SET in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        pred_uri = term.id if term else context.expand(key)

        # Replace set objects by their members and splice nested plain lists
        # into the flat sequence of object nodes.
        flattened = []
        for candidate in obj_nodes:
            if isinstance(candidate, dict):
                members = context.get_set(candidate)
                if members is not None:
                    candidate = members
            if isinstance(candidate, list):
                flattened += candidate
                continue
            flattened.append(candidate)
        obj_nodes = flattened

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        bid = self._get_bnodeid(pred_uri)
        if bid:
            # Blank-node predicates exist only in generalized RDF.
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
        else:
            pred = URIRef(pred_uri)

        for obj_node in obj_nodes:
            value = self._to_object(dataset, graph, context, term, obj_node)
            if value is None:
                continue
            if reverse:
                graph.add((value, pred, subj))
            else:
                graph.add((subj, pred, value))

    def _to_object(self, dataset, graph, context, term, node, inlist=False):
        """Convert one object node into an RDF term.

        Returns a ``Literal``, the head of an RDF list, the subject of a
        nested node, or ``None`` when nothing should be emitted.
        ``inlist`` is true when called for an item of a list, so that nested
        lists can be refused when ``allow_lists_of_lists`` is false.
        """
        if node is None:
            return

        if isinstance(node, tuple):
            # (value, language) pair produced for language containers.
            value, lang = node
            if value is None:
                return
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    return
                listref = self._add_list(dataset, graph, context, term,
                                         node_list)
                if listref:
                    return listref

        else:  # expand..
            if not term or not term.type:
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)
                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)
            else:
                # Type coercion: rewrite the scalar as the equivalent
                # expanded object and fall through to the handling below.
                if term.type == ID:
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB:
                    node = {ID: context.expand(node)
                            or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type,
                            VALUE: node}

        lang = context.get_language(node)
        if lang or context.get_key(VALUE) in node or VALUE in node:
            value = context.get_value(node)
            if value is None:
                return None
            # A language tag takes precedence over any declared datatype.
            datatype = None if lang else (context.get_type(node) or None)
            if lang:
                return Literal(value, lang=lang)
            elif datatype:
                return Literal(value, datatype=context.expand(datatype))
            else:
                return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context, id_val):
        """Turn a node identifier string into a ``BNode`` or ``URIRef``.

        Returns ``None`` when the resolved identifier contains no ``':'``
        and generalized RDF was not requested.
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            return BNode(bid)
        else:
            uri = context.resolve(id_val)
            if not self.generalized_rdf and ':' not in uri:
                return None
            return URIRef(uri)

    def _get_bnodeid(self, ref):
        """Return the label of a ``_:label`` reference.

        Returns ``None`` when ``ref`` does not start with ``'_:'`` or when
        the label after the prefix is empty.
        """
        if not ref.startswith('_:'):
            return
        bid = ref.split('_:', 1)[-1]
        return bid or None

    def _add_list(self, dataset, graph, context, term, node_list):
        """Write ``node_list`` to ``graph`` as an ``rdf:first``/``rdf:rest`` chain.

        Items that are ``None`` or that convert to ``None`` are left out.
        Returns the head cell of the chain, or ``RDF.nil`` when no item was
        written.
        """
        if not isinstance(node_list, list):
            node_list = [node_list]

        head = None
        last = None
        for node in node_list:
            if node is None:
                continue
            # Convert the item before allocating or linking a cell for it.
            # Linking first meant that a skipped item in the middle of a
            # list left a cell pointing at itself through rdf:rest.
            obj = self._to_object(dataset, graph, context, term, node,
                                  inlist=True)
            if obj is None:
                continue
            cell = BNode()
            if last is None:
                head = cell
            else:
                graph.add((last, RDF.rest, cell))
            graph.add((cell, RDF.first, obj))
            last = cell

        if head is None:
            return RDF.nil
        graph.add((last, RDF.rest, RDF.nil))
        return head