Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib_jsonld/parser.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 This parser will interpret a JSON-LD document as an RDF Graph. See: | |
4 | |
5 http://json-ld.org/ | |
6 | |
7 Example usage:: | |
8 | |
9 >>> from rdflib.plugin import register, Parser | |
10 >>> register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') | |
11 | |
12 >>> from rdflib import Graph, URIRef, Literal | |
13 >>> test_json = ''' | |
14 ... { | |
15 ... "@context": { | |
16 ... "dc": "http://purl.org/dc/terms/", | |
17 ... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", | |
18 ... "rdfs": "http://www.w3.org/2000/01/rdf-schema#" | |
19 ... }, | |
20 ... "@id": "http://example.org/about", | |
21 ... "dc:title": { | |
22 ... "@language": "en", | |
23 ... "@value": "Someone's Homepage" | |
24 ... } | |
25 ... } | |
26 ... ''' | |
27 >>> g = Graph().parse(data=test_json, format='json-ld') | |
28 >>> list(g) == [(URIRef('http://example.org/about'), | |
29 ... URIRef('http://purl.org/dc/terms/title'), | |
30 ... Literal("Someone's Homepage", lang='en'))] | |
31 True | |
32 | |
33 """ | |
34 # NOTE: This code reads the entire JSON object into memory before parsing, but | |
35 # we should consider streaming the input to deal with arbitrarily large graphs. | |
36 | |
37 import warnings | |
38 from rdflib.graph import ConjunctiveGraph | |
39 from rdflib.parser import Parser, URLInputSource | |
40 from rdflib.namespace import RDF, XSD | |
41 from rdflib.term import URIRef, BNode, Literal | |
42 | |
43 from .context import Context, Term, UNDEF | |
44 from .util import source_to_json, VOCAB_DELIMS, context_from_urlinputsource | |
45 from .keys import CONTEXT, GRAPH, ID, INDEX, LANG, LIST, REV, SET, TYPE, VALUE, VOCAB | |
46 | |
# Names exported by ``from rdflib_jsonld.parser import *``.
__all__ = ['JsonLDParser', 'to_rdf']


# Term that Parser._key_to_graph substitutes whenever a key (or the id of the
# term the key maps to) equals the TYPE keyword.
# NOTE(review): the positional arguments look like (id, name, type), which
# would make str(RDF.type) the predicate IRI -- confirm against the Term
# definition in .context.
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB)

# Default used by Parser.__init__ when ``allow_lists_of_lists`` is None.
ALLOW_LISTS_OF_LISTS = True # NOTE: Not allowed in JSON-LD 1.0
53 | |
54 | |
class JsonLDParser(Parser):
    """rdflib parser plugin that reads a JSON-LD document into a graph.

    ``Parser`` in the base-class position is the one imported from
    ``rdflib.parser``; the module-level ``Parser`` class defined further down
    does not exist yet when this class statement runs.
    """

    def __init__(self):
        super(JsonLDParser, self).__init__()

    def parse(self, source, sink, **kwargs):
        """Parse the JSON-LD document in *source* and add it to *sink*.

        Recognised keyword arguments:

        ``encoding``
            Only inspected to emit a warning when it is neither ``'utf-8'``
            nor ``'utf-16'``; it is not used for decoding here.
        ``base``
            Base IRI. Defaults to the absolutized public id or system id of
            *source* (or the empty string when neither is set).
        ``context``
            Context data loaded before the document is processed. When it is
            missing and *source* is a ``URLInputSource``, it is obtained via
            ``context_from_urlinputsource``.
        ``produce_generalized_rdf``
            Forwarded to ``to_rdf``; defaults to ``False``.

        The triples are written through a ``ConjunctiveGraph`` that shares the
        store and identifier of *sink*. Nothing is returned.
        """
        encoding = kwargs.get('encoding') or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        base = kwargs.get('base') or sink.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        context_data = kwargs.get('context')
        if not context_data and isinstance(source, URLInputSource):
            context_data = context_from_urlinputsource(source)
        produce_generalized_rdf = kwargs.get('produce_generalized_rdf', False)

        data = source_to_json(source)
        conj_sink = ConjunctiveGraph(
            store=sink.store, identifier=sink.identifier)
        # The option used to be read above but never handed on, so callers
        # could not actually enable generalized RDF through this entry point.
        to_rdf(data, conj_sink, base, context_data, produce_generalized_rdf)
77 | |
78 | |
def to_rdf(data, graph, base=None, context_data=None,
           produce_generalized_rdf=False,
           allow_lists_of_lists=None):
    """Convert already-decoded JSON-LD *data* into triples in *graph*.

    A ``Context`` is created with *base*; *context_data*, when truthy, is
    loaded into it. The work is then delegated to the module-level ``Parser``
    class, configured with *produce_generalized_rdf* and
    *allow_lists_of_lists*. Returns whatever ``Parser.parse`` returns.
    """
    ctx = Context(base=base)
    if context_data:
        ctx.load(context_data)
    return Parser(
        generalized_rdf=produce_generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
    ).parse(data, ctx, graph)
89 | |
90 | |
class Parser(object):
    """Walks decoded JSON-LD data and adds the resulting triples to a graph.

    Attributes:
        generalized_rdf: when false, predicates that are blank nodes are
            skipped and ids that resolve to a string without ``':'`` are
            rejected.
        allow_lists_of_lists: whether a list nested directly inside another
            list is converted; falls back to ``ALLOW_LISTS_OF_LISTS``.
    """

    def __init__(self, generalized_rdf=False, allow_lists_of_lists=None):
        self.generalized_rdf = generalized_rdf
        if allow_lists_of_lists is None:
            allow_lists_of_lists = ALLOW_LISTS_OF_LISTS
        self.allow_lists_of_lists = allow_lists_of_lists

    def parse(self, data, context, graph):
        """Add every node found in *data* to *graph* and return *graph*.

        *data* is the decoded JSON document. A list is treated as a sequence
        of nodes; a dict is treated as a single node whose ``CONTEXT`` entry,
        if truthy, is loaded into *context*. Any other value (a bare string,
        number, ``None``...) carries no nodes and leaves the graph without
        new triples.

        Prefix bindings are registered on *graph* for the context vocabulary
        and for every term whose id ends with one of ``VOCAB_DELIMS``.
        """
        topcontext = False

        if isinstance(data, list):
            resources = data
        elif isinstance(data, dict):
            l_ctx = data.get(CONTEXT)
            if l_ctx:
                context.load(l_ctx, context.base)
                topcontext = True
            resources = [data]
        else:
            # Previously `resources` stayed unbound here and the loop below
            # raised UnboundLocalError for scalar documents.
            resources = []

        if context.vocab:
            graph.bind(None, context.vocab)
        for name, term in list(context.terms.items()):
            if term.id and term.id.endswith(VOCAB_DELIMS):
                graph.bind(name, term.id)

        for node in resources:
            self._add_to_graph(graph, graph, context, node, topcontext)

        return graph

    def _add_to_graph(self, dataset, graph, context, node, topcontext=False):
        """Add the node object *node* to *graph* and return its subject.

        Returns ``None`` without adding anything when *node* is not a dict,
        when ``context.get_value(node)`` is truthy, or when the node id is
        rejected by ``_to_rdf_id``.

        *dataset* is passed along unchanged; ``_key_to_graph`` uses it to
        obtain sub-graphs. *topcontext* signals that the ``CONTEXT`` entry of
        this node has already been loaded by ``parse``.
        """
        if not isinstance(node, dict) or context.get_value(node):
            return

        if CONTEXT in node and not topcontext:
            l_ctx = node.get(CONTEXT)
            if l_ctx:
                context = context.subcontext(l_ctx)
            else:
                # A present-but-falsy context replaces the active one with a
                # fresh Context based on the document base.
                context = Context(base=context.doc_base)

        id_val = context.get_id(node)
        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()

        if subj is None:
            return None

        for key, obj in list(node.items()):
            if key in (CONTEXT, ID, context.get_key(ID)):
                continue
            if key in (REV, context.get_key(REV)):
                # Each entry of the reverse map is added with reverse=True.
                for rkey, robj in list(obj.items()):
                    self._key_to_graph(dataset, graph, context, subj, rkey, robj, True)
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj)

        return subj

    def _key_to_graph(self, dataset, graph, context, subj, key, obj, reverse=False):
        """Add the triples for one ``key: obj`` entry of the node *subj*.

        The key is looked up in ``context.terms``; the term's container
        setting (``LIST``, ``INDEX`` or ``LANG``) decides how *obj* is
        unpacked into individual object nodes. ``GRAPH`` and ``SET`` keys are
        handled by adding their members as nodes and returning early. With
        *reverse* true (or a term flagged as reverse, which toggles it) the
        triple is written as ``(object, predicate, subj)``.
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.container == LIST:
                obj_nodes = [{LIST: obj_nodes}]
            elif isinstance(obj, dict):
                if term.container == INDEX:
                    # Index map: the keys are dropped, the values are merged.
                    obj_nodes = []
                    for values in obj.values():
                        if isinstance(values, list):
                            obj_nodes += values
                        else:
                            obj_nodes.append(values)
                elif term.container == LANG:
                    # Language map: each value becomes a (value, lang) tuple,
                    # which _to_object turns into a language-tagged Literal.
                    obj_nodes = []
                    for lang, values in obj.items():
                        if not isinstance(values, list):
                            values = [values]
                        for v in values:
                            obj_nodes.append((v, lang))
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM
        elif GRAPH in (key, term_id):
            subgraph = dataset.get_context(subj)
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return
        elif SET in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        pred_uri = term.id if term else context.expand(key)

        # Replace set objects by their members and splice lists in place.
        flattened = []
        for item in obj_nodes:
            if isinstance(item, dict):
                members = context.get_set(item)
                if members is not None:
                    item = members
            if isinstance(item, list):
                flattened += item
            else:
                flattened.append(item)
        obj_nodes = flattened

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        bid = self._get_bnodeid(pred_uri)
        if bid:
            # Blank-node predicates are only emitted for generalized RDF.
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
        else:
            pred = URIRef(pred_uri)

        for obj_node in obj_nodes:
            target = self._to_object(dataset, graph, context, term, obj_node)
            if target is None:
                continue
            if reverse:
                graph.add((target, pred, subj))
            else:
                graph.add((subj, pred, target))

    def _to_object(self, dataset, graph, context, term, node, inlist=False):
        """Convert *node* into the object of a triple.

        Returns a ``Literal``, the subject returned by ``_add_to_graph`` for a
        nested node, the head returned by ``_add_list`` for a list object, or
        ``None`` when *node* yields no object. *inlist* is true when the node
        is itself a member of a list; nested lists are then dropped unless
        ``allow_lists_of_lists`` is set.
        """
        if node is None:
            return

        if isinstance(node, tuple):
            # (value, lang) pair produced for language-map containers.
            value, lang = node
            if value is None:
                return
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    return
                listref = self._add_list(dataset, graph, context, term, node_list)
                if listref:
                    return listref

        else:  # plain (compacted) value: expand it using the term definition
            if not term or not term.type:
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)
                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)
            else:
                if term.type == ID:
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB:
                    node = {ID: context.expand(node) or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type,
                            VALUE: node}

        lang = context.get_language(node)
        if lang or context.get_key(VALUE) in node or VALUE in node:
            value = context.get_value(node)
            if value is None:
                return None
            if lang:
                return Literal(value, lang=lang)
            # The datatype is only consulted when there is no language tag.
            datatype = context.get_type(node)
            if datatype:
                return Literal(value, datatype=context.expand(datatype))
            return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context, id_val):
        """Return the ``BNode`` or ``URIRef`` for the id string *id_val*.

        Returns ``None`` when the resolved id contains no ``':'`` and
        generalized RDF is not enabled.
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            return BNode(bid)
        uri = context.resolve(id_val)
        if not self.generalized_rdf and ':' not in uri:
            return None
        return URIRef(uri)

    def _get_bnodeid(self, ref):
        """Return the label after a leading ``'_:'`` in *ref*, else ``None``.

        ``None`` is also returned when the label is empty (``ref == '_:'``).
        """
        if not ref.startswith('_:'):
            return
        bid = ref.split('_:', 1)[-1]
        return bid or None

    def _add_list(self, dataset, graph, context, term, node_list):
        """Add *node_list* to *graph* as an ``rdf:first``/``rdf:rest`` chain.

        Returns the blank node heading the chain, or ``RDF.nil`` when no
        member produces an object. A non-list *node_list* is treated as a
        single-member list.

        Members are converted before any cell is linked, so that an item
        which yields no object (``None``, or a value ``_to_object`` rejects)
        is simply left out. Linking while converting used to leave a cell
        without ``rdf:first``, or an ``rdf:rest`` pointing at its own cell,
        whenever such an item followed a valid one.
        """
        if not isinstance(node_list, list):
            node_list = [node_list]

        members = []
        for node in node_list:
            if node is None:
                continue
            member = self._to_object(dataset, graph, context, term, node, inlist=True)
            if member is not None:
                members.append(member)

        if not members:
            return RDF.nil

        first_subj = subj = BNode()
        last = len(members) - 1
        for position, member in enumerate(members):
            graph.add((subj, RDF.first, member))
            if position == last:
                graph.add((subj, RDF.rest, RDF.nil))
            else:
                following = BNode()
                graph.add((subj, RDF.rest, following))
                subj = following
        return first_subj