Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib_jsonld/parser.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 This parser will interpret a JSON-LD document as an RDF Graph. See: | |
| 4 | |
| 5 http://json-ld.org/ | |
| 6 | |
| 7 Example usage:: | |
| 8 | |
| 9 >>> from rdflib.plugin import register, Parser | |
| 10 >>> register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') | |
| 11 | |
| 12 >>> from rdflib import Graph, URIRef, Literal | |
| 13 >>> test_json = ''' | |
| 14 ... { | |
| 15 ... "@context": { | |
| 16 ... "dc": "http://purl.org/dc/terms/", | |
| 17 ... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", | |
| 18 ... "rdfs": "http://www.w3.org/2000/01/rdf-schema#" | |
| 19 ... }, | |
| 20 ... "@id": "http://example.org/about", | |
| 21 ... "dc:title": { | |
| 22 ... "@language": "en", | |
| 23 ... "@value": "Someone's Homepage" | |
| 24 ... } | |
| 25 ... } | |
| 26 ... ''' | |
| 27 >>> g = Graph().parse(data=test_json, format='json-ld') | |
| 28 >>> list(g) == [(URIRef('http://example.org/about'), | |
| 29 ... URIRef('http://purl.org/dc/terms/title'), | |
| 30 ... Literal("Someone's Homepage", lang='en'))] | |
| 31 True | |
| 32 | |
| 33 """ | |
| 34 # NOTE: This code reads the entire JSON object into memory before parsing, but | |
| 35 # we should consider streaming the input to deal with arbitrarily large graphs. | |
| 36 | |
| 37 import warnings | |
| 38 from rdflib.graph import ConjunctiveGraph | |
| 39 from rdflib.parser import Parser, URLInputSource | |
| 40 from rdflib.namespace import RDF, XSD | |
| 41 from rdflib.term import URIRef, BNode, Literal | |
| 42 | |
| 43 from ._compat import str, str | |
| 44 from .context import Context, Term, UNDEF | |
| 45 from .util import source_to_json, VOCAB_DELIMS, context_from_urlinputsource | |
| 46 from .keys import CONTEXT, GRAPH, ID, INDEX, LANG, LIST, REV, SET, TYPE, VALUE, VOCAB | |
| 47 | |
__all__ = ['JsonLDParser', 'to_rdf']


# Register the "jsonld" file suffix so RDFLib can guess the format from a
# file name. An existing mapping for that suffix is left untouched, and
# RDFLib versions without SUFFIX_FORMAT_MAP are silently tolerated.
try:
    from rdflib.util import SUFFIX_FORMAT_MAP
except ImportError:
    pass
else:
    SUFFIX_FORMAT_MAP.setdefault('jsonld', 'application/ld+json')


# Term substituted for the TYPE keyword: predicate rdf:type, values coerced
# with VOCAB.
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB)

# Module-wide default for Parser(allow_lists_of_lists=None).
ALLOW_LISTS_OF_LISTS = True  # NOTE: Not allowed in JSON-LD 1.0
| 63 | |
| 64 | |
class JsonLDParser(Parser):
    """RDFLib parser plugin that reads a JSON-LD document into a graph."""

    def __init__(self):
        super(JsonLDParser, self).__init__()

    def parse(self, source, sink, **kwargs):
        """Parse the JSON-LD document in *source* and add its triples to *sink*.

        :param source: input source; must provide ``getPublicId()`` and
            ``getSystemId()`` and be readable by ``source_to_json``.
        :param sink: graph that receives the triples.

        Recognized keyword arguments:

        ``encoding``
            Only inspected to emit a warning when it is neither ``utf-8`` nor
            ``utf-16``; it is not passed on to the JSON loader.
        ``base``
            Base IRI. Defaults to the absolutized public or system id of
            *source*.
        ``context``
            Context data loaded before the document is processed. When it is
            missing and *source* is a ``URLInputSource``, it is obtained via
            ``context_from_urlinputsource``.
        ``produce_generalized_rdf``
            Forwarded to :func:`to_rdf`; defaults to ``False``.
        """
        encoding = kwargs.get('encoding') or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn(
                "JSON should be encoded as unicode. "
                "Given encoding was: %s" % encoding)

        base = kwargs.get('base') or sink.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        context_data = kwargs.get('context')
        if not context_data and isinstance(source, URLInputSource):
            context_data = context_from_urlinputsource(source)
        produce_generalized_rdf = kwargs.get('produce_generalized_rdf', False)

        data = source_to_json(source)

        # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
        # context_aware. Keeping this check in case RDFLib is changed, or
        # someone passes something context_aware to this parser directly.
        if not sink.context_aware:
            conj_sink = ConjunctiveGraph(
                store=sink.store,
                identifier=sink.identifier)
        else:
            conj_sink = sink

        # The option used to be read above but never handed on, which made it
        # a no-op; forward it so callers can actually enable generalized RDF.
        to_rdf(data, conj_sink, base, context_data,
               produce_generalized_rdf=produce_generalized_rdf)
| 96 | |
| 97 | |
def to_rdf(data, dataset, base=None, context_data=None,
           produce_generalized_rdf=False,
           allow_lists_of_lists=None):
    """Convert decoded JSON-LD *data* into triples added to *dataset*.

    :param data: the decoded JSON document.
    :param dataset: graph-like object that receives the triples.
    :param base: base IRI handed to the initial ``Context``.
    :param context_data: optional context data loaded into that context
        before parsing; skipped when falsy.
    :param produce_generalized_rdf: passed to ``Parser`` as
        ``generalized_rdf``.
    :param allow_lists_of_lists: passed to ``Parser`` unchanged.
    :returns: whatever ``Parser.parse`` returns.
    """
    active_context = Context(base=base)
    if context_data:
        active_context.load(context_data)
    converter = Parser(
        generalized_rdf=produce_generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
    )
    return converter.parse(data, active_context, dataset)
| 108 | |
| 109 | |
class Parser(object):
    """Walk decoded JSON-LD data and add the triples it describes to a graph.

    :param generalized_rdf: when true, properties whose IRI is a blank node
        identifier (``_:`` prefix) are emitted with a ``BNode`` predicate and
        node ids that resolve to a string without ``:`` are kept; when false
        both are skipped.
    :param allow_lists_of_lists: whether a list found directly inside another
        list is converted. ``None`` selects the module default
        ``ALLOW_LISTS_OF_LISTS``.
    """

    def __init__(self, generalized_rdf=False, allow_lists_of_lists=None):
        self.generalized_rdf = generalized_rdf
        self.allow_lists_of_lists = (
            allow_lists_of_lists
            if allow_lists_of_lists is not None else ALLOW_LISTS_OF_LISTS)

    def parse(self, data, context, dataset):
        """Add every top-level node of *data* to *dataset*.

        :param data: decoded JSON; either a list of nodes or a single dict
            node. Any other value contributes no nodes.
        :param context: the active context. A truthy CONTEXT entry on a
            top-level dict is loaded into it in place.
        :param dataset: graph-like sink; receives namespace bindings and
            triples.
        :returns: the graph the top-level triples were added to, i.e.
            ``dataset.default_context`` for a context-aware dataset and
            *dataset* itself otherwise.
        """
        topcontext = False

        if isinstance(data, list):
            resources = data
        elif isinstance(data, dict):
            l_ctx = data.get(CONTEXT)
            if l_ctx:
                context.load(l_ctx, context.base)
                # Already applied here; stops _add_to_graph from deriving a
                # subcontext from the same CONTEXT entry again.
                topcontext = True
            resources = [data]
        else:
            # A bare scalar (or null) document holds no nodes. Previously
            # this left `resources` unbound and failed further down.
            resources = []

        if context.vocab:
            dataset.bind(None, context.vocab)
        for name, term in context.terms.items():
            if term.id and term.id.endswith(VOCAB_DELIMS):
                dataset.bind(name, term.id)

        graph = dataset.default_context if dataset.context_aware else dataset

        for node in resources:
            self._add_to_graph(dataset, graph, context, node, topcontext)

        return graph

    def _add_to_graph(self, dataset, graph, context, node, topcontext=False):
        """Add the triples of one node object to *graph*.

        :param topcontext: true when the node's CONTEXT entry was already
            loaded by :meth:`parse` and must not be applied a second time.
        :returns: the node's subject (``URIRef`` or ``BNode``), or ``None``
            when *node* is not a dict, has a truthy ``context.get_value``
            result, or its id is rejected by :meth:`_to_rdf_id`.
        """
        if not isinstance(node, dict) or context.get_value(node):
            return

        if CONTEXT in node and not topcontext:
            l_ctx = node.get(CONTEXT)
            if l_ctx:
                context = context.subcontext(l_ctx)
            else:
                # A falsy local context starts over from a fresh context
                # based on the document base.
                context = Context(base=context.doc_base)

        id_val = context.get_id(node)
        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()

        if subj is None:
            return None

        # NOTE: crude way to signify that this node might represent a named graph
        no_id = id_val is None

        for key, obj in node.items():
            if key in (CONTEXT, ID) or key in context.get_keys(ID):
                continue
            if key == REV or key in context.get_keys(REV):
                # Every property inside a REV map is emitted with subject and
                # object swapped.
                for rkey, robj in obj.items():
                    self._key_to_graph(dataset, graph, context, subj, rkey,
                                       robj, reverse=True, no_id=no_id)
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj,
                                   no_id=no_id)

        return subj

    def _key_to_graph(self, dataset, graph, context, subj, key, obj,
                      reverse=False, no_id=False):
        """Add the triples for one ``key: obj`` entry of the node *subj*.

        :param reverse: emit ``(object, predicate, subj)`` instead of
            ``(subj, predicate, object)``. Toggled again when the term for
            *key* is itself marked as reverse.
        :param no_id: the enclosing node had no id. A GRAPH entry then adds
            its nodes to *graph* rather than to a graph named after *subj*.
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.container == LIST:
                # The whole value becomes a single list object.
                obj_nodes = [{LIST: obj_nodes}]
            elif isinstance(obj, dict):
                if term.container == INDEX:
                    # Index keys are dropped; only the values are kept.
                    obj_nodes = []
                    for values in obj.values():
                        if isinstance(values, list):
                            obj_nodes.extend(values)
                        else:
                            obj_nodes.append(values)
                elif term.container == LANG:
                    # (value, language) tuples are turned into language-tagged
                    # literals by _to_object.
                    obj_nodes = []
                    for lang, values in obj.items():
                        if not isinstance(values, list):
                            values = [values]
                        obj_nodes.extend((v, lang) for v in values)
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM
        elif GRAPH in (key, term_id):
            if dataset.context_aware and not no_id:
                subgraph = dataset.get_context(subj)
            else:
                subgraph = graph
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return
        elif SET in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        pred_uri = term.id if term else context.expand(key)

        # Splice set objects and plain nested arrays into the value list
        # (one level only).
        flattened = []
        for item in obj_nodes:
            if isinstance(item, dict):
                members = context.get_set(item)
                if members is not None:
                    item = members
            if isinstance(item, list):
                flattened += item
            else:
                flattened.append(item)
        obj_nodes = flattened

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        bid = self._get_bnodeid(pred_uri)
        if bid:
            # Blank-node predicates only exist in generalized RDF.
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
        else:
            pred = URIRef(pred_uri)

        for obj_node in obj_nodes:
            rdf_obj = self._to_object(dataset, graph, context, term, obj_node)
            if rdf_obj is None:
                continue
            if reverse:
                graph.add((rdf_obj, pred, subj))
            else:
                graph.add((subj, pred, rdf_obj))

    def _to_object(self, dataset, graph, context, term, node, inlist=False):
        """Convert one JSON value into an RDF object term.

        :param term: the term definition of the property being processed, or
            a falsy value when there is none; supplies type and language
            coercion for scalar values.
        :param node: ``None``, a ``(value, language)`` tuple, a dict (list
            object, value object or node object) or a scalar.
        :param inlist: true when *node* is an element of a list being built.
        :returns: a ``Literal``, the head of an RDF list, the subject of a
            nested node, or ``None`` when nothing should be emitted.
        """
        if node is None:
            return

        if isinstance(node, tuple):
            value, lang = node
            if value is None:
                return
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    return
                listref = self._add_list(dataset, graph, context, term,
                                         node_list)
                if listref:
                    return listref

        else:  # expand..
            if not term or not term.type:
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)
                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)
            else:
                # Rewrite the coerced scalar as the equivalent expanded
                # object so the code below handles both spellings.
                if term.type == ID:
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB:
                    node = {ID: context.expand(node)
                            or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type,
                            VALUE: node}

        lang = context.get_language(node)
        if lang or context.get_key(VALUE) in node or VALUE in node:
            value = context.get_value(node)
            if value is None:
                return None
            # A language tag takes precedence over a datatype.
            if lang:
                return Literal(value, lang=lang)
            datatype = context.get_type(node)
            if datatype:
                return Literal(value, datatype=context.expand(datatype))
            return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context, id_val):
        """Turn a node id string into a ``BNode`` or ``URIRef``.

        :returns: ``None`` when the resolved id contains no ``:`` and
            generalized RDF is disabled.
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            return BNode(bid)
        else:
            uri = context.resolve(id_val)
            if not self.generalized_rdf and ':' not in uri:
                return None
            return URIRef(uri)

    def _get_bnodeid(self, ref):
        """Return the label following a leading ``_:`` in *ref*.

        :returns: the label, or ``None`` when *ref* does not start with
            ``_:`` or the label is empty.
        """
        if not ref.startswith('_:'):
            return
        bid = ref.split('_:', 1)[-1]
        return bid or None

    def _add_list(self, dataset, graph, context, term, node_list):
        """Add *node_list* to *graph* as an RDF collection.

        Elements that are ``None`` or that convert to ``None`` are left out
        of the collection.

        :param node_list: a list of values; a single non-list value is
            treated as a list with one element.
        :returns: the ``BNode`` heading the collection, or ``RDF.nil`` when
            no element produced an object.
        """
        if not isinstance(node_list, list):
            node_list = [node_list]

        # Convert all members first. Linking cells while converting used to
        # emit a self-referential rdf:rest (or a cell without rdf:first)
        # whenever a non-leading element converted to None.
        members = []
        for node in node_list:
            if node is None:
                continue
            obj = self._to_object(dataset, graph, context, term, node,
                                  inlist=True)
            if obj is not None:
                members.append(obj)

        if not members:
            return RDF.nil

        first_subj = subj = BNode()
        last_index = len(members) - 1
        for index, obj in enumerate(members):
            graph.add((subj, RDF.first, obj))
            rest = RDF.nil if index == last_index else BNode()
            graph.add((subj, RDF.rest, rest))
            subj = rest
        return first_subj
