Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/prov/serializers/provrdf.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """PROV-RDF serializers for ProvDocument | |
2 """ | |
3 from __future__ import (absolute_import, division, print_function, | |
4 unicode_literals) | |
5 | |
6 import base64 | |
7 from collections import OrderedDict | |
8 import datetime | |
9 import io | |
10 | |
11 import dateutil.parser | |
12 import six | |
13 | |
14 from rdflib.term import URIRef, BNode | |
15 from rdflib.term import Literal as RDFLiteral | |
16 from rdflib.graph import ConjunctiveGraph | |
17 from rdflib.namespace import RDF, RDFS, XSD | |
18 | |
19 import prov.model as pm | |
20 from prov.constants import ( | |
21 PROV, PROV_ID_ATTRIBUTES_MAP, PROV_N_MAP, PROV_BASE_CLS, XSD_QNAME, | |
22 PROV_END, PROV_START, PROV_USAGE, PROV_GENERATION, PROV_DERIVATION, PROV_INVALIDATION, | |
23 PROV_ALTERNATE, PROV_MENTION, PROV_DELEGATION, PROV_ACTIVITY, PROV_ATTR_STARTTIME, | |
24 PROV_ATTR_ENDTIME, PROV_LOCATION, PROV_ATTR_TIME, PROV_ROLE, PROV_COMMUNICATION, | |
25 PROV_ATTR_INFORMANT, PROV_ATTR_RESPONSIBLE, PROV_ATTR_TRIGGER, PROV_ATTR_ENDER, | |
26 PROV_ATTR_STARTER, PROV_ATTR_USED_ENTITY) | |
27 from prov.serializers import Serializer, Error | |
28 | |
29 | |
30 __author__ = 'Satrajit S. Ghosh' | |
31 __email__ = 'satra@mit.edu' | |
32 | |
33 | |
class ProvRDFException(Error):
    """Error type of the PROV-RDF serializer; derives from the serializers' ``Error``."""
36 | |
37 | |
class AnonymousIDGenerator:
    """Hand out anonymous identifiers, one per distinct object.

    An identifier is built from ``'_:<local_prefix><n>'`` (``n`` being a
    running counter) wrapped in ``prov.model.Identifier``; its ``uri`` is
    cached so the same object always maps to the same value.
    """

    def __init__(self):
        # obj -> previously issued anonymous id
        self._cache = {}
        # number of ids issued so far
        self._count = 0

    def get_anon_id(self, obj, local_prefix="id"):
        """Return the anonymous id of ``obj``, creating it on first request.

        :param obj: Hashable object used as the cache key.
        :param local_prefix: Text placed between ``_:`` and the counter; it
            only matters the first time ``obj`` is seen.
        :return: The ``uri`` of the identifier issued for ``obj``.
        """
        try:
            return self._cache[obj]
        except KeyError:
            pass
        self._count += 1
        anon_uri = pm.Identifier('_:%s%d' % (local_prefix, self._count)).uri
        self._cache[obj] = anon_uri
        return anon_uri
50 | |
51 | |
# Reverse map for prov.model.XSD_DATATYPE_PARSERS:
# Python type -> XSD datatype used when a plain value is written as a literal.
LITERAL_XSDTYPE_MAP = {
    float: XSD['double'],
    int: XSD['int'],
    six.text_type: XSD['string'],
    # boolean, string values are supported natively by PROV-RDF
    # datetime values are converted separately
}

# Add long on Python 2: the last entry of six.integer_types is only missing
# from the map when it is a type other than ``int``.
LITERAL_XSDTYPE_MAP.setdefault(six.integer_types[-1], XSD['long'])
64 | |
65 | |
def attr2rdf(attr):
    """Return the ``URIRef`` in the PROV namespace for a formal attribute.

    The attribute is looked up in ``PROV_ID_ATTRIBUTES_MAP``; the text after
    ``'prov:'`` in the mapped name is used as the local name.

    :param attr: Key of ``PROV_ID_ATTRIBUTES_MAP``.
    """
    prefixed_name = PROV_ID_ATTRIBUTES_MAP[attr]
    local_name = prefixed_name.split('prov:')[1]
    return URIRef(PROV[local_name].uri)
68 | |
69 | |
def valid_qualified_name(bundle, value, xsd_qname=False):
    """Resolve ``value`` through ``bundle.valid_qualified_name``.

    :param bundle: Object providing ``valid_qualified_name(value)``.
    :param value: Value to resolve; ``None`` is returned unchanged.
    :param xsd_qname: When true, the resolved name is passed to ``XSD_QNAME``.
    :return: ``None`` for a ``None`` input, otherwise the resolved name
        (wrapped by ``XSD_QNAME`` if requested).
    """
    if value is None:
        return None
    resolved = bundle.valid_qualified_name(value)
    if xsd_qname:
        # NOTE(review): XSD_QNAME is invoked like a function here but passed
        # as a datatype elsewhere in this module -- confirm it is callable.
        return XSD_QNAME(resolved)
    return resolved
75 | |
76 | |
77 class ProvRDFSerializer(Serializer): | |
78 """ | |
79 PROV-O serializer for :class:`~prov.model.ProvDocument` | |
80 """ | |
81 | |
def serialize(self, stream=None, rdf_format='trig', **kwargs):
    """
    Serializes a :class:`~prov.model.ProvDocument` instance to
    `PROV-O <https://www.w3.org/TR/prov-o/>`_.

    :param stream: Where to save the output. Must be a writable object;
        text streams (``io.TextIOBase``) receive ``str``, anything else
        receives ``bytes``.
    :param rdf_format: The RDF format of the output, default to TRiG.
    :param kwargs: Extra keyword arguments forwarded to the graph's
        ``serialize`` call; a ``format`` entry is overridden by
        ``rdf_format``.
    :raises ProvRDFException: If no ``stream`` is given.
    """
    if stream is None:
        # Fail before doing any encoding work instead of crashing with an
        # AttributeError on ``None.write`` once the graph has been built.
        raise ProvRDFException(
            'A writable stream is required to serialize a document.')

    container = self.encode_document(self.document)
    newargs = kwargs.copy()
    newargs['format'] = rdf_format

    # The same code path serves Python 2 and 3: the graph is written into an
    # in-memory byte buffer which is then copied to the target stream.
    with io.BytesIO() as buf:
        container.serialize(buf, **newargs)
        payload = buf.getvalue()

    # Right now this is a bytestream. If the object to stream to is a text
    # object it must be decoded. We assume utf-8 here which should be fine
    # for almost every case.
    if isinstance(stream, io.TextIOBase):
        stream.write(payload.decode('utf-8'))
    else:
        stream.write(payload)
122 | |
def deserialize(self, stream, rdf_format='trig', **kwargs):
    """
    Deserialize from the `PROV-O <https://www.w3.org/TR/prov-o/>`_
    representation to a :class:`~prov.model.ProvDocument` instance.

    The input is parsed into a fresh ``ConjunctiveGraph``; a new document
    is then stored on ``self.document`` and filled by ``decode_document``.

    :param stream: Input data.
    :param rdf_format: The RDF format of the input data, default: TRiG.
    :param kwargs: Extra keyword arguments forwarded to the graph's
        ``parse`` call; a ``format`` entry is overridden by ``rdf_format``.
    :return: The newly created document.
    """
    parse_options = dict(kwargs, format=rdf_format)
    graph = ConjunctiveGraph()
    graph.parse(stream, **parse_options)
    # Only replace self.document once parsing has succeeded.
    result = pm.ProvDocument()
    self.document = result
    self.decode_document(graph, result)
    return result
139 | |
def valid_identifier(self, value):
    """Resolve ``value`` with ``self.document.valid_qualified_name``.

    :param value: Value to resolve.
    :return: Whatever the document's ``valid_qualified_name`` returns.
    """
    document = self.document
    return document.valid_qualified_name(value)
142 | |
def encode_rdf_representation(self, value):
    """Turn a PROV/Python value into an rdflib term.

    The checks run in a fixed order; the first match wins:

    * ``URIRef`` -- returned unchanged;
    * ``pm.Literal`` -- delegated to ``literal_rdf_representation``;
    * ``datetime.datetime`` -- ISO-formatted ``xsd:dateTime`` literal;
    * ``pm.QualifiedName`` -- ``URIRef`` of its ``uri``;
    * ``pm.Identifier`` -- ``xsd:anyURI`` literal of its ``uri``;
    * a value whose exact type is a key of ``LITERAL_XSDTYPE_MAP`` --
      literal with the mapped datatype;
    * anything else -- literal without an explicit datatype.

    :param value: The value to encode.
    :return: A ``URIRef`` or rdflib literal.
    """
    if isinstance(value, URIRef):
        return value
    if isinstance(value, pm.Literal):
        return literal_rdf_representation(value)
    if isinstance(value, datetime.datetime):
        return RDFLiteral(value.isoformat(), datatype=XSD['dateTime'])
    if isinstance(value, pm.QualifiedName):
        return URIRef(value.uri)
    if isinstance(value, pm.Identifier):
        return RDFLiteral(value.uri, datatype=XSD['anyURI'])
    # Exact-type lookup (not isinstance): subclasses of the mapped types do
    # not pick up the mapped datatype.
    xsd_type = LITERAL_XSDTYPE_MAP.get(type(value))
    if xsd_type is None:
        return RDFLiteral(value)
    return RDFLiteral(value, datatype=xsd_type)
158 | |
def decode_rdf_representation(self, literal, graph):
    """Turn an rdflib term into its PROV/Python counterpart.

    :param literal: The rdflib term (or plain value) to decode.
    :param graph: Graph whose namespace manager is used to compute a
        prefix for URIs the document cannot resolve yet.
    :return: For rdflib literals a ``pm.Literal``, or a parsed datetime for
        ``xsd:dateTime``; for ``URIRef`` a qualified name (registering a
        namespace on ``self.document`` when needed); any other input is
        returned unchanged.
    """
    if isinstance(literal, RDFLiteral):
        # Prefer the converted Python value; fall back to the term itself.
        value = literal if literal.value is None else literal.value
        datatype = getattr(literal, 'datatype', None)
        langtag = getattr(literal, 'language', None)
        if datatype:
            if 'XMLLiteral' in datatype:
                value = literal
            if 'base64Binary' in datatype:
                value = base64.standard_b64encode(value)
        if datatype == XSD['QName']:
            return pm.Literal(literal, datatype=XSD_QNAME)
        if datatype == XSD['dateTime']:
            return dateutil.parser.parse(literal)
        if datatype == XSD['gYear']:
            year = dateutil.parser.parse(literal).year
            return pm.Literal(year, datatype=self.valid_identifier(datatype))
        if datatype == XSD['gYearMonth']:
            moment = dateutil.parser.parse(literal)
            year_month = '{0}-{1:02d}'.format(moment.year, moment.month)
            return pm.Literal(year_month,
                              datatype=self.valid_identifier(datatype))
        # The literal of standard Python types is not converted here
        # It will be automatically converted when added to a record by
        # _auto_literal_conversion()
        return pm.Literal(value, self.valid_identifier(datatype), langtag)

    if isinstance(literal, URIRef):
        known_name = self.valid_identifier(literal)
        if known_name is not None:
            return known_name
        # Unknown namespace: derive a prefix from the graph and register it.
        prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
        namespace = self.document.add_namespace(prefix, iri)
        return pm.QualifiedName(namespace,
                                literal.replace(namespace.uri, ''))

    # simple type, just return it
    return literal
194 | |
def encode_document(self, document):
    """Encode ``document`` and all of its bundles into a single graph.

    The document itself is encoded first; every entry of
    ``document.bundles`` is then encoded into its own graph (identified by
    the bundle identifier's ``uri``) whose quads are added to the result.

    :param document: Object accepted by ``encode_container`` that also
        exposes ``bundles``.
    :return: The graph produced for the document, including bundle quads.
    """
    graph = self.encode_container(document)
    for sub_bundle in document.bundles:
        # encoding the sub-bundle
        sub_graph = self.encode_container(
            sub_bundle, identifier=sub_bundle.identifier.uri
        )
        graph.addN(sub_graph.quads())
    return graph
202 | |
def encode_container(self, bundle, container=None, identifier=None):
    """Encode the records of ``bundle`` as RDF statements and return the graph.

    :param bundle: Object exposing ``namespaces`` and ``_records``.
    :param container: Graph to add statements to. When ``None`` a new
        ``ConjunctiveGraph`` is created with ``identifier`` and the
        ``prov`` prefix is bound on it.
    :param identifier: Identifier for a newly created graph. The name is
        rebound inside the record loop to the current record's subject.
    :return: The graph holding the encoded statements.

    NOTE(review): the block nesting below was restored from a listing that
    had lost its indentation -- confirm against the upstream file.
    """
    if container is None:
        container = ConjunctiveGraph(identifier=identifier)
        nm = container.namespace_manager
        nm.bind('prov', PROV.uri)

    for namespace in bundle.namespaces:
        container.bind(namespace.prefix, namespace.uri)

    id_generator = AnonymousIDGenerator()
    # Use the record's own identifier when it has one, otherwise an
    # anonymous id that stays the same for the same record.
    real_or_anon_id = lambda record: record._identifier.uri if \
        record._identifier else id_generator.get_anon_id(record)

    for record in bundle._records:
        rec_type = record.get_type()
        # Records with an identifier get their rdf:type statement up front;
        # for the others ``identifier`` is reset to None.
        if hasattr(record, 'identifier') and record.identifier:
            identifier = URIRef(six.text_type(real_or_anon_id(record)))
            container.add((identifier, RDF.type, URIRef(rec_type.uri)))
        else:
            identifier = None
        if record.attributes:
            bnode = None
            formal_objects = []
            used_objects = []
            all_attributes = list(record.formal_attributes) + list(record.attributes)
            # True when a formal attribute carries a value that needs a
            # qualified node: any valued formal attribute on an identified
            # record, or one beyond the first two on an unidentified record.
            formal_qualifiers = False
            for attrid, (attr, value) in enumerate(list(record.formal_attributes)):
                if (identifier is not None and value is not None) or \
                        (identifier is None and value is not None and attrid > 1):
                    formal_qualifiers = True
            has_qualifiers = len(record.extra_attributes) > 0 or formal_qualifiers
            for idx, (attr, value) in enumerate(all_attributes):
                if record.is_relation():
                    pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                    # create bnode relation
                    # ``bnode`` is only assigned further down when a blank
                    # node is created, so until then this setup runs again
                    # for each attribute of the record.
                    if bnode is None:
                        valid_formal_indices = set()
                        # This ``idx`` shadows the outer loop variable of
                        # the same name.
                        for idx, (key, val) in enumerate(record.formal_attributes):
                            formal_objects.append(key)
                            if val:
                                valid_formal_indices.add(idx)
                        used_objects = [record.formal_attributes[0][0]]
                        subj = None
                        if record.formal_attributes[0][1]:
                            subj = URIRef(record.formal_attributes[0][1].uri)
                        if identifier is None and subj is not None:
                            try:
                                obj_val = record.formal_attributes[1][1]
                                obj_attr = URIRef(record.formal_attributes[1][0].uri)
                                # TODO: Why is obj_attr above not used anywhere?
                            except IndexError:
                                obj_val = None
                            # Emit the direct (unqualified) statement, except
                            # for the listed record types unless exactly the
                            # first two formal attributes are set and there
                            # are no extra attributes.
                            if obj_val and (rec_type not in {PROV_END,
                                                             PROV_START,
                                                             PROV_USAGE,
                                                             PROV_GENERATION,
                                                             PROV_DERIVATION,
                                                             PROV_INVALIDATION} or
                                            (valid_formal_indices == {0, 1} and
                                             len(record.extra_attributes) == 0)):
                                used_objects.append(record.formal_attributes[1][0])
                                obj_val = self.encode_rdf_representation(obj_val)
                                if rec_type == PROV_ALTERNATE:
                                    subj, obj_val = obj_val, subj
                                container.add((subj, pred, obj_val))
                                if rec_type == PROV_MENTION:
                                    if record.formal_attributes[2][1]:
                                        used_objects.append(record.formal_attributes[2][0])
                                        obj_val = self.encode_rdf_representation(record.formal_attributes[2][1])
                                        container.add((subj, URIRef(PROV['asInBundle'].uri), obj_val))
                                    has_qualifiers = False
                        if rec_type in [PROV_ALTERNATE]:
                            continue
                        if subj and (has_qualifiers or identifier):
                            qualifier = rec_type._localpart
                            rec_uri = rec_type.uri
                            # A prov:type of Revision, Quotation or
                            # PrimarySource replaces the qualifier name and
                            # the type URI of the qualified node.
                            for attr_name, val in record.extra_attributes:
                                if attr_name == PROV['type']:
                                    if PROV['Revision'] == val or \
                                            PROV['Quotation'] == val or \
                                            PROV['PrimarySource'] == val:
                                        qualifier = val._localpart
                                        rec_uri = val.uri
                                        if identifier is not None:
                                            container.remove((identifier,
                                                              RDF.type,
                                                              URIRef(rec_type.uri)))
                            QRole = URIRef(PROV['qualified' + qualifier].uri)
                            if identifier is not None:
                                container.add((subj, QRole, identifier))
                            else:
                                bnode = identifier = BNode()
                                container.add((subj, QRole, identifier))
                                container.add(
                                    (identifier, RDF.type, URIRef(rec_uri))
                                )  # reset identifier to BNode
                    if value is not None and attr not in used_objects:
                        # Choose the predicate for this attribute ...
                        if attr in formal_objects:
                            pred = attr2rdf(attr)
                        elif attr == PROV['role']:
                            pred = URIRef(PROV['hadRole'].uri)
                        elif attr == PROV['plan']:
                            pred = URIRef(PROV['hadPlan'].uri)
                        elif attr == PROV['type']:
                            pred = RDF.type
                        elif attr == PROV['label']:
                            pred = RDFS.label
                        elif isinstance(attr, pm.QualifiedName):
                            pred = URIRef(attr.uri)
                        else:
                            pred = self.encode_rdf_representation(attr)
                        # ... then apply the record-type specific renames.
                        # These are substring tests on the predicate URI.
                        if PROV['plan'].uri in pred:
                            pred = URIRef(PROV['hadPlan'].uri)
                        if PROV['informant'].uri in pred:
                            pred = URIRef(PROV['activity'].uri)
                        if PROV['responsible'].uri in pred:
                            pred = URIRef(PROV['agent'].uri)
                        if rec_type == PROV_DELEGATION and PROV['activity'].uri in pred:
                            pred = URIRef(PROV['hadActivity'].uri)
                        if (rec_type in [PROV_END, PROV_START] and PROV['trigger'].uri in pred) or\
                                (rec_type in [PROV_USAGE] and PROV['used'].uri in pred):
                            pred = URIRef(PROV['entity'].uri)
                        if rec_type in [PROV_GENERATION, PROV_END,
                                        PROV_START, PROV_USAGE,
                                        PROV_INVALIDATION]:
                            if PROV['time'].uri in pred:
                                pred = URIRef(PROV['atTime'].uri)
                            if PROV['ender'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['starter'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['location'].uri in pred:
                                pred = URIRef(PROV['atLocation'].uri)
                        if rec_type in [PROV_ACTIVITY]:
                            if PROV_ATTR_STARTTIME in pred:
                                pred = URIRef(PROV['startedAtTime'].uri)
                            if PROV_ATTR_ENDTIME in pred:
                                pred = URIRef(PROV['endedAtTime'].uri)
                        if rec_type == PROV_DERIVATION:
                            if PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['generation'].uri in pred:
                                pred = URIRef(PROV['hadGeneration'].uri)
                            if PROV['usage'].uri in pred:
                                pred = URIRef(PROV['hadUsage'].uri)
                            if PROV['usedEntity'].uri in pred:
                                pred = URIRef(PROV['entity'].uri)
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(value)))
                    # Relations are fully handled above.
                    continue
                # From here on the record is not a relation.
                if value is None:
                    continue
                if isinstance(value, pm.ProvRecord):
                    obj = URIRef(six.text_type(real_or_anon_id(value)))
                else:
                    # Assuming this is a datetime value
                    obj = self.encode_rdf_representation(value)
                if attr == PROV['location']:
                    pred = URIRef(PROV['atLocation'].uri)
                    # The first branch is never taken because of the literal
                    # ``False`` in its condition.
                    if False and isinstance(value, (URIRef, pm.QualifiedName)):
                        if isinstance(value, pm.QualifiedName):
                            value = URIRef(value.uri)
                        container.add((identifier, pred, value))
                    else:
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(obj)))
                    continue
                if attr == PROV['type']:
                    pred = RDF.type
                elif attr == PROV['label']:
                    pred = RDFS.label
                elif attr == PROV_ATTR_STARTTIME:
                    pred = URIRef(PROV['startedAtTime'].uri)
                elif attr == PROV_ATTR_ENDTIME:
                    pred = URIRef(PROV['endedAtTime'].uri)
                else:
                    pred = self.encode_rdf_representation(attr)
                container.add((identifier, pred, obj))
    return container
382 | |
def decode_document(self, content, document):
    """Populate ``document`` from the parsed graph ``content``.

    All namespaces of ``content`` are registered on ``document`` first.
    If ``content`` offers ``contexts()``, each context is decoded: those
    identified by a ``BNode`` go straight into ``document``, the others
    into a bundle created from the context identifier. Without
    ``contexts`` the graph is decoded into ``document`` as a whole.

    :param content: The parsed RDF graph.
    :param document: The document to fill.
    """
    for prefix, url in content.namespaces():
        document.add_namespace(prefix, six.text_type(url))

    if not hasattr(content, 'contexts'):
        self.decode_container(content, document)
        return

    for graph in content.contexts():
        if isinstance(graph.identifier, BNode):
            target = document
        else:
            target = document.bundle(six.text_type(graph.identifier))
        self.decode_container(graph, target)
396 | |
def decode_container(self, graph, bundle):
    """Rebuild PROV records in ``bundle`` from the statements of ``graph``.

    The method makes three passes:

    1. over all ``rdf:type`` statements, to find the subjects that are PROV
       records and to collect remaining types as ``prov:type`` attributes;
    2. over every statement, to create simple relations directly on
       ``bundle`` and to gather formal/other attributes of the records;
    3. over the collected records, to create them with ``new_record``.

    :param graph: The RDF graph (or context) to read.
    :param bundle: The document or bundle receiving the records.
    :raises ValueError: If a non-``None`` object decodes to ``None``.

    NOTE(review): the block nesting below was restored from a listing that
    had lost its indentation -- confirm against the upstream file.
    """
    ids = {}  # subject (text) -> PROV record type
    PROV_CLS_MAP = {}  # type URI -> base PROV class
    formal_attributes = {}  # subject -> OrderedDict(formal attr -> value)
    unique_sets = {}  # subject -> OrderedDict(formal attr -> [all values seen])
    for key, val in PROV_BASE_CLS.items():
        PROV_CLS_MAP[key.uri] = PROV_BASE_CLS[key]
    # PROV-O predicate -> name of the bundle method creating that relation.
    relation_mapper = {URIRef(PROV['alternateOf'].uri): 'alternate',
                       URIRef(PROV['actedOnBehalfOf'].uri): 'delegation',
                       URIRef(PROV['specializationOf'].uri): 'specialization',
                       URIRef(PROV['mentionOf'].uri): 'mention',
                       URIRef(PROV['wasAssociatedWith'].uri): 'association',
                       URIRef(PROV['wasDerivedFrom'].uri): 'derivation',
                       URIRef(PROV['wasAttributedTo'].uri): 'attribution',
                       URIRef(PROV['wasInformedBy'].uri): 'communication',
                       URIRef(PROV['wasGeneratedBy'].uri): 'generation',
                       URIRef(PROV['wasInfluencedBy'].uri): 'influence',
                       URIRef(PROV['wasInvalidatedBy'].uri): 'invalidation',
                       URIRef(PROV['wasEndedBy'].uri): 'end',
                       URIRef(PROV['wasStartedBy'].uri): 'start',
                       URIRef(PROV['hadMember'].uri): 'membership',
                       URIRef(PROV['used'].uri): 'usage',
                       }
    # RDF predicate -> PROV attribute it is stored under.
    predicate_mapper = {RDFS.label: pm.PROV['label'],
                        URIRef(PROV['atLocation'].uri): PROV_LOCATION,
                        URIRef(PROV['startedAtTime'].uri): PROV_ATTR_STARTTIME,
                        URIRef(PROV['endedAtTime'].uri): PROV_ATTR_ENDTIME,
                        URIRef(PROV['atTime'].uri): PROV_ATTR_TIME,
                        URIRef(PROV['hadRole'].uri): PROV_ROLE,
                        URIRef(PROV['hadPlan'].uri): pm.PROV_ATTR_PLAN,
                        URIRef(PROV['hadUsage'].uri): pm.PROV_ATTR_USAGE,
                        URIRef(PROV['hadGeneration'].uri): pm.PROV_ATTR_GENERATION,
                        URIRef(PROV['hadActivity'].uri): pm.PROV_ATTR_ACTIVITY,
                        }
    other_attributes = {}  # subject -> [(attribute, value), ...]
    # Pass 1: rdf:type statements.
    for stmt in graph.triples((None, RDF.type, None)):
        id = six.text_type(stmt[0])
        obj = six.text_type(stmt[2])
        if obj in PROV_CLS_MAP:
            # Register a namespace for named subjects the document cannot
            # resolve yet.
            if not isinstance(stmt[0], BNode) and self.valid_identifier(id) is None:
                prefix, iri, _ = graph.namespace_manager.compute_qname(id)
                self.document.add_namespace(prefix, iri)
            # NOTE(review): ``obj`` was just found in PROV_CLS_MAP, so this
            # lookup should not fail; the AttributeError handler looks
            # unreachable -- confirm.
            try:
                prov_obj = PROV_CLS_MAP[obj]
            except AttributeError:
                prov_obj = None
            add_attr = True
            isderivation = pm.PROV['Revision'].uri in stmt[2] or \
                pm.PROV['Quotation'].uri in stmt[2] or \
                pm.PROV['PrimarySource'].uri in stmt[2]
            # First time this subject is seen as a record: remember its type
            # and prepare empty slots for its formal attributes.
            if id not in ids and prov_obj and (prov_obj.uri == obj or
                                               isderivation or
                                               isinstance(stmt[0], BNode)):
                ids[id] = prov_obj
                klass = pm.PROV_REC_CLS[prov_obj]
                formal_attributes[id] = OrderedDict([(key, None) for key in klass.FORMAL_ATTRIBUTES])
                unique_sets[id] = OrderedDict([(key, []) for key in klass.FORMAL_ATTRIBUTES])
                # Keep the type as an attribute only when it differs from
                # the base class URI (blank-node or derivation subjects).
                add_attr = False or ((isinstance(stmt[0], BNode) or isderivation) and prov_obj.uri != obj)
            if add_attr:
                if id not in other_attributes:
                    other_attributes[id] = []
                obj_formatted = self.decode_rdf_representation(stmt[2], graph)
                other_attributes[id].append((pm.PROV['type'], obj_formatted))
        else:
            # Types outside PROV_CLS_MAP are stored as prov:type attributes.
            if id not in other_attributes:
                other_attributes[id] = []
            obj = self.decode_rdf_representation(stmt[2], graph)
            other_attributes[id].append((pm.PROV['type'], obj))
    # Pass 2: every statement of the graph.
    for id, pred, obj in graph:
        id = six.text_type(id)
        if id not in other_attributes:
            other_attributes[id] = []
        if pred == RDF.type:
            continue
        if pred in relation_mapper:
            if 'alternateOf' in pred:
                # Arguments are passed in swapped order for alternateOf.
                getattr(bundle, relation_mapper[pred])(obj, id)
            elif 'mentionOf' in pred:
                # Use the object of a prov:asInBundle statement on the same
                # subject, if any (the last one found wins).
                mentionBundle = None
                for stmt in graph.triples((URIRef(id), URIRef(pm.PROV['asInBundle'].uri), None)):
                    mentionBundle = stmt[2]
                getattr(bundle, relation_mapper[pred])(id, six.text_type(obj), mentionBundle)
            elif 'actedOnBehalfOf' in pred or 'wasAssociatedWith' in pred:
                # Look for a qualifiedDelegation / qualifiedAssociation node;
                # if present, fill its first two formal attributes instead
                # of creating the relation right away.
                qualifier = 'qualified' + relation_mapper[pred].upper()[0] + relation_mapper[pred][1:]
                qualifier_bnode = None
                for stmt in graph.triples((URIRef(id), URIRef(pm.PROV[qualifier].uri), None)):
                    qualifier_bnode = stmt[2]
                if qualifier_bnode is None:
                    getattr(bundle, relation_mapper[pred])(id, six.text_type(obj))
                else:
                    fakeys = list(formal_attributes[six.text_type(qualifier_bnode)].keys())
                    formal_attributes[six.text_type(qualifier_bnode)][fakeys[0]] = id
                    formal_attributes[six.text_type(qualifier_bnode)][fakeys[1]] = six.text_type(obj)
            else:
                getattr(bundle, relation_mapper[pred])(id, six.text_type(obj))
        elif id in ids:
            obj1 = self.decode_rdf_representation(obj, graph)
            if obj is not None and obj1 is None:
                raise ValueError(('Error transforming', obj))
            # Translate the predicate to the PROV attribute, then apply the
            # record-type specific renames (substring tests on its text).
            pred_new = pred
            if pred in predicate_mapper:
                pred_new = predicate_mapper[pred]
            if ids[id] == PROV_COMMUNICATION and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_INFORMANT
            if ids[id] == PROV_DELEGATION and 'agent' in six.text_type(pred_new):
                pred_new = PROV_ATTR_RESPONSIBLE
            if ids[id] in [PROV_END, PROV_START] and 'entity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_TRIGGER
            if ids[id] in [PROV_END] and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_ENDER
            if ids[id] in [PROV_START] and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_STARTER
            if ids[id] == PROV_DERIVATION and 'entity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_USED_ENTITY
            if six.text_type(pred_new) in [val.uri for val in formal_attributes[id]]:
                # Formal attribute: record the value; once a second value
                # shows up the single slot is cleared and the values are
                # expanded in pass 3.
                qname_key = self.valid_identifier(pred_new)
                formal_attributes[id][qname_key] = obj1
                unique_sets[id][qname_key].append(obj1)
                if len(unique_sets[id][qname_key]) > 1:
                    formal_attributes[id][qname_key] = None
            else:
                if 'qualified' not in six.text_type(pred_new) and \
                        'asInBundle' not in six.text_type(pred_new):
                    other_attributes[id].append((six.text_type(pred_new), obj1))
        # A "qualified..." statement pointing at a record sets that record's
        # first formal attribute to the statement's subject.
        local_key = six.text_type(obj)
        if local_key in ids:
            if 'qualified' in pred:
                formal_attributes[local_key][list(formal_attributes[local_key].keys())[0]] = id
    # Pass 3: create the records.
    for id in ids:
        attrs = None
        if id in other_attributes:
            attrs = other_attributes[id]
        # Formal attributes that received more than one value.
        items_to_walk = []
        for qname, values in unique_sets[id].items():
            if values and len(values) > 1:
                items_to_walk.append((qname, values))
        if items_to_walk:
            # One record per combination of the multi-valued attributes.
            for subset in list(walk(items_to_walk)):
                for key, value in subset.items():
                    formal_attributes[id][key] = value
                bundle.new_record(ids[id], id, formal_attributes[id], attrs)
        else:
            bundle.new_record(ids[id], id, formal_attributes[id], attrs)
        ids[id] = None
        if attrs is not None:
            other_attributes[id] = []
    # NOTE(review): at this point every ``ids`` value has been set to None
    # and the attribute lists of those subjects were emptied, so a non-empty
    # list here belongs to a subject that is not a key of ``ids`` and the
    # lookup would raise KeyError -- confirm the intended behaviour.
    for key, val in other_attributes.items():
        if val:
            ids[key].add_attributes(val)
546 | |
547 | |
def walk(children, level=0, path=None, usename=True):
    """Generate all the full paths in a tree, as a dict.

    Each entry of ``children`` is a ``(name, source)`` pair describing one
    tree level. ``source`` is either an iterable of the values for that
    level or a zero-argument callable returning such an iterable. Every
    yielded dict holds one value per level, i.e. the generator enumerates
    the cartesian product of all levels, the last level varying fastest.

    :param children: Sequence of ``(name, source)`` pairs.
    :param level: Current recursion depth; callers leave it at ``0``.
    :param path: Partial path shared between recursion levels; it is reset
        to an empty dict at ``level`` 0.
    :param usename: Key the result by level name (``True``) or by level
        index (``False``).
    :return: Generator of dicts, each one a fresh copy.

    :Example:

    >>> from prov.serializers.provrdf import walk
    >>> iterables = [('a', lambda: [1, 2]), ('b', lambda: [3, 4])]
    >>> [val['a'] for val in walk(iterables)]
    [1, 1, 2, 2]
    >>> [val['b'] for val in walk(iterables)]
    [3, 4, 3, 4]
    """
    # Entry point
    if level == 0:
        path = {}
    # Exit condition
    if not children:
        yield path.copy()
        return
    # Tree recursion
    (name, source), tail = children[0], children[1:]
    # Accept the callable form shown in the example as well as a plain
    # iterable; anything already iterable is used as is.
    if callable(source) and not hasattr(source, '__iter__'):
        values = source()
    else:
        values = source
    # We can use the arg name or the tree level as a key
    key = name if usename else level
    for child in values:
        path[key] = child
        # Recurse into the next level
        for child_paths in walk(tail, level + 1, path, usename):
            yield child_paths
579 | |
580 | |
def literal_rdf_representation(literal):
    """Convert a PROV literal into an rdflib literal.

    :param literal: Object exposing ``value``, ``langtag`` and ``datatype``.
    :return: A language-tagged rdflib literal when ``langtag`` is set,
        otherwise a literal typed with the URI of ``datatype`` (untyped if
        no datatype is present).
    """
    # Test against None rather than truthiness so that a falsy but valid
    # value (such as an empty string) is still emitted as its own text
    # instead of falling back to the literal object.
    raw_value = literal.value
    value = six.text_type(raw_value) if raw_value is not None else literal
    if literal.langtag:
        # a language tag can only go with prov:InternationalizedString
        return RDFLiteral(value, lang=str(literal.langtag))

    datatype = literal.datatype
    # Guard against a missing datatype instead of failing on ``None.uri``.
    datatype_uri = datatype.uri if datatype is not None else None
    if datatype_uri is not None and 'base64Binary' in datatype_uri:
        # NOTE(review): the two interpreter branches differ -- Python 2
        # base64-encodes the text, Python 3 only turns it into bytes.
        # Kept as found; confirm which one is intended.
        if six.PY2:
            value = base64.standard_b64encode(value)
        else:
            value = literal.value.encode()
    return RDFLiteral(value, datatype=datatype_uri)