Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/prov/serializers/provrdf.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """PROV-RDF serializers for ProvDocument | |
| 2 """ | |
| 3 from __future__ import (absolute_import, division, print_function, | |
| 4 unicode_literals) | |
| 5 | |
| 6 import base64 | |
| 7 from collections import OrderedDict | |
| 8 import datetime | |
| 9 import io | |
| 10 | |
| 11 import dateutil.parser | |
| 12 import six | |
| 13 | |
| 14 from rdflib.term import URIRef, BNode | |
| 15 from rdflib.term import Literal as RDFLiteral | |
| 16 from rdflib.graph import ConjunctiveGraph | |
| 17 from rdflib.namespace import RDF, RDFS, XSD | |
| 18 | |
| 19 import prov.model as pm | |
| 20 from prov.constants import ( | |
| 21 PROV, PROV_ID_ATTRIBUTES_MAP, PROV_N_MAP, PROV_BASE_CLS, XSD_QNAME, | |
| 22 PROV_END, PROV_START, PROV_USAGE, PROV_GENERATION, PROV_DERIVATION, PROV_INVALIDATION, | |
| 23 PROV_ALTERNATE, PROV_MENTION, PROV_DELEGATION, PROV_ACTIVITY, PROV_ATTR_STARTTIME, | |
| 24 PROV_ATTR_ENDTIME, PROV_LOCATION, PROV_ATTR_TIME, PROV_ROLE, PROV_COMMUNICATION, | |
| 25 PROV_ATTR_INFORMANT, PROV_ATTR_RESPONSIBLE, PROV_ATTR_TRIGGER, PROV_ATTR_ENDER, | |
| 26 PROV_ATTR_STARTER, PROV_ATTR_USED_ENTITY) | |
| 27 from prov.serializers import Serializer, Error | |
| 28 | |
| 29 | |
| 30 __author__ = 'Satrajit S. Ghosh' | |
| 31 __email__ = 'satra@mit.edu' | |
| 32 | |
| 33 | |
class ProvRDFException(Error):
    """Exception type of the PROV-RDF serializer module.

    Adds nothing to the base serializer ``Error`` it derives from.
    """
| 36 | |
| 37 | |
class AnonymousIDGenerator:
    """Mint and remember ``_:<prefix><n>`` identifiers for arbitrary objects.

    The counter is shared by all prefixes, so every identifier handed out by
    one generator carries a distinct number.
    """

    def __init__(self):
        self._count = 0
        self._cache = {}

    def get_anon_id(self, obj, local_prefix="id"):
        """Return the anonymous identifier URI associated with *obj*.

        The first request for a given object increments the counter and
        stores the new identifier; later requests return the stored one
        (``local_prefix`` is then ignored).

        :param obj: Hashable object that needs an identifier.
        :param local_prefix: Text placed between ``_:`` and the counter.
        """
        if obj in self._cache:
            return self._cache[obj]
        self._count += 1
        anon_uri = pm.Identifier(
            '_:%s%d' % (local_prefix, self._count)
        ).uri
        self._cache[obj] = anon_uri
        return anon_uri
| 50 | |
| 51 | |
# Reverse map for prov.model.XSD_DATATYPE_PARSERS
# Maps an exact Python type to the XSD datatype URI attached to the RDF
# literal produced for values of that type (looked up via ``type(value)`` in
# ``ProvRDFSerializer.encode_rdf_representation``, so subclasses do not match).
LITERAL_XSDTYPE_MAP = {
    float: XSD['double'],
    int: XSD['int'],
    six.text_type: XSD['string'],
    # boolean, string values are supported natively by PROV-RDF
    # datetime values are converted separately
}

# Add long on Python 2
# (on Python 3 the last entry of six.integer_types is ``int``, which is
# already a key above, so this branch adds nothing there)
if six.integer_types[-1] not in LITERAL_XSDTYPE_MAP:
    LITERAL_XSDTYPE_MAP[six.integer_types[-1]] = XSD['long']
| 64 | |
| 65 | |
def attr2rdf(attr):
    """Return the PROV-namespace ``URIRef`` for a formal attribute.

    The attribute is looked up in ``PROV_ID_ATTRIBUTES_MAP``; the text after
    ``prov:`` in the mapped name is used as the local name in ``PROV``.

    :param attr: Key of ``PROV_ID_ATTRIBUTES_MAP``.
    """
    prefixed_name = PROV_ID_ATTRIBUTES_MAP[attr]
    local_name = prefixed_name.split('prov:')[1]
    return URIRef(PROV[local_name].uri)
| 68 | |
| 69 | |
def valid_qualified_name(bundle, value, xsd_qname=False):
    """Resolve *value* through ``bundle.valid_qualified_name``.

    :param bundle: Object exposing a ``valid_qualified_name(value)`` method.
    :param value: The value to resolve; ``None`` is passed straight through.
    :param xsd_qname: When true, the resolved name is handed to
        ``XSD_QNAME(...)`` and that result is returned instead.
    :return: ``None`` when *value* is ``None``, otherwise the resolved name
        (optionally wrapped as described above).
    """
    if value is None:
        return None
    resolved = bundle.valid_qualified_name(value)
    if xsd_qname:
        return XSD_QNAME(resolved)
    return resolved
| 75 | |
| 76 | |
| 77 class ProvRDFSerializer(Serializer): | |
| 78 """ | |
| 79 PROV-O serializer for :class:`~prov.model.ProvDocument` | |
| 80 """ | |
| 81 | |
def serialize(self, stream=None, rdf_format='trig', **kwargs):
    """
    Serializes a :class:`~prov.model.ProvDocument` instance to
    `PROV-O <https://www.w3.org/TR/prov-o/>`_.

    The document is first rendered into an in-memory byte buffer and then
    written to ``stream`` in one call.

    :param stream: Where to save the output. Despite the ``None`` default a
        writable object is required, because ``stream.write`` is always
        called. Text streams (:class:`io.TextIOBase`) receive the output
        decoded as UTF-8; any other stream receives the raw bytes.
    :param rdf_format: The RDF format of the output, default to TRiG.
    :param kwargs: Extra keyword arguments forwarded to the graph's
        ``serialize`` call (``format`` is always set from ``rdf_format``).
    """
    container = self.encode_document(self.document)
    newargs = kwargs.copy()
    newargs['format'] = rdf_format

    # The Python 2 and Python 3 code paths were identical, so a single path
    # is kept; the context manager closes the buffer even on failure.
    with io.BytesIO() as buf:
        container.serialize(buf, **newargs)
        payload = buf.getvalue()

    # The payload is a byte string. If the object to stream to is a text
    # object it must be decoded. We assume utf-8 here which should be fine
    # for almost every case.
    if isinstance(stream, io.TextIOBase):
        stream.write(payload.decode('utf-8'))
    else:
        stream.write(payload)
| 122 | |
def deserialize(self, stream, rdf_format='trig', **kwargs):
    """
    Deserialize from the `PROV-O <https://www.w3.org/TR/prov-o/>`_
    representation to a :class:`~prov.model.ProvDocument` instance.

    The input is parsed into a fresh ``ConjunctiveGraph``; a new document is
    created, stored on ``self.document`` and filled by ``decode_document``.

    :param stream: Input data.
    :param rdf_format: The RDF format of the input data, default: TRiG.
    :param kwargs: Extra keyword arguments forwarded to the graph's ``parse``
        call (``format`` is always set from ``rdf_format``).
    :return: The newly populated document.
    """
    parse_options = dict(kwargs)
    parse_options['format'] = rdf_format

    graph = ConjunctiveGraph()
    graph.parse(stream, **parse_options)

    document = pm.ProvDocument()
    # Helper methods resolve identifiers against self.document, so it has to
    # be in place before decoding starts.
    self.document = document
    self.decode_document(graph, document)
    return document
| 139 | |
def valid_identifier(self, value):
    """Resolve *value* via ``valid_qualified_name`` of the current document.

    :param value: The value to resolve.
    :return: Whatever ``self.document.valid_qualified_name`` returns.
    """
    document = self.document
    return document.valid_qualified_name(value)
| 142 | |
def encode_rdf_representation(self, value):
    """Convert a PROV/Python value into an rdflib term.

    The checks run in this order and the first match wins:

    * ``URIRef`` - returned unchanged;
    * ``pm.Literal`` - delegated to ``literal_rdf_representation``;
    * ``datetime.datetime`` - ISO-formatted ``xsd:dateTime`` literal;
    * ``pm.QualifiedName`` - ``URIRef`` of its URI;
    * ``pm.Identifier`` - ``xsd:anyURI`` literal of its URI;
    * exact type listed in ``LITERAL_XSDTYPE_MAP`` - literal typed from it;
    * anything else - plain ``RDFLiteral(value)``.

    :param value: The value to convert.
    :return: The corresponding rdflib term.
    """
    if isinstance(value, URIRef):
        return value
    if isinstance(value, pm.Literal):
        return literal_rdf_representation(value)
    if isinstance(value, datetime.datetime):
        return RDFLiteral(value.isoformat(), datatype=XSD['dateTime'])
    if isinstance(value, pm.QualifiedName):
        return URIRef(value.uri)
    if isinstance(value, pm.Identifier):
        return RDFLiteral(value.uri, datatype=XSD['anyURI'])

    # Exact-type lookup: subclasses of the mapped types do not match.
    xsd_type = LITERAL_XSDTYPE_MAP.get(type(value))
    if xsd_type is not None:
        return RDFLiteral(value, datatype=xsd_type)
    return RDFLiteral(value)
| 158 | |
def decode_rdf_representation(self, literal, graph):
    """Convert an rdflib term into its PROV/Python counterpart.

    * RDF literals become ``pm.Literal`` objects, except ``xsd:dateTime``
      which is parsed into a ``datetime`` by ``dateutil``.
    * ``URIRef`` values become qualified names; when the document cannot
      resolve the URI, a namespace computed by the graph's namespace manager
      is registered on ``self.document`` first.
    * Anything else is returned untouched.

    :param literal: The rdflib term (or plain value) to convert.
    :param graph: Graph whose namespace manager is used for unknown URIs.
    """
    if isinstance(literal, RDFLiteral):
        value = literal if literal.value is None else literal.value
        datatype = getattr(literal, 'datatype', None)
        langtag = getattr(literal, 'language', None)

        if datatype and 'XMLLiteral' in datatype:
            # keep the rdflib literal itself rather than its parsed value
            value = literal
        if datatype and 'base64Binary' in datatype:
            value = base64.standard_b64encode(value)

        if datatype == XSD['QName']:
            return pm.Literal(literal, datatype=XSD_QNAME)
        if datatype == XSD['dateTime']:
            return dateutil.parser.parse(literal)
        if datatype == XSD['gYear']:
            year = dateutil.parser.parse(literal).year
            return pm.Literal(year, datatype=self.valid_identifier(datatype))
        if datatype == XSD['gYearMonth']:
            parsed_info = dateutil.parser.parse(literal)
            year_month = '{0}-{1:02d}'.format(parsed_info.year, parsed_info.month)
            return pm.Literal(year_month,
                              datatype=self.valid_identifier(datatype))

        # The literal of standard Python types is not converted here
        # It will be automatically converted when added to a record by
        # _auto_literal_conversion()
        return pm.Literal(value, self.valid_identifier(datatype), langtag)

    if isinstance(literal, URIRef):
        qualified = self.valid_identifier(literal)
        if qualified is None:
            prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
            namespace = self.document.add_namespace(prefix, iri)
            qualified = pm.QualifiedName(
                namespace, literal.replace(namespace.uri, '')
            )
        return qualified

    # simple type, just return it
    return literal
| 194 | |
def encode_document(self, document):
    """Encode *document* and all of its bundles into a single graph.

    The document's own records are encoded first; every bundle is then
    encoded into a graph identified by the bundle's URI and its quads are
    added to the document graph.

    :param document: Object with a ``bundles`` iterable, accepted by
        ``encode_container``.
    :return: The graph produced for the document, including bundle quads.
    """
    graph = self.encode_container(document)
    for sub_bundle in document.bundles:
        # encoding the sub-bundle
        bundle_uri = sub_bundle.identifier.uri
        bundle_graph = self.encode_container(sub_bundle, identifier=bundle_uri)
        graph.addN(bundle_graph.quads())
    return graph
| 202 | |
def encode_container(self, bundle, container=None, identifier=None):
    """Encode the records of *bundle* as triples in an rdflib graph.

    :param bundle: Object providing ``namespaces`` and ``_records``.
    :param container: Graph to add the triples to. When ``None`` a new
        ``ConjunctiveGraph`` is created (with the ``prov`` prefix bound).
    :param identifier: Identifier given to a newly created graph. Note that
        this name is reused below as the per-record subject.
    :return: The graph that received the triples.
    """
    if container is None:
        container = ConjunctiveGraph(identifier=identifier)
        nm = container.namespace_manager
        nm.bind('prov', PROV.uri)

    for namespace in bundle.namespaces:
        container.bind(namespace.prefix, namespace.uri)

    id_generator = AnonymousIDGenerator()
    # URI of a record's own identifier, or a generated anonymous one
    real_or_anon_id = lambda record: record._identifier.uri if \
        record._identifier else id_generator.get_anon_id(record)

    for record in bundle._records:
        rec_type = record.get_type()
        # Identified records get an rdf:type triple straight away; from here
        # on ``identifier`` is the subject for this record (or None).
        if hasattr(record, 'identifier') and record.identifier:
            identifier = URIRef(six.text_type(real_or_anon_id(record)))
            container.add((identifier, RDF.type, URIRef(rec_type.uri)))
        else:
            identifier = None
        if record.attributes:
            bnode = None
            formal_objects = []
            used_objects = []
            all_attributes = list(record.formal_attributes) + list(record.attributes)
            # Decide whether a qualified (reified) form is needed: any formal
            # value on an identified record, or a formal value beyond the
            # first two on an unidentified one.
            formal_qualifiers = False
            for attrid, (attr, value) in enumerate(list(record.formal_attributes)):
                if (identifier is not None and value is not None) or \
                        (identifier is None and value is not None and attrid > 1):
                    formal_qualifiers = True
            has_qualifiers = len(record.extra_attributes) > 0 or formal_qualifiers
            for idx, (attr, value) in enumerate(all_attributes):
                if record.is_relation():
                    pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                    # create bnode relation
                    # (this set-up runs while ``bnode`` is still None, i.e.
                    # until a blank node has been created for the record)
                    if bnode is None:
                        valid_formal_indices = set()
                        # NOTE(review): this loop rebinds the outer ``idx``;
                        # the outer value is not read afterwards.
                        for idx, (key, val) in enumerate(record.formal_attributes):
                            formal_objects.append(key)
                            if val:
                                valid_formal_indices.add(idx)
                        used_objects = [record.formal_attributes[0][0]]
                        subj = None
                        if record.formal_attributes[0][1]:
                            subj = URIRef(record.formal_attributes[0][1].uri)
                        # Unidentified relation: try the direct
                        # (subject, relation, object) triple.
                        if identifier is None and subj is not None:
                            try:
                                obj_val = record.formal_attributes[1][1]
                                obj_attr = URIRef(record.formal_attributes[1][0].uri)
                                # TODO: Why is obj_attr above not used anywhere?
                            except IndexError:
                                obj_val = None
                            if obj_val and (rec_type not in {PROV_END,
                                                             PROV_START,
                                                             PROV_USAGE,
                                                             PROV_GENERATION,
                                                             PROV_DERIVATION,
                                                             PROV_INVALIDATION} or
                                            (valid_formal_indices == {0, 1} and
                                             len(record.extra_attributes) == 0)):
                                used_objects.append(record.formal_attributes[1][0])
                                obj_val = self.encode_rdf_representation(obj_val)
                                # alternateOf is emitted with subject and
                                # object swapped
                                if rec_type == PROV_ALTERNATE:
                                    subj, obj_val = obj_val, subj
                                container.add((subj, pred, obj_val))
                                if rec_type == PROV_MENTION:
                                    # third formal attribute becomes
                                    # prov:asInBundle
                                    if record.formal_attributes[2][1]:
                                        used_objects.append(record.formal_attributes[2][0])
                                        obj_val = self.encode_rdf_representation(record.formal_attributes[2][1])
                                        container.add((subj, URIRef(PROV['asInBundle'].uri), obj_val))
                                    has_qualifiers = False
                        if rec_type in [PROV_ALTERNATE]:
                            continue
                        # Qualified form: link the subject to the record node
                        # through prov:qualified<Type>.
                        if subj and (has_qualifiers or identifier):
                            qualifier = rec_type._localpart
                            rec_uri = rec_type.uri
                            # A prov:type of Revision/Quotation/PrimarySource
                            # replaces the record type for the qualifier name
                            # and the rdf:type of the node.
                            for attr_name, val in record.extra_attributes:
                                if attr_name == PROV['type']:
                                    if PROV['Revision'] == val or \
                                            PROV['Quotation'] == val or \
                                            PROV['PrimarySource'] == val:
                                        qualifier = val._localpart
                                        rec_uri = val.uri
                                        if identifier is not None:
                                            container.remove((identifier,
                                                              RDF.type,
                                                              URIRef(rec_type.uri)))
                            QRole = URIRef(PROV['qualified' + qualifier].uri)
                            if identifier is not None:
                                container.add((subj, QRole, identifier))
                            else:
                                bnode = identifier = BNode()
                                container.add((subj, QRole, identifier))
                                container.add(
                                    (identifier, RDF.type, URIRef(rec_uri))
                                )  # reset identifier to BNode
                    # Emit the attribute on the record node unless it was
                    # already consumed by the direct triple above.
                    if value is not None and attr not in used_objects:
                        if attr in formal_objects:
                            pred = attr2rdf(attr)
                        elif attr == PROV['role']:
                            pred = URIRef(PROV['hadRole'].uri)
                        elif attr == PROV['plan']:
                            pred = URIRef(PROV['hadPlan'].uri)
                        elif attr == PROV['type']:
                            pred = RDF.type
                        elif attr == PROV['label']:
                            pred = RDFS.label
                        elif isinstance(attr, pm.QualifiedName):
                            pred = URIRef(attr.uri)
                        else:
                            pred = self.encode_rdf_representation(attr)
                        # Rewrite predicates by substring match on the
                        # predicate URI; some rewrites depend on rec_type.
                        if PROV['plan'].uri in pred:
                            pred = URIRef(PROV['hadPlan'].uri)
                        if PROV['informant'].uri in pred:
                            pred = URIRef(PROV['activity'].uri)
                        if PROV['responsible'].uri in pred:
                            pred = URIRef(PROV['agent'].uri)
                        if rec_type == PROV_DELEGATION and PROV['activity'].uri in pred:
                            pred = URIRef(PROV['hadActivity'].uri)
                        if (rec_type in [PROV_END, PROV_START] and PROV['trigger'].uri in pred) or\
                                (rec_type in [PROV_USAGE] and PROV['used'].uri in pred):
                            pred = URIRef(PROV['entity'].uri)
                        if rec_type in [PROV_GENERATION, PROV_END,
                                        PROV_START, PROV_USAGE,
                                        PROV_INVALIDATION]:
                            if PROV['time'].uri in pred:
                                pred = URIRef(PROV['atTime'].uri)
                            if PROV['ender'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['starter'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['location'].uri in pred:
                                pred = URIRef(PROV['atLocation'].uri)
                        if rec_type in [PROV_ACTIVITY]:
                            if PROV_ATTR_STARTTIME in pred:
                                pred = URIRef(PROV['startedAtTime'].uri)
                            if PROV_ATTR_ENDTIME in pred:
                                pred = URIRef(PROV['endedAtTime'].uri)
                        if rec_type == PROV_DERIVATION:
                            if PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['generation'].uri in pred:
                                pred = URIRef(PROV['hadGeneration'].uri)
                            if PROV['usage'].uri in pred:
                                pred = URIRef(PROV['hadUsage'].uri)
                            if PROV['usedEntity'].uri in pred:
                                pred = URIRef(PROV['entity'].uri)
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(value)))
                    # relations are fully handled above
                    continue
                # ---- non-relation records (elements) ----
                if value is None:
                    continue
                if isinstance(value, pm.ProvRecord):
                    obj = URIRef(six.text_type(real_or_anon_id(value)))
                else:
                    # Assuming this is a datetime value
                    obj = self.encode_rdf_representation(value)
                if attr == PROV['location']:
                    pred = URIRef(PROV['atLocation'].uri)
                    # NOTE(review): ``False and ...`` makes the first branch
                    # unreachable; only the else branch ever runs, and it
                    # encodes ``obj`` a second time.
                    if False and isinstance(value, (URIRef, pm.QualifiedName)):
                        if isinstance(value, pm.QualifiedName):
                            value = URIRef(value.uri)
                        container.add((identifier, pred, value))
                    else:
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(obj)))
                    continue
                if attr == PROV['type']:
                    pred = RDF.type
                elif attr == PROV['label']:
                    pred = RDFS.label
                elif attr == PROV_ATTR_STARTTIME:
                    pred = URIRef(PROV['startedAtTime'].uri)
                elif attr == PROV_ATTR_ENDTIME:
                    pred = URIRef(PROV['endedAtTime'].uri)
                else:
                    pred = self.encode_rdf_representation(attr)
                container.add((identifier, pred, obj))
    return container
| 382 | |
def decode_document(self, content, document):
    """Populate *document* from the parsed RDF graph *content*.

    All namespaces of the graph are registered on the document first. If the
    graph exposes ``contexts()``, each context is decoded separately:
    contexts identified by a blank node go into the document itself, any
    other context goes into a bundle named after the context identifier.
    A graph without ``contexts`` is decoded directly into the document.

    :param content: The parsed rdflib graph.
    :param document: The document to fill.
    """
    for prefix, url in content.namespaces():
        document.add_namespace(prefix, six.text_type(url))

    if not hasattr(content, 'contexts'):
        self.decode_container(content, document)
        return

    for graph in content.contexts():
        if isinstance(graph.identifier, BNode):
            target = document
        else:
            target = document.bundle(six.text_type(graph.identifier))
        self.decode_container(graph, target)
| 396 | |
def decode_container(self, graph, bundle):
    """Rebuild PROV records in *bundle* from the triples of *graph*.

    Works in passes: (1) scan ``rdf:type`` triples to find subjects that are
    PROV records, (2) walk every triple to create unqualified relations and
    to collect formal/other attributes, (3) create one record per collected
    subject, (4) attach whatever attributes are still pending.

    :param graph: The rdflib graph (or context) to read triples from.
    :param bundle: The document or bundle that receives the records.
    """
    ids = {}  # subject (text) -> PROV record type
    PROV_CLS_MAP = {}  # class URI -> base PROV type
    formal_attributes = {}  # subject -> OrderedDict(formal attr -> value)
    unique_sets = {}  # subject -> OrderedDict(formal attr -> values seen)
    for key, val in PROV_BASE_CLS.items():
        PROV_CLS_MAP[key.uri] = PROV_BASE_CLS[key]
    # predicate of an unqualified relation -> name of the bundle method
    # invoked (via getattr) to create it
    relation_mapper = {URIRef(PROV['alternateOf'].uri): 'alternate',
                       URIRef(PROV['actedOnBehalfOf'].uri): 'delegation',
                       URIRef(PROV['specializationOf'].uri): 'specialization',
                       URIRef(PROV['mentionOf'].uri): 'mention',
                       URIRef(PROV['wasAssociatedWith'].uri): 'association',
                       URIRef(PROV['wasDerivedFrom'].uri): 'derivation',
                       URIRef(PROV['wasAttributedTo'].uri): 'attribution',
                       URIRef(PROV['wasInformedBy'].uri): 'communication',
                       URIRef(PROV['wasGeneratedBy'].uri): 'generation',
                       URIRef(PROV['wasInfluencedBy'].uri): 'influence',
                       URIRef(PROV['wasInvalidatedBy'].uri): 'invalidation',
                       URIRef(PROV['wasEndedBy'].uri): 'end',
                       URIRef(PROV['wasStartedBy'].uri): 'start',
                       URIRef(PROV['hadMember'].uri): 'membership',
                       URIRef(PROV['used'].uri): 'usage',
                       }
    # RDF predicate -> PROV attribute name
    predicate_mapper = {RDFS.label: pm.PROV['label'],
                        URIRef(PROV['atLocation'].uri): PROV_LOCATION,
                        URIRef(PROV['startedAtTime'].uri): PROV_ATTR_STARTTIME,
                        URIRef(PROV['endedAtTime'].uri): PROV_ATTR_ENDTIME,
                        URIRef(PROV['atTime'].uri): PROV_ATTR_TIME,
                        URIRef(PROV['hadRole'].uri): PROV_ROLE,
                        URIRef(PROV['hadPlan'].uri): pm.PROV_ATTR_PLAN,
                        URIRef(PROV['hadUsage'].uri): pm.PROV_ATTR_USAGE,
                        URIRef(PROV['hadGeneration'].uri): pm.PROV_ATTR_GENERATION,
                        URIRef(PROV['hadActivity'].uri): pm.PROV_ATTR_ACTIVITY,
                        }
    other_attributes = {}  # subject -> list of (attribute, value) pairs
    # Pass 1: rdf:type triples decide which subjects become records; types
    # that do not define the record are kept as prov:type attributes.
    for stmt in graph.triples((None, RDF.type, None)):
        id = six.text_type(stmt[0])
        obj = six.text_type(stmt[2])
        if obj in PROV_CLS_MAP:
            # make sure a namespace exists for a named subject the document
            # cannot resolve yet
            if not isinstance(stmt[0], BNode) and self.valid_identifier(id) is None:
                prefix, iri, _ = graph.namespace_manager.compute_qname(id)
                self.document.add_namespace(prefix, iri)
            try:
                prov_obj = PROV_CLS_MAP[obj]
            except AttributeError:
                prov_obj = None
            add_attr = True
            isderivation = pm.PROV['Revision'].uri in stmt[2] or \
                pm.PROV['Quotation'].uri in stmt[2] or \
                pm.PROV['PrimarySource'].uri in stmt[2]
            if id not in ids and prov_obj and (prov_obj.uri == obj or
                                               isderivation or
                                               isinstance(stmt[0], BNode)):
                ids[id] = prov_obj
                klass = pm.PROV_REC_CLS[prov_obj]
                formal_attributes[id] = OrderedDict([(key, None) for key in klass.FORMAL_ATTRIBUTES])
                unique_sets[id] = OrderedDict([(key, []) for key in klass.FORMAL_ATTRIBUTES])
                # keep the type as an attribute only when it differs from the
                # base type it was mapped to
                add_attr = False or ((isinstance(stmt[0], BNode) or isderivation) and prov_obj.uri != obj)
            if add_attr:
                if id not in other_attributes:
                    other_attributes[id] = []
                obj_formatted = self.decode_rdf_representation(stmt[2], graph)
                other_attributes[id].append((pm.PROV['type'], obj_formatted))
        else:
            if id not in other_attributes:
                other_attributes[id] = []
            obj = self.decode_rdf_representation(stmt[2], graph)
            other_attributes[id].append((pm.PROV['type'], obj))
    # Pass 2: every triple except rdf:type
    for id, pred, obj in graph:
        id = six.text_type(id)
        if id not in other_attributes:
            other_attributes[id] = []
        if pred == RDF.type:
            continue
        if pred in relation_mapper:
            if 'alternateOf' in pred:
                # arguments are passed in (object, subject) order
                getattr(bundle, relation_mapper[pred])(obj, id)
            elif 'mentionOf' in pred:
                # the bundle comes from a prov:asInBundle triple, if any
                mentionBundle = None
                for stmt in graph.triples((URIRef(id), URIRef(pm.PROV['asInBundle'].uri), None)):
                    mentionBundle = stmt[2]
                getattr(bundle, relation_mapper[pred])(id, six.text_type(obj), mentionBundle)
            elif 'actedOnBehalfOf' in pred or 'wasAssociatedWith' in pred:
                # If a qualified node exists for this subject, fill in its
                # first two formal attributes instead of creating a separate
                # unqualified relation.
                qualifier = 'qualified' + relation_mapper[pred].upper()[0] + relation_mapper[pred][1:]
                qualifier_bnode = None
                for stmt in graph.triples((URIRef(id), URIRef(pm.PROV[qualifier].uri), None)):
                    qualifier_bnode = stmt[2]
                if qualifier_bnode is None:
                    getattr(bundle, relation_mapper[pred])(id, six.text_type(obj))
                else:
                    fakeys = list(formal_attributes[six.text_type(qualifier_bnode)].keys())
                    formal_attributes[six.text_type(qualifier_bnode)][fakeys[0]] = id
                    formal_attributes[six.text_type(qualifier_bnode)][fakeys[1]] = six.text_type(obj)
            else:
                getattr(bundle, relation_mapper[pred])(id, six.text_type(obj))
        elif id in ids:
            obj1 = self.decode_rdf_representation(obj, graph)
            if obj is not None and obj1 is None:
                raise ValueError(('Error transforming', obj))
            pred_new = pred
            if pred in predicate_mapper:
                pred_new = predicate_mapper[pred]
            # Record-type-specific renames, matched by substring of the
            # predicate text.
            if ids[id] == PROV_COMMUNICATION and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_INFORMANT
            if ids[id] == PROV_DELEGATION and 'agent' in six.text_type(pred_new):
                pred_new = PROV_ATTR_RESPONSIBLE
            if ids[id] in [PROV_END, PROV_START] and 'entity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_TRIGGER
            if ids[id] in [PROV_END] and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_ENDER
            if ids[id] in [PROV_START] and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_STARTER
            if ids[id] == PROV_DERIVATION and 'entity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_USED_ENTITY
            if six.text_type(pred_new) in [val.uri for val in formal_attributes[id]]:
                qname_key = self.valid_identifier(pred_new)
                formal_attributes[id][qname_key] = obj1
                unique_sets[id][qname_key].append(obj1)
                # more than one value seen: clear the slot; pass 3 expands
                # the combinations from unique_sets
                if len(unique_sets[id][qname_key]) > 1:
                    formal_attributes[id][qname_key] = None
            else:
                if 'qualified' not in six.text_type(pred_new) and \
                        'asInBundle' not in six.text_type(pred_new):
                    other_attributes[id].append((six.text_type(pred_new), obj1))
        # a ``qualified*`` triple pointing at a known record supplies that
        # record's first formal attribute (the subject of this triple)
        local_key = six.text_type(obj)
        if local_key in ids:
            if 'qualified' in pred:
                formal_attributes[local_key][list(formal_attributes[local_key].keys())[0]] = id
    # Pass 3: create the records
    for id in ids:
        attrs = None
        if id in other_attributes:
            attrs = other_attributes[id]
        items_to_walk = []
        for qname, values in unique_sets[id].items():
            if values and len(values) > 1:
                items_to_walk.append((qname, values))
        if items_to_walk:
            # one record per combination of the multi-valued formal
            # attributes
            for subset in list(walk(items_to_walk)):
                for key, value in subset.items():
                    formal_attributes[id][key] = value
                bundle.new_record(ids[id], id, formal_attributes[id], attrs)
        else:
            bundle.new_record(ids[id], id, formal_attributes[id], attrs)
        ids[id] = None
        if attrs is not None:
            other_attributes[id] = []
    # Pass 4: attributes still pending.
    # NOTE(review): entries of subjects in ``ids`` were emptied above and
    # ``ids[...]`` was set to None, so a non-empty list here belongs to a
    # subject that is not a key of ``ids`` -- this lookup looks like it would
    # raise KeyError in that case; confirm the intent.
    for key, val in other_attributes.items():
        if val:
            ids[key].add_attributes(val)
| 546 | |
| 547 | |
def walk(children, level=0, path=None, usename=True):
    """Generate all the full paths in a tree, as a dict.

    ``children`` is a sequence of ``(name, values)`` pairs. Every yielded
    dict holds one value per pair, covering all combinations, with the first
    pair varying slowest. ``values`` may be an iterable or a zero-argument
    callable returning one (the callable form is what the example below
    uses; previously it raised ``TypeError`` because the callable itself
    was iterated).

    :param children: Sequence of ``(name, values)`` pairs.
    :param level: Recursion depth; callers should leave it at ``0``.
    :param path: Partial combination shared between recursion levels; it is
        reset to an empty dict whenever ``level`` is ``0``.
    :param usename: Key the result by ``name`` when true, otherwise by the
        level index of the pair.
    :return: Generator of dicts, each an independent copy.

    :Example:

    >>> from prov.serializers.provrdf import walk
    >>> iterables = [('a', lambda: [1, 2]), ('b', lambda: [3, 4])]
    >>> [val['a'] for val in walk(iterables)]
    [1, 1, 2, 2]
    >>> [val['b'] for val in walk(iterables)]
    [3, 4, 3, 4]
    """
    # Entry point
    if level == 0:
        path = {}
    # Exit condition
    if not children:
        yield path.copy()
        return
    # Tree recursion
    head, tail = children[0], children[1:]
    name, source = head
    # Accept both a plain iterable and a factory returning one
    values = source() if callable(source) else source
    # We can use the arg name or the tree level as a key
    key = name if usename else level
    for child in values:
        path[key] = child
        # Recurse into the next level
        for child_paths in walk(tail, level + 1, path, usename):
            yield child_paths
| 579 | |
| 580 | |
def literal_rdf_representation(literal):
    """Build the rdflib literal for a PROV literal.

    A literal with a language tag yields a language-tagged RDF literal and
    its datatype is not consulted. Otherwise the RDF literal carries the
    URI of the literal's datatype; for ``base64Binary`` datatypes the value
    is base64-encoded on Python 2 and ``str.encode``-d on Python 3.

    :param literal: Object with ``value``, ``langtag`` and ``datatype``.
    :return: The corresponding ``RDFLiteral``.
    """
    if literal.value:
        value = six.text_type(literal.value)
    else:
        # falsy values fall back to the literal object itself
        value = literal

    if literal.langtag:
        # a language tag can only go with prov:InternationalizedString
        return RDFLiteral(value, lang=str(literal.langtag))

    datatype = literal.datatype
    if 'base64Binary' in datatype.uri:
        if six.PY2:
            value = base64.standard_b64encode(value)
        else:
            value = literal.value.encode()
    return RDFLiteral(value, datatype=datatype.uri)
