annotate planemo/lib/python3.7/site-packages/prov/graph.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
1 from __future__ import (absolute_import, division, print_function,
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
2 unicode_literals)
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
3
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
4 import networkx as nx
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
5 from prov.model import (
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
6 ProvDocument, ProvRecord, ProvElement, ProvEntity, ProvActivity, ProvAgent,
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
7 ProvRelation, PROV_ATTR_ENTITY, PROV_ATTR_ACTIVITY, PROV_ATTR_AGENT,
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
8 PROV_ATTR_TRIGGER, PROV_ATTR_GENERATED_ENTITY, PROV_ATTR_USED_ENTITY,
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
9 PROV_ATTR_DELEGATE, PROV_ATTR_RESPONSIBLE, PROV_ATTR_SPECIFIC_ENTITY,
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
10 PROV_ATTR_GENERAL_ENTITY, PROV_ATTR_ALTERNATE1, PROV_ATTR_ALTERNATE2,
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
11 PROV_ATTR_COLLECTION, PROV_ATTR_INFORMED, PROV_ATTR_INFORMANT
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
12 )
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
13
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
14 __author__ = 'Trung Dong Huynh'
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
15 __email__ = 'trungdong@donggiang.com'
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
16
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
17
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
# Element class to instantiate for a relation endpoint that has no record of
# its own, keyed by the formal attribute under which the endpoint appears
# (looked up by prov_to_graph). Groups are listed so that the resulting
# insertion order matches the attribute order used historically.
INFERRED_ELEMENT_CLASS = dict(
    (attr_name, element_class)
    for element_class, attr_names in (
        (ProvEntity, (PROV_ATTR_ENTITY,)),
        (ProvActivity, (PROV_ATTR_ACTIVITY,)),
        (ProvAgent, (PROV_ATTR_AGENT,)),
        (ProvEntity, (
            PROV_ATTR_TRIGGER,
            PROV_ATTR_GENERATED_ENTITY,
            PROV_ATTR_USED_ENTITY,
        )),
        (ProvAgent, (
            PROV_ATTR_DELEGATE,
            PROV_ATTR_RESPONSIBLE,
        )),
        (ProvEntity, (
            PROV_ATTR_SPECIFIC_ENTITY,
            PROV_ATTR_GENERAL_ENTITY,
            PROV_ATTR_ALTERNATE1,
            PROV_ATTR_ALTERNATE2,
            PROV_ATTR_COLLECTION,
        )),
        (ProvActivity, (
            PROV_ATTR_INFORMED,
            PROV_ATTR_INFORMANT,
        )),
    )
    for attr_name in attr_names
)
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
35
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
36
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
def prov_to_graph(prov_document):
    """
    Build a NetworkX `MultiDiGraph
    <https://networkx.readthedocs.io/en/stable/reference/classes.multigraph.html>`_
    from a :class:`~prov.model.ProvDocument`.

    The document is unified first. Every :class:`~prov.model.ProvElement`
    record becomes a node, and every :class:`~prov.model.ProvRelation` becomes
    a directed edge from the value of its first formal attribute to the value
    of its second, with the relation record stored under the ``relation``
    edge attribute.

    An endpoint that has no element record in the document is represented by
    a placeholder created from :data:`INFERRED_ELEMENT_CLASS`. Relations with
    an empty endpoint, or whose endpoint attribute has no entry in that
    mapping, are skipped.

    :param prov_document: The :class:`~prov.model.ProvDocument` instance to convert.
    :returns: The resulting ``networkx.MultiDiGraph``.
    """
    graph = nx.MultiDiGraph()
    unified_doc = prov_document.unified()

    # identifier -> element used as the graph node for that identifier
    elements_by_id = {}
    for record in unified_doc.get_records(ProvElement):
        graph.add_node(record)
        elements_by_id[record.identifier] = record

    for relation in unified_doc.get_records(ProvRelation):
        # Only the first two formal attributes define the edge's endpoints;
        # each is an (attribute name, value) pair.
        first_end, second_end = relation.formal_attributes[:2]
        source_id, target_id = first_end[1], second_end[1]
        if not (source_id and target_id):
            # one end of the relation is missing; nothing to connect
            continue

        try:
            for attr_name, identifier in (
                (first_end[0], source_id),
                (second_end[0], target_id),
            ):
                if identifier not in elements_by_id:
                    # No record for this endpoint: create a placeholder of
                    # the class implied by the attribute, passing None as
                    # the first constructor argument.
                    placeholder_cls = INFERRED_ELEMENT_CLASS[attr_name]
                    elements_by_id[identifier] = placeholder_cls(
                        None, identifier
                    )
        except KeyError:
            # Unsupported attribute: the element type cannot be inferred,
            # so this relation is left out of the graph.
            continue

        graph.add_edge(
            elements_by_id[source_id],
            elements_by_id[target_id],
            relation=relation,
        )
    return graph
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
70
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
71
56ad4e20f292 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
def graph_to_prov(g):
    """
    Convert a `MultiDiGraph
    <https://networkx.readthedocs.io/en/stable/reference/classes.multigraph.html>`_
    that was previously produced by :func:`prov_to_graph` back to a
    :class:`~prov.model.ProvDocument`.

    Nodes that are :class:`~prov.model.ProvRecord` instances with a non-``None``
    ``bundle`` are added to the new document; nodes whose ``bundle`` is
    ``None`` (such as the placeholders :func:`prov_to_graph` creates for
    endpoints without a record) are left out. For every edge, the record
    stored under the ``relation`` edge attribute is added when it is a
    :class:`~prov.model.ProvRecord`; edges without that attribute are ignored.

    :param g: The graph instance to convert.
    :returns: A new :class:`~prov.model.ProvDocument` holding the records.
    """
    prov_doc = ProvDocument()

    # ``nodes()`` / ``edges()`` are used instead of ``nodes_iter()`` /
    # ``edges_iter()``: the ``*_iter`` methods were removed in NetworkX 2.0,
    # while the plain methods are available in both 1.x and 2.x.
    for n in g.nodes():
        if isinstance(n, ProvRecord) and n.bundle is not None:
            prov_doc.add_record(n)

    for _, _, edge_data in g.edges(data=True):
        # Edges lacking a 'relation' attribute yield None here, which fails
        # the isinstance check and is skipped.
        relation = edge_data.get('relation')
        if isinstance(relation, ProvRecord):
            prov_doc.add_record(relation)

    return prov_doc