comparison planemo/lib/python3.7/site-packages/rdflib/extras/external_graph_libs.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 #!/usr/bin/env python2.7
2 # encoding: utf-8
3
4
5
6
7
8 """Convert (to and) from rdflib graphs to other well known graph libraries.
9
10 Currently the following libraries are supported:
11 - networkx: MultiDiGraph, DiGraph, Graph
12 - graph_tool: Graph
13
14 Doctests in this file are all skipped, as we can't run them conditionally if
15 networkx or graph_tool are available and they would err otherwise.
16 see ../../test/test_extras_external_graph_libs.py for conditional tests
17 """
18
19 import logging
20 logger = logging.getLogger(__name__)
21
22 _identity = lambda x: x
23
24 def _rdflib_to_networkx_graph(
25 graph,
26 nxgraph,
27 calc_weights,
28 edge_attrs,
29 transform_s=_identity, transform_o=_identity):
30 """Helper method for multidigraph, digraph and graph.
31
32 Modifies nxgraph in-place!
33
34 Arguments:
35 graph: an rdflib.Graph.
36 nxgraph: a networkx.Graph/DiGraph/MultiDigraph.
37 calc_weights: If True adds a 'weight' attribute to each edge according
38 to the count of s,p,o triples between s and o, which is meaningful
39 for Graph/DiGraph.
40 edge_attrs: Callable to construct edge data from s, p, o.
41 'triples' attribute is handled specially to be merged.
42 'weight' should not be generated if calc_weights==True.
43 (see invokers below!)
44 transform_s: Callable to transform node generated from s.
45 transform_o: Callable to transform node generated from o.
46 """
47 assert callable(edge_attrs)
48 assert callable(transform_s)
49 assert callable(transform_o)
50 import networkx as nx
51 for s, p, o in graph:
52 ts, to = transform_s(s), transform_o(o) # apply possible transformations
53 data = nxgraph.get_edge_data(ts, to)
54 if data is None or isinstance(nxgraph, nx.MultiDiGraph):
55 # no edge yet, set defaults
56 data = edge_attrs(s, p, o)
57 if calc_weights:
58 data['weight'] = 1
59 nxgraph.add_edge(ts, to, **data)
60 else:
61 # already have an edge, just update attributes
62 if calc_weights:
63 data['weight'] += 1
64 if 'triples' in data:
65 d = edge_attrs(s, p, o)
66 data['triples'].extend(d['triples'])
67
def rdflib_to_networkx_multidigraph(
        graph,
        edge_attrs=lambda s, p, o: {'key': p},
        **kwds):
    """Build a networkx.MultiDiGraph from the triples of an rdflib graph.

    Subjects and objects become the nodes; each triple becomes one directed
    edge from subject to object. With the default ``edge_attrs`` the
    predicate is used as the edge key, which identifies parallel edges.

    Arguments:
        graph: a rdflib.Graph.
        edge_attrs: Callable receiving (s, p, o) and returning the dictionary
            that is passed on to networkx's add_edge(s, o, **attrs).

            The default sets the MultiDiGraph key=p. If you don't need to
            re-identify an edge later on, pass `lambda s, p, o: {}`; the
            MultiDiGraph default keys (increasing ints) are used then.
        **kwds: forwarded unchanged to the shared conversion helper
            (e.g. transform_s / transform_o).

    Returns:
        networkx.MultiDiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_networkx_multidigraph(g)
    >>> len(mdg.edges())
    4
    >>> mdg.has_edge(a, b)
    True
    >>> mdg.has_edge(a, b, key=p)
    True
    >>> mdg.has_edge(a, b, key=q)
    True

    >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {})
    >>> mdg.has_edge(a, b, key=0)
    True
    >>> mdg.has_edge(a, b, key=1)
    True
    """
    # networkx is an optional dependency, so it is imported on demand only.
    from networkx import MultiDiGraph

    multi_digraph = MultiDiGraph()
    # Weights are never calculated here: every triple keeps its own edge.
    _rdflib_to_networkx_graph(graph, multi_digraph, False, edge_attrs, **kwds)
    return multi_digraph
119
def rdflib_to_networkx_digraph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Converts the given graph into a networkx.DiGraph.

    As an rdflib.Graph() can contain multiple edges between nodes, by default
    adds a 'triples' attribute to the single DiGraph edge with a list of
    all triples between s and o.
    Also by default calculates the edge weight as the length of triples.

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true calculate multi-graph edge-count as edge 'weight'
        edge_attrs: Callable to construct later edge_attributes. It receives
            3 variables (s, p, o) and should construct a dictionary that is
            passed to networkx's add_edge(s, o, **attrs) function.

            By default this will include setting the 'triples' attribute here,
            which is treated specially by us to be merged. Other attributes of
            multi-edges will only contain the attributes of the first edge.
            If you don't want the 'triples' attribute for tracking, set this to
            `lambda s, p, o: {}`.
        **kwds: forwarded unchanged to the shared conversion helper
            (e.g. transform_s / transform_o).

    Returns:
        networkx.DiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> dg = rdflib_to_networkx_digraph(g)
    >>> dg[a][b]['weight']
    2
    >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
    True
    >>> len(dg.edges())
    3
    >>> dg.size()
    3
    >>> dg.size(weight='weight')
    4.0

    >>> dg = rdflib_to_networkx_digraph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in dg[a][b]
    False
    >>> 'triples' in dg[a][b]
    False
    """
    # networkx is an optional dependency, so it is imported on demand only.
    import networkx as nx
    dg = nx.DiGraph()
    _rdflib_to_networkx_graph(graph, dg, calc_weights, edge_attrs, **kwds)
    return dg
178
179
def rdflib_to_networkx_graph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Build an undirected networkx.Graph from the triples of an rdflib graph.

    An rdflib.Graph() may hold several directed edges between the same two
    nodes, whereas the resulting Graph keeps a single undirected edge per node
    pair. By default that edge gets a 'triples' attribute listing the triples
    between s and o in graph, and a 'weight' equal to len(triples).

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true, count the merged triples as edge 'weight'.
        edge_attrs: Callable receiving (s, p, o) and returning the dictionary
            that is passed on to networkx's add_edge(s, o, **attrs).

            The default produces the 'triples' attribute, which is the only
            attribute merged across triples sharing an edge; any other
            attribute keeps the value from the first triple. Pass
            `lambda s, p, o: {}` if you don't want 'triples' tracking.
        **kwds: forwarded unchanged to the shared conversion helper
            (e.g. transform_s / transform_o).

    Returns:
        networkx.Graph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> ug = rdflib_to_networkx_graph(g)
    >>> ug[a][b]['weight']
    3
    >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
    True
    >>> len(ug.edges())
    2
    >>> ug.size()
    2
    >>> ug.size(weight='weight')
    4.0

    >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in ug[a][b]
    False
    >>> 'triples' in ug[a][b]
    False
    """
    # networkx is an optional dependency, so it is imported on demand only.
    from networkx import Graph as UndirectedGraph

    undirected = UndirectedGraph()
    _rdflib_to_networkx_graph(
        graph, undirected, calc_weights, edge_attrs, **kwds)
    return undirected
238
239
def rdflib_to_graphtool(
        graph,
        v_prop_names=[str('term')],
        e_prop_names=[str('term')],
        transform_s=lambda s, p, o: {str('term'): s},
        transform_p=lambda s, p, o: {str('term'): p},
        transform_o=lambda s, p, o: {str('term'): o},
        ):
    """Build a graph_tool.Graph() from the triples of an rdflib graph.

    Every distinct subject or object becomes one vertex; every triple becomes
    one edge from the subject's vertex to the object's vertex.

    Arguments:
        graph: a rdflib.Graph.
        v_prop_names: a list of names for the vertex properties. The default is
            set to ['term'] (see transform_s, transform_o below).
        e_prop_names: a list of names for the edge properties.
        transform_s: callable with s, p, o input. Should return a dictionary
            containing a value for each name in v_prop_names. By default is set
            to {'term': s} which in combination with v_prop_names = ['term']
            adds s as 'term' property to the generated vertex for s. It is
            only called the first time a node is seen.
        transform_p: similar to transform_s, but wrt. e_prop_names. By default
            returns {'term': p} which adds p as a property to the generated
            edge between the vertex for s and the vertex for o.
        transform_o: similar to transform_s.

    Returns:
        graph_tool.Graph()

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_graphtool(g)
    >>> len(list(mdg.edges()))
    4
    >>> from graph_tool import util as gt_util
    >>> vpterm = mdg.vertex_properties['term']
    >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
    >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    True
    >>> epterm = mdg.edge_properties['term']
    >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    True

    >>> mdg = rdflib_to_graphtool(
    ...     g,
    ...     e_prop_names=[str('name')],
    ...     transform_p=lambda s, p, o: {str('name'): unicode(p)})
    >>> epterm = mdg.edge_properties['name']
    >>> len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1
    True
    """
    # graph_tool is an optional dependency, so it is imported on demand only.
    import graph_tool as gt
    gt_graph = gt.Graph()

    # Create one 'object'-typed property map per requested name, then
    # register each of them on the graph under that name.
    vertex_props = [
        (name, gt_graph.new_vertex_property('object'))
        for name in v_prop_names
    ]
    for name, prop in vertex_props:
        gt_graph.vertex_properties[name] = prop
    edge_props = [
        (name, gt_graph.new_edge_property('object'))
        for name in e_prop_names
    ]
    for name, prop in edge_props:
        gt_graph.edge_properties[name] = prop

    known_vertices = {}

    def vertex_for(node, transform, s, p, o):
        # Return the vertex already created for node, or create one and fill
        # its properties from transform(s, p, o).
        vertex = known_vertices.get(node)
        if vertex is not None:
            return vertex
        vertex = gt_graph.add_vertex()
        known_vertices[node] = vertex
        values = transform(s, p, o)
        for name, prop in vertex_props:
            prop[vertex] = values[name]
        return vertex

    for s, p, o in graph:
        # Subject first, then object, so a self-loop reuses the same vertex.
        source = vertex_for(s, transform_s, s, p, o)
        target = vertex_for(o, transform_o, s, p, o)

        edge = gt_graph.add_edge(source, target)
        values = transform_p(s, p, o)
        for name, prop in edge_props:
            prop[edge] = values[name]
    return gt_graph
338
339
if __name__ == '__main__':
    # Running the module directly executes its doctests through nose.
    import sys
    import logging.config
    logging.basicConfig(level=logging.DEBUG)

    import nose
    this_script = sys.argv[0]
    # The script path is passed twice: once as the program name and once as
    # the target nose should collect from.
    nose_argv = [this_script, this_script, '-v', '--with-doctest']
    nose.run(argv=nose_argv)