Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/extras/external_graph_libs.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 #!/usr/bin/env python2.7 | |
2 # encoding: utf-8 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 """Convert (to and) from rdflib graphs to other well known graph libraries. | |
9 | |
10 Currently the following libraries are supported: | |
11 - networkx: MultiDiGraph, DiGraph, Graph | |
12 - graph_tool: Graph | |
13 | |
14 Doctests in this file are all skipped, as we can't run them conditionally if | |
15 networkx or graph_tool are available and they would err otherwise. | |
16 see ../../test/test_extras_external_graph_libs.py for conditional tests | |
17 """ | |
18 | |
19 import logging | |
20 logger = logging.getLogger(__name__) | |
21 | |
def _identity(x):
    """Return ``x`` unchanged (default node transformation)."""
    return x


def _rdflib_to_networkx_graph(
        graph,
        nxgraph,
        calc_weights,
        edge_attrs,
        transform_s=_identity, transform_o=_identity):
    """Helper method for multidigraph, digraph and graph.

    Modifies nxgraph in-place!

    Arguments:
        graph: an rdflib.Graph; it is only iterated as (s, p, o) triples.
        nxgraph: a networkx.Graph/DiGraph/MultiGraph/MultiDiGraph.
        calc_weights: If True adds a 'weight' attribute to each edge according
            to the count of s,p,o triples between s and o, which is meaningful
            for Graph/DiGraph.
        edge_attrs: Callable to construct edge data from s, p, o.
            'triples' attribute is handled specially to be merged.
            'weight' should not be generated if calc_weights==True.
            (see invokers below!)
            The returned mapping is copied before 'weight' is added, so the
            callable may safely return a shared object.
        transform_s: Callable to transform node generated from s.
        transform_o: Callable to transform node generated from o.
    """
    assert callable(edge_attrs)
    assert callable(transform_s)
    assert callable(transform_o)
    # Multigraphs (directed or not) keep one edge per triple, so they never
    # take the "merge into the existing edge" path below. Asking the graph
    # itself covers MultiGraph as well as MultiDiGraph.
    keeps_parallel_edges = nxgraph.is_multigraph()
    for s, p, o in graph:
        ts, to = transform_s(s), transform_o(o)  # apply possible transformations
        data = None if keeps_parallel_edges else nxgraph.get_edge_data(ts, to)
        if data is None:
            # no edge yet (or multigraph): add a new edge with default data.
            # Copy first so that writing 'weight' does not leak into the
            # mapping owned by edge_attrs.
            data = dict(edge_attrs(s, p, o))
            if calc_weights:
                data['weight'] = 1
            nxgraph.add_edge(ts, to, **data)
        else:
            # already have an edge, just update its attributes in place
            if calc_weights:
                data['weight'] += 1
            if 'triples' in data:
                d = edge_attrs(s, p, o)
                data['triples'].extend(d['triples'])
67 | |
def rdflib_to_networkx_multidigraph(
        graph,
        edge_attrs=lambda s, p, o: {'key': p},
        **kwds):
    """Build a networkx.MultiDiGraph from the triples of an rdflib graph.

    Subjects and objects become the nodes of the MultiDiGraph; every triple
    becomes its own edge. With the default ``edge_attrs`` the predicate is
    used as the edge key, which allows multi-edges to be told apart later.

    Arguments:
        graph: a rdflib.Graph.
        edge_attrs: Callable receiving (s, p, o) and returning the dictionary
            that is passed on to networkx's add_edge(s, o, **attrs).

            The default sets the MultiDiGraph key=p. If re-identifying an
            edge later on is not needed, pass `lambda s, p, o: {}`; networkx
            then falls back to its own default keys (increasing ints).
        **kwds: forwarded unchanged to the shared conversion helper
            (e.g. transform_s / transform_o).

    Returns:
        networkx.MultiDiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_networkx_multidigraph(g)
    >>> len(mdg.edges())
    4
    >>> mdg.has_edge(a, b)
    True
    >>> mdg.has_edge(a, b, key=p)
    True
    >>> mdg.has_edge(a, b, key=q)
    True

    >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {})
    >>> mdg.has_edge(a, b, key=0)
    True
    >>> mdg.has_edge(a, b, key=1)
    True
    """
    from networkx import MultiDiGraph
    multi_digraph = MultiDiGraph()
    # Weights are never calculated here: each triple keeps its own edge.
    _rdflib_to_networkx_graph(
        graph, multi_digraph, False, edge_attrs, **kwds)
    return multi_digraph
119 | |
def rdflib_to_networkx_digraph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Converts the given graph into a networkx.DiGraph.

    As an rdflib.Graph() can contain multiple edges between nodes, by default
    adds a 'triples' attribute to the single DiGraph edge with a list of
    all triples between s and o.
    Also by default calculates the edge weight as the length of triples.

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true calculate multi-graph edge-count as edge 'weight'
        edge_attrs: Callable to construct later edge_attributes. It receives
            3 variables (s, p, o) and should construct a dictionary that is
            passed to networkx's add_edge(s, o, **attrs) function.

            By default this will include setting the 'triples' attribute here,
            which is treated specially by us to be merged. Other attributes of
            multi-edges will only contain the attributes of the first edge.
            If you don't want the 'triples' attribute for tracking, set this to
            `lambda s, p, o: {}`.
        **kwds: forwarded unchanged to the shared conversion helper
            (e.g. transform_s / transform_o).

    Returns:
        networkx.DiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> dg = rdflib_to_networkx_digraph(g)
    >>> dg[a][b]['weight']
    2
    >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
    True
    >>> len(dg.edges())
    3
    >>> dg.size()
    3
    >>> dg.size(weight='weight')
    4.0

    >>> dg = rdflib_to_networkx_digraph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in dg[a][b]
    False
    >>> 'triples' in dg[a][b]
    False
    """
    import networkx as nx
    dg = nx.DiGraph()
    _rdflib_to_networkx_graph(graph, dg, calc_weights, edge_attrs, **kwds)
    return dg
178 | |
179 | |
def rdflib_to_networkx_graph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Build an undirected networkx.Graph from the triples of an rdflib graph.

    An rdflib.Graph() may hold several directed edges between the same two
    nodes. All of them end up on one single Graph edge; with the default
    ``edge_attrs`` that edge carries a 'triples' attribute listing the
    triples between s and o in graph.
    By default the edge weight is also calculated as len(triples).

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true calculate multi-graph edge-count as edge 'weight'
        edge_attrs: Callable receiving (s, p, o) and returning the dictionary
            that is passed on to networkx's add_edge(s, o, **attrs).

            The default sets the 'triples' attribute, which is treated
            specially: it is merged across all triples on the same edge.
            Any other attribute keeps the value from the first triple seen.
            To skip 'triples' tracking pass `lambda s, p, o: {}`.
        **kwds: forwarded unchanged to the shared conversion helper
            (e.g. transform_s / transform_o).

    Returns:
        networkx.Graph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> ug = rdflib_to_networkx_graph(g)
    >>> ug[a][b]['weight']
    3
    >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
    True
    >>> len(ug.edges())
    2
    >>> ug.size()
    2
    >>> ug.size(weight='weight')
    4.0

    >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in ug[a][b]
    False
    >>> 'triples' in ug[a][b]
    False
    """
    from networkx import Graph as UndirectedGraph
    undirected = UndirectedGraph()
    _rdflib_to_networkx_graph(
        graph, undirected, calc_weights, edge_attrs, **kwds)
    return undirected
238 | |
239 | |
def rdflib_to_graphtool(
        graph,
        v_prop_names=[str('term')],
        e_prop_names=[str('term')],
        transform_s=lambda s, p, o: {str('term'): s},
        transform_p=lambda s, p, o: {str('term'): p},
        transform_o=lambda s, p, o: {str('term'): o},
        ):
    """Converts the given graph into a graph_tool.Graph().

    The subjects and objects are the later vertices of the Graph.
    The predicates become edges.

    Arguments:
        graph: a rdflib.Graph.
        v_prop_names: a list of names for the vertex properties. The default is
            set to ['term'] (see transform_s, transform_o below).
        e_prop_names: a list of names for the edge properties.
        transform_s: callable with s, p, o input. Should return a dictionary
            containing a value for each name in v_prop_names. By default is set
            to {'term': s} which in combination with v_prop_names = ['term']
            adds s as 'term' property to the generated vertex for s.
        transform_p: similar to transform_s, but wrt. e_prop_names. By default
            returns {'term': p} which adds p as a property to the generated
            edge between the vertex for s and the vertex for o.
        transform_o: similar to transform_s.

    A vertex is created the first time a node is seen, and its properties are
    set only at that moment: by transform_s if the node first shows up as a
    subject, by transform_o if it first shows up as an object.

    Returns:
        graph_tool.Graph()

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_graphtool(g)
    >>> len(list(mdg.edges()))
    4
    >>> from graph_tool import util as gt_util
    >>> vpterm = mdg.vertex_properties['term']
    >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
    >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    True
    >>> epterm = mdg.edge_properties['term']
    >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    True

    >>> mdg = rdflib_to_graphtool(
    ...     g,
    ...     e_prop_names=[str('name')],
    ...     transform_p=lambda s, p, o: {str('name'): str(p)})
    >>> epterm = mdg.edge_properties['name']
    >>> len(list(gt_util.find_edge(mdg, epterm, str(p)))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, str(q)))) == 1
    True
    """
    import graph_tool as gt
    g = gt.Graph()

    # Register one 'object'-typed property map per requested name.
    # The list defaults above are only iterated, never mutated.
    vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names]
    for vpn, vprop in vprops:
        g.vertex_properties[vpn] = vprop
    eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names]
    for epn, eprop in eprops:
        g.edge_properties[epn] = eprop

    node_to_vertex = {}

    def vertex_for(node, transform, triple):
        """Return the vertex for ``node``, creating and filling it if new."""
        v = node_to_vertex.get(node)
        if v is None:
            v = g.add_vertex()
            node_to_vertex[node] = v
            tmp_props = transform(*triple)
            for vpn, vprop in vprops:
                vprop[v] = tmp_props[vpn]
        return v

    for s, p, o in graph:
        # Subject first, then object, so a self-loop reuses the same vertex.
        sv = vertex_for(s, transform_s, (s, p, o))
        ov = vertex_for(o, transform_o, (s, p, o))

        e = g.add_edge(sv, ov)
        tmp_props = transform_p(s, p, o)
        for epn, eprop in eprops:
            eprop[e] = tmp_props[epn]
    return g
338 | |
339 | |
if __name__ == '__main__':
    # Script entry point: run this module's doctests through nose with
    # debug-level logging enabled.
    import sys
    import logging.config
    logging.basicConfig(level=logging.DEBUG)

    import nose
    this_script = sys.argv[0]
    # The script path is passed twice: once as the program name and once
    # as an argument (presumably the target nose should collect from).
    nose_argv = [this_script, this_script, '-v', '--with-doctest']
    nose.run(argv=nose_argv)