Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/extras/external_graph_libs.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 #!/usr/bin/env python2.7 | |
| 2 # encoding: utf-8 | |
| 3 | |
| 4 | |
| 5 | |
| 6 | |
| 7 | |
| 8 """Convert (to and) from rdflib graphs to other well known graph libraries. | |
| 9 | |
| 10 Currently the following libraries are supported: | |
| 11 - networkx: MultiDiGraph, DiGraph, Graph | |
| 12 - graph_tool: Graph | |
| 13 | |
| 14 Doctests in this file are all skipped, as we can't run them conditionally if | |
| 15 networkx or graph_tool are available and they would err otherwise. | |
| 16 see ../../test/test_extras_external_graph_libs.py for conditional tests | |
| 17 """ | |
| 18 | |
| 19 import logging | |
| 20 logger = logging.getLogger(__name__) | |
| 21 | |
def _identity(x):
    """Return ``x`` unchanged.

    Used as the default (no-op) node transformation for subjects and objects.
    """
    return x
| 23 | |
def _rdflib_to_networkx_graph(
        graph,
        nxgraph,
        calc_weights,
        edge_attrs,
        transform_s=_identity, transform_o=_identity):
    """Fill ``nxgraph`` in place with one edge per triple of ``graph``.

    Shared worker behind the multidigraph, digraph and graph converters.
    Nothing is returned; ``nxgraph`` itself is modified.

    Arguments:
        graph: an rdflib.Graph (iterated as ``(s, p, o)`` triples).
        nxgraph: a networkx.Graph/DiGraph/MultiDiGraph to add edges to.
        calc_weights: If True, every edge gets a 'weight' attribute counting
            the triples that were folded into it. Only meaningful for
            Graph/DiGraph, where several triples share a single edge.
        edge_attrs: Callable ``(s, p, o) -> dict`` producing the edge data.
            A 'triples' entry is treated specially: when a triple lands on
            an already existing edge, its 'triples' list is appended to the
            one stored on that edge. It should not produce 'weight' when
            calc_weights is True, as that key is overwritten here.
        transform_s: Callable mapping s to the node used for the subject.
        transform_o: Callable mapping o to the node used for the object.
    """
    assert callable(edge_attrs)
    assert callable(transform_s)
    assert callable(transform_o)
    import networkx as nx
    for subj, pred, obj in graph:
        source = transform_s(subj)
        target = transform_o(obj)
        existing = nxgraph.get_edge_data(source, target)
        if existing is not None and not isinstance(nxgraph, nx.MultiDiGraph):
            # The edge is already there (and this is not a multigraph, where
            # each triple gets its own edge): fold this triple into it.
            # NOTE: the dict is mutated without being written back, so this
            # relies on get_edge_data() returning the stored attribute dict.
            if calc_weights:
                existing['weight'] += 1
            if 'triples' in existing:
                fresh = edge_attrs(subj, pred, obj)
                existing['triples'].extend(fresh['triples'])
            continue

        # First triple for this node pair, or a multigraph: add a new edge.
        attrs = edge_attrs(subj, pred, obj)
        if calc_weights:
            attrs['weight'] = 1
        nxgraph.add_edge(source, target, **attrs)
| 67 | |
def rdflib_to_networkx_multidigraph(
        graph,
        edge_attrs=lambda s, p, o: {'key': p},
        **kwds):
    """Build a networkx.MultiDiGraph from an rdflib graph.

    Every triple becomes its own directed edge from subject to object; with
    the default ``edge_attrs`` the predicate is used as the edge key, which
    tells parallel edges between the same two nodes apart.

    Arguments:
        graph: a rdflib.Graph.
        edge_attrs: Callable receiving (s, p, o) and returning the dictionary
            that is passed on to networkx's add_edge(s, o, **attrs).

            The default sets the MultiDiGraph key=p. If you do not need to
            find an edge again by its predicate, pass `lambda s, p, o: {}`;
            MultiDiGraph then falls back to its own keys (increasing ints).
        **kwds: forwarded to the internal helper (transform_s, transform_o).

    Returns:
        networkx.MultiDiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_networkx_multidigraph(g)
    >>> len(mdg.edges())
    4
    >>> mdg.has_edge(a, b)
    True
    >>> mdg.has_edge(a, b, key=p)
    True
    >>> mdg.has_edge(a, b, key=q)
    True

    >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {})
    >>> mdg.has_edge(a, b, key=0)
    True
    >>> mdg.has_edge(a, b, key=1)
    True
    """
    import networkx as nx
    multi_digraph = nx.MultiDiGraph()
    # Weights are never calculated here: each triple keeps its own edge.
    calc_weights = False
    _rdflib_to_networkx_graph(
        graph, multi_digraph, calc_weights, edge_attrs, **kwds)
    return multi_digraph
| 119 | |
def rdflib_to_networkx_digraph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Converts the given graph into a networkx.DiGraph.

    As an rdflib.Graph() can contain multiple edges between nodes, by default
    adds a 'triples' attribute to the single DiGraph edge with a list of
    all triples between s and o.
    Also by default calculates the edge weight as the length of triples.

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true calculate multi-graph edge-count as edge 'weight'
        edge_attrs: Callable to construct later edge_attributes. It receives
            3 variables (s, p, o) and should construct a dictionary that is
            passed to networkx's add_edge(s, o, **attrs) function.

            By default this will include setting the 'triples' attribute here,
            which is treated specially by us to be merged. Other attributes of
            multi-edges will only contain the attributes of the first edge.
            If you don't want the 'triples' attribute for tracking, set this to
            `lambda s, p, o: {}`.
        **kwds: forwarded to the internal helper (transform_s, transform_o).

    Returns:
        networkx.DiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> dg = rdflib_to_networkx_digraph(g)
    >>> dg[a][b]['weight']
    2
    >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
    True
    >>> len(dg.edges())
    3
    >>> dg.size()
    3
    >>> dg.size(weight='weight')
    4.0

    >>> dg = rdflib_to_networkx_digraph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in dg[a][b]
    False
    >>> 'triples' in dg[a][b]
    False
    """
    import networkx as nx
    dg = nx.DiGraph()
    _rdflib_to_networkx_graph(graph, dg, calc_weights, edge_attrs, **kwds)
    return dg
| 178 | |
| 179 | |
def rdflib_to_networkx_graph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Build an undirected networkx.Graph from an rdflib graph.

    An rdflib.Graph() may hold several directed edges between the same two
    nodes, while a networkx.Graph keeps a single undirected edge per node
    pair. By default that single edge carries a 'triples' attribute listing
    the triples between s and o in graph, and a 'weight' attribute equal to
    len(triples).

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true, store the number of merged triples as the
            edge 'weight'.
        edge_attrs: Callable receiving (s, p, o) and returning the dictionary
            that is passed on to networkx's add_edge(s, o, **attrs).

            The default sets the 'triples' attribute, which is the only
            attribute merged across triples sharing an edge; any other
            attribute keeps the value produced for the first triple.
            Pass `lambda s, p, o: {}` to drop the 'triples' tracking.
        **kwds: forwarded to the internal helper (transform_s, transform_o).

    Returns:
        networkx.Graph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> ug = rdflib_to_networkx_graph(g)
    >>> ug[a][b]['weight']
    3
    >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
    True
    >>> len(ug.edges())
    2
    >>> ug.size()
    2
    >>> ug.size(weight='weight')
    4.0

    >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in ug[a][b]
    False
    >>> 'triples' in ug[a][b]
    False
    """
    import networkx as nx
    undirected = nx.Graph()
    _rdflib_to_networkx_graph(
        graph, undirected, calc_weights, edge_attrs, **kwds)
    return undirected
| 238 | |
| 239 | |
def rdflib_to_graphtool(
        graph,
        v_prop_names=[str('term')],
        e_prop_names=[str('term')],
        transform_s=lambda s, p, o: {str('term'): s},
        transform_p=lambda s, p, o: {str('term'): p},
        transform_o=lambda s, p, o: {str('term'): o},
        ):
    """Converts the given graph into a graph_tool.Graph().

    The subjects and objects are the later vertices of the Graph.
    The predicates become edges.

    Arguments:
        graph: a rdflib.Graph.
        v_prop_names: a list of names for the vertex properties. The default is
            set to ['term'] (see transform_s, transform_o below).
        e_prop_names: a list of names for the edge properties.
        transform_s: callable with s, p, o input. Should return a dictionary
            containing a value for each name in v_prop_names. By default is set
            to {'term': s} which in combination with v_prop_names = ['term']
            adds s as 'term' property to the generated vertex for s.
            It is only called the first time a node is seen, i.e. when its
            vertex is created.
        transform_p: similar to transform_s, but wrt. e_prop_names. By default
            returns {'term': p} which adds p as a property to the generated
            edge between the vertex for s and the vertex for o.
        transform_o: similar to transform_s.

    Returns:
        graph_tool.Graph()

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_graphtool(g)
    >>> len(list(mdg.edges()))
    4
    >>> from graph_tool import util as gt_util
    >>> vpterm = mdg.vertex_properties['term']
    >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
    >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    True
    >>> epterm = mdg.edge_properties['term']
    >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    True

    >>> mdg = rdflib_to_graphtool(
    ...     g,
    ...     e_prop_names=[str('name')],
    ...     transform_p=lambda s, p, o: {str('name'): str(p)})
    >>> epterm = mdg.edge_properties['name']
    >>> len(list(gt_util.find_edge(mdg, epterm, str(p)))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, str(q)))) == 1
    True
    """
    # NOTE: the list defaults above are only iterated, never mutated, so
    # sharing them between calls is harmless; they are kept for API stability.
    import graph_tool as gt
    g = gt.Graph()

    # Create one 'object'-typed property map per requested name and register
    # it on the graph under that name.
    vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names]
    for vpn, vprop in vprops:
        g.vertex_properties[vpn] = vprop
    eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names]
    for epn, eprop in eprops:
        g.edge_properties[epn] = eprop

    # rdflib node -> vertex already created for it
    node_to_vertex = {}

    def _vertex_for(node, transform, triple):
        """Return the vertex for node, creating and annotating it if new."""
        v = node_to_vertex.get(node)
        if v is None:
            v = g.add_vertex()
            node_to_vertex[node] = v
            tmp_props = transform(*triple)
            for vpn, vprop in vprops:
                vprop[v] = tmp_props[vpn]
        return v

    for s, p, o in graph:
        triple = (s, p, o)
        # Subject first, then object, so a self-loop reuses the same vertex.
        sv = _vertex_for(s, transform_s, triple)
        ov = _vertex_for(o, transform_o, triple)

        e = g.add_edge(sv, ov)
        tmp_props = transform_p(s, p, o)
        for epn, eprop in eprops:
            eprop[e] = tmp_props[epn]
    return g
| 338 | |
| 339 | |
if __name__ == '__main__':
    # Running the module directly executes its doctests through nose,
    # with debug logging switched on.
    import sys
    import logging.config
    logging.basicConfig(level=logging.DEBUG)

    import nose
    this_script = sys.argv[0]
    nose_argv = [this_script, this_script, '-v', '--with-doctest']
    nose.run(argv=nose_argv)
