Mercurial > repos > guerler > springsuite
view planemo/lib/python3.7/site-packages/rdflib/extras/external_graph_libs.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python2.7 # encoding: utf-8 """Convert (to and) from rdflib graphs to other well known graph libraries. Currently the following libraries are supported: - networkx: MultiDiGraph, DiGraph, Graph - graph_tool: Graph Doctests in this file are all skipped, as we can't run them conditionally if networkx or graph_tool are available and they would err otherwise. see ../../test/test_extras_external_graph_libs.py for conditional tests """ import logging logger = logging.getLogger(__name__) _identity = lambda x: x def _rdflib_to_networkx_graph( graph, nxgraph, calc_weights, edge_attrs, transform_s=_identity, transform_o=_identity): """Helper method for multidigraph, digraph and graph. Modifies nxgraph in-place! Arguments: graph: an rdflib.Graph. nxgraph: a networkx.Graph/DiGraph/MultiDigraph. calc_weights: If True adds a 'weight' attribute to each edge according to the count of s,p,o triples between s and o, which is meaningful for Graph/DiGraph. edge_attrs: Callable to construct edge data from s, p, o. 'triples' attribute is handled specially to be merged. 'weight' should not be generated if calc_weights==True. (see invokers below!) transform_s: Callable to transform node generated from s. transform_o: Callable to transform node generated from o. """ assert callable(edge_attrs) assert callable(transform_s) assert callable(transform_o) import networkx as nx for s, p, o in graph: ts, to = transform_s(s), transform_o(o) # apply possible transformations data = nxgraph.get_edge_data(ts, to) if data is None or isinstance(nxgraph, nx.MultiDiGraph): # no edge yet, set defaults data = edge_attrs(s, p, o) if calc_weights: data['weight'] = 1 nxgraph.add_edge(ts, to, **data) else: # already have an edge, just update attributes if calc_weights: data['weight'] += 1 if 'triples' in data: d = edge_attrs(s, p, o) data['triples'].extend(d['triples']) def rdflib_to_networkx_multidigraph( graph, edge_attrs=lambda s, p, o: {'key': p}, **kwds): """Converts the given graph into a networkx.MultiDiGraph. The subjects and objects are the later nodes of the MultiDiGraph. The predicates are used as edge keys (to identify multi-edges). Arguments: graph: a rdflib.Graph. edge_attrs: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, **attrs) function. By default this will include setting the MultiDiGraph key=p here. If you don't want to be able to re-identify the edge later on, you can set this to `lambda s, p, o: {}`. In this case MultiDiGraph's default (increasing ints) will be used. Returns: networkx.MultiDiGraph >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> mdg = rdflib_to_networkx_multidigraph(g) >>> len(mdg.edges()) 4 >>> mdg.has_edge(a, b) True >>> mdg.has_edge(a, b, key=p) True >>> mdg.has_edge(a, b, key=q) True >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {}) >>> mdg.has_edge(a, b, key=0) True >>> mdg.has_edge(a, b, key=1) True """ import networkx as nx mdg = nx.MultiDiGraph() _rdflib_to_networkx_graph(graph, mdg, False, edge_attrs, **kwds) return mdg def rdflib_to_networkx_digraph( graph, calc_weights=True, edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]}, **kwds): """Converts the given graph into a networkx.DiGraph. As an rdflib.Graph() can contain multiple edges between nodes, by default adds the a 'triples' attribute to the single DiGraph edge with a list of all triples between s and o. Also by default calculates the edge weight as the length of triples. Args: graph: a rdflib.Graph. calc_weights: If true calculate multi-graph edge-count as edge 'weight' edge_attrs: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, **attrs) function. By default this will include setting the 'triples' attribute here, which is treated specially by us to be merged. Other attributes of multi-edges will only contain the attributes of the first edge. If you don't want the 'triples' attribute for tracking, set this to `lambda s, p, o: {}`. Returns: networkx.DiGraph >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> dg = rdflib_to_networkx_digraph(g) >>> dg[a][b]['weight'] 2 >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)] True >>> len(dg.edges()) 3 >>> dg.size() 3 >>> dg.size(weight='weight') 4.0 >>> dg = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{}) >>> 'weight' in dg[a][b] False >>> 'triples' in dg[a][b] False """ import networkx as nx dg = nx.DiGraph() _rdflib_to_networkx_graph(graph, dg, calc_weights, edge_attrs, **kwds) return dg def rdflib_to_networkx_graph( graph, calc_weights=True, edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]}, **kwds): """Converts the given graph into a networkx.Graph. As an rdflib.Graph() can contain multiple directed edges between nodes, by default adds the a 'triples' attribute to the single DiGraph edge with a list of triples between s and o in graph. Also by default calculates the edge weight as the len(triples). Args: graph: a rdflib.Graph. calc_weights: If true calculate multi-graph edge-count as edge 'weight' edge_attrs: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, **attrs) function. By default this will include setting the 'triples' attribute here, which is treated specially by us to be merged. Other attributes of multi-edges will only contain the attributes of the first edge. If you don't want the 'triples' attribute for tracking, set this to `lambda s, p, o: {}`. Returns: networkx.Graph >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> ug = rdflib_to_networkx_graph(g) >>> ug[a][b]['weight'] 3 >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)] True >>> len(ug.edges()) 2 >>> ug.size() 2 >>> ug.size(weight='weight') 4.0 >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{}) >>> 'weight' in ug[a][b] False >>> 'triples' in ug[a][b] False """ import networkx as nx g = nx.Graph() _rdflib_to_networkx_graph(graph, g, calc_weights, edge_attrs, **kwds) return g def rdflib_to_graphtool( graph, v_prop_names=[str('term')], e_prop_names=[str('term')], transform_s=lambda s, p, o: {str('term'): s}, transform_p=lambda s, p, o: {str('term'): p}, transform_o=lambda s, p, o: {str('term'): o}, ): """Converts the given graph into a graph_tool.Graph(). The subjects and objects are the later vertices of the Graph. The predicates become edges. Arguments: graph: a rdflib.Graph. v_prop_names: a list of names for the vertex properties. The default is set to ['term'] (see transform_s, transform_o below). e_prop_names: a list of names for the edge properties. transform_s: callable with s, p, o input. Should return a dictionary containing a value for each name in v_prop_names. By default is set to {'term': s} which in combination with v_prop_names = ['term'] adds s as 'term' property to the generated vertex for s. transform_p: similar to transform_s, but wrt. e_prop_names. By default returns {'term': p} which adds p as a property to the generated edge between the vertex for s and the vertex for o. transform_o: similar to transform_s. Returns: graph_tool.Graph() >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> mdg = rdflib_to_graphtool(g) >>> len(list(mdg.edges())) 4 >>> from graph_tool import util as gt_util >>> vpterm = mdg.vertex_properties['term'] >>> va = gt_util.find_vertex(mdg, vpterm, a)[0] >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0] >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0] >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())] True >>> epterm = mdg.edge_properties['term'] >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3 True >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1 True >>> mdg = rdflib_to_graphtool( ... g, ... e_prop_names=[str('name')], ... transform_p=lambda s, p, o: {str('name'): unicode(p)}) >>> epterm = mdg.edge_properties['name'] >>> len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3 True >>> len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1 True """ import graph_tool as gt g = gt.Graph() vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names] for vpn, vprop in vprops: g.vertex_properties[vpn] = vprop eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names] for epn, eprop in eprops: g.edge_properties[epn] = eprop node_to_vertex = {} for s, p, o in graph: sv = node_to_vertex.get(s) if sv is None: v = g.add_vertex() node_to_vertex[s] = v tmp_props = transform_s(s, p, o) for vpn, vprop in vprops: vprop[v] = tmp_props[vpn] sv = v ov = node_to_vertex.get(o) if ov is None: v = g.add_vertex() node_to_vertex[o] = v tmp_props = transform_o(s, p, o) for vpn, vprop in vprops: vprop[v] = tmp_props[vpn] ov = v e = g.add_edge(sv, ov) tmp_props = transform_p(s, p, o) for epn, eprop in eprops: eprop[e] = tmp_props[epn] return g if __name__ == '__main__': import sys import logging.config logging.basicConfig(level=logging.DEBUG) import nose nose.run(argv=[sys.argv[0], sys.argv[0], '-v', '--with-doctest'])