Mercurial > repos > guerler > springsuite

#!/usr/bin/env python2.7
# encoding: utf-8


"""Convert (to and) from rdflib graphs to other well known graph libraries.

Currently the following libraries are supported:
- networkx: MultiDiGraph, DiGraph, Graph
- graph_tool: Graph

Doctests in this file are all skipped, as we can't run them conditionally if
networkx or graph_tool are available and they would err otherwise.
see ../../test/test_extras_external_graph_libs.py for conditional tests
"""

import logging
logger = logging.getLogger(__name__)

_identity = lambda x: x

def _rdflib_to_networkx_graph(
        graph,
        nxgraph,
        calc_weights,
        edge_attrs,
        transform_s=_identity, transform_o=_identity):
    """Helper method for multidigraph, digraph and graph.

    Modifies nxgraph in-place!

    Arguments:
        graph: an rdflib.Graph.
        nxgraph: a networkx.Graph/DiGraph/MultiDigraph.
        calc_weights: If True adds a 'weight' attribute to each edge according
            to the count of s,p,o triples between s and o, which is meaningful
            for Graph/DiGraph.
        edge_attrs: Callable to construct edge data from s, p, o.
           'triples' attribute is handled specially to be merged.
           'weight' should not be generated if calc_weights==True.
           (see invokers below!)
        transform_s: Callable to transform node generated from s.
        transform_o: Callable to transform node generated from o.
    """
    assert callable(edge_attrs)
    assert callable(transform_s)
    assert callable(transform_o)
    import networkx as nx
    for s, p, o in graph:
        ts, to = transform_s(s), transform_o(o)  # apply possible transformations
        data = nxgraph.get_edge_data(ts, to)
        if data is None or isinstance(nxgraph, nx.MultiDiGraph):
            # no edge yet, set defaults
            data = edge_attrs(s, p, o)
            if calc_weights:
                data['weight'] = 1
            nxgraph.add_edge(ts, to, **data)
        else:
            # already have an edge, just update attributes
            if calc_weights:
                data['weight'] += 1
            if 'triples' in data:
                d = edge_attrs(s, p, o)
                data['triples'].extend(d['triples'])

def rdflib_to_networkx_multidigraph(
        graph,
        edge_attrs=lambda s, p, o: {'key': p},
        **kwds):
    """Converts the given graph into a networkx.MultiDiGraph.

    The subjects and objects are the later nodes of the MultiDiGraph.
    The predicates are used as edge keys (to identify multi-edges).

    Arguments:
        graph: a rdflib.Graph.
        edge_attrs: Callable to construct later edge_attributes. It receives
            3 variables (s, p, o) and should construct a dictionary that is
            passed to networkx's add_edge(s, o, **attrs) function.

            By default this will include setting the MultiDiGraph key=p here.
            If you don't want to be able to re-identify the edge later on, you
            can set this to `lambda s, p, o: {}`. In this case MultiDiGraph's
            default (increasing ints) will be used.

    Returns:
        networkx.MultiDiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_networkx_multidigraph(g)
    >>> len(mdg.edges())
    4
    >>> mdg.has_edge(a, b)
    True
    >>> mdg.has_edge(a, b, key=p)
    True
    >>> mdg.has_edge(a, b, key=q)
    True

    >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {})
    >>> mdg.has_edge(a, b, key=0)
    True
    >>> mdg.has_edge(a, b, key=1)
    True
    """
    import networkx as nx
    mdg = nx.MultiDiGraph()
    _rdflib_to_networkx_graph(graph, mdg, False, edge_attrs, **kwds)
    return mdg

def rdflib_to_networkx_digraph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Converts the given graph into a networkx.DiGraph.

    As an rdflib.Graph() can contain multiple edges between nodes, by default
    adds the a 'triples' attribute to the single DiGraph edge with a list of
    all triples between s and o.
    Also by default calculates the edge weight as the length of triples.

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true calculate multi-graph edge-count as edge 'weight'
        edge_attrs: Callable to construct later edge_attributes. It receives
            3 variables (s, p, o) and should construct a dictionary that is
            passed to networkx's add_edge(s, o, **attrs) function.

            By default this will include setting the 'triples' attribute here,
            which is treated specially by us to be merged. Other attributes of
            multi-edges will only contain the attributes of the first edge.
            If you don't want the 'triples' attribute for tracking, set this to
            `lambda s, p, o: {}`.

    Returns:
        networkx.DiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> dg = rdflib_to_networkx_digraph(g)
    >>> dg[a][b]['weight']
    2
    >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
    True
    >>> len(dg.edges())
    3
    >>> dg.size()
    3
    >>> dg.size(weight='weight')
    4.0

    >>> dg = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in dg[a][b]
    False
    >>> 'triples' in dg[a][b]
    False
    """
    import networkx as nx
    dg = nx.DiGraph()
    _rdflib_to_networkx_graph(graph, dg, calc_weights, edge_attrs, **kwds)
    return dg


def rdflib_to_networkx_graph(
        graph,
        calc_weights=True,
        edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]},
        **kwds):
    """Converts the given graph into a networkx.Graph.

    As an rdflib.Graph() can contain multiple directed edges between nodes, by
    default adds the a 'triples' attribute to the single DiGraph edge with a
    list of triples between s and o in graph.
    Also by default calculates the edge weight as the len(triples).

    Args:
        graph: a rdflib.Graph.
        calc_weights: If true calculate multi-graph edge-count as edge 'weight'
        edge_attrs: Callable to construct later edge_attributes. It receives
            3 variables (s, p, o) and should construct a dictionary that is
            passed to networkx's add_edge(s, o, **attrs) function.

            By default this will include setting the 'triples' attribute here,
            which is treated specially by us to be merged. Other attributes of
            multi-edges will only contain the attributes of the first edge.
            If you don't want the 'triples' attribute for tracking, set this to
            `lambda s, p, o: {}`.

    Returns:
        networkx.Graph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> ug = rdflib_to_networkx_graph(g)
    >>> ug[a][b]['weight']
    3
    >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
    True
    >>> len(ug.edges())
    2
    >>> ug.size()
    2
    >>> ug.size(weight='weight')
    4.0

    >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in ug[a][b]
    False
    >>> 'triples' in ug[a][b]
    False
    """
    import networkx as nx
    g = nx.Graph()
    _rdflib_to_networkx_graph(graph, g, calc_weights, edge_attrs, **kwds)
    return g


def rdflib_to_graphtool(
        graph,
        v_prop_names=[str('term')],
        e_prop_names=[str('term')],
        transform_s=lambda s, p, o: {str('term'): s},
        transform_p=lambda s, p, o: {str('term'): p},
        transform_o=lambda s, p, o: {str('term'): o},
    ):
    """Converts the given graph into a graph_tool.Graph().

    The subjects and objects are the later vertices of the Graph.
    The predicates become edges.

    Arguments:
        graph: a rdflib.Graph.
        v_prop_names: a list of names for the vertex properties. The default is
            set to ['term'] (see transform_s, transform_o below).
        e_prop_names: a list of names for the edge properties.
        transform_s: callable with s, p, o input. Should return a dictionary
            containing a value for each name in v_prop_names. By default is set
            to {'term': s} which in combination with v_prop_names = ['term']
            adds s as 'term' property to the generated vertex for s.
        transform_p: similar to transform_s, but wrt. e_prop_names. By default
            returns {'term': p} which adds p as a property to the generated
            edge between the vertex for s and the vertex for o.
        transform_o: similar to transform_s.

    Returns:
        graph_tool.Graph()

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_graphtool(g)
    >>> len(list(mdg.edges()))
    4
    >>> from graph_tool import util as gt_util
    >>> vpterm = mdg.vertex_properties['term']
    >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
    >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    True
    >>> epterm = mdg.edge_properties['term']
    >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    True

    >>> mdg = rdflib_to_graphtool(
    ...     g,
    ...     e_prop_names=[str('name')],
    ...     transform_p=lambda s, p, o: {str('name'): unicode(p)})
    >>> epterm = mdg.edge_properties['name']
    >>> len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1
    True
    """
    import graph_tool as gt
    g = gt.Graph()

    vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names]
    for vpn, vprop in vprops:
        g.vertex_properties[vpn] = vprop
    eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names]
    for epn, eprop in eprops:
        g.edge_properties[epn] = eprop
    node_to_vertex = {}
    for s, p, o in graph:
        sv = node_to_vertex.get(s)
        if sv is None:
            v = g.add_vertex()
            node_to_vertex[s] = v
            tmp_props = transform_s(s, p, o)
            for vpn, vprop in vprops:
                vprop[v] = tmp_props[vpn]
            sv = v

        ov = node_to_vertex.get(o)
        if ov is None:
            v = g.add_vertex()
            node_to_vertex[o] = v
            tmp_props = transform_o(s, p, o)
            for vpn, vprop in vprops:
                vprop[v] = tmp_props[vpn]
            ov = v

        e = g.add_edge(sv, ov)
        tmp_props = transform_p(s, p, o)
        for epn, eprop in eprops:
            eprop[e] = tmp_props[epn]
    return g


if __name__ == '__main__':
    import sys
    import logging.config
    logging.basicConfig(level=logging.DEBUG)

    import nose
    nose.run(argv=[sys.argv[0], sys.argv[0], '-v', '--with-doctest'])
author	guerler
date	Fri, 31 Jul 2020 00:32:28 -0400
parents
children