Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/networkx/convert_matrix.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo/lib/python3.7/site-packages/networkx/convert_matrix.py Fri Jul 31 00:32:28 2020 -0400 @@ -0,0 +1,1249 @@ +# Copyright (C) 2006-2019 by +# Aric Hagberg <hagberg@lanl.gov> +# Dan Schult <dschult@colgate.edu> +# Pieter Swart <swart@lanl.gov> +# All rights reserved. +# BSD license. +"""Functions to convert NetworkX graphs to and from numpy/scipy matrices. + +The preferred way of converting data to a NetworkX graph is through the +graph constructor. The constructor calls the to_networkx_graph() function +which attempts to guess the input type and convert it automatically. + +Examples +-------- +Create a 10 node random graph from a numpy matrix + +>>> import numpy as np +>>> a = np.random.randint(0, 2, size=(10, 10)) +>>> D = nx.DiGraph(a) + +or equivalently + +>>> D = nx.to_networkx_graph(a, create_using=nx.DiGraph) + +See Also +-------- +nx_agraph, nx_pydot +""" + +import itertools +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ['from_numpy_matrix', 'to_numpy_matrix', + 'from_pandas_adjacency', 'to_pandas_adjacency', + 'from_pandas_edgelist', 'to_pandas_edgelist', + 'to_numpy_recarray', + 'from_scipy_sparse_matrix', 'to_scipy_sparse_matrix', + 'from_numpy_array', 'to_numpy_array'] + + +def to_pandas_adjacency(G, nodelist=None, dtype=None, order=None, + multigraph_weight=sum, weight='weight', nonedge=0.0): + """Returns the graph adjacency matrix as a Pandas DataFrame. + + Parameters + ---------- + G : graph + The NetworkX graph used to construct the Pandas DataFrame. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in `nodelist`. + If `nodelist` is None, then the ordering is produced by G.nodes(). + + multigraph_weight : {sum, min, max}, optional + An operator that determines how weights in multigraphs are handled. + The default is to sum the weights of the multiple edges. + + weight : string or None, optional + The edge attribute that holds the numerical value used for + the edge weight. If an edge does not have that attribute, then the + value 1 is used instead. + + nonedge : float, optional + The matrix values corresponding to nonedges are typically set to zero. + However, this could be undesirable if there are matrix values + corresponding to actual edges that also have the value zero. If so, + one might prefer nonedges to have some other value, such as nan. + + Returns + ------- + df : Pandas DataFrame + Graph adjacency matrix + + Notes + ----- + For directed graphs, entry i,j corresponds to an edge from i to j. + + The DataFrame entries are assigned to the weight edge attribute. When + an edge does not have a weight attribute, the value of the entry is set to + the number 1. For multiple (parallel) edges, the values of the entries + are determined by the 'multigraph_weight' parameter. The default is to + sum the weight attributes for each of the parallel edges. + + When `nodelist` does not contain every node in `G`, the matrix is built + from the subgraph of `G` that is induced by the nodes in `nodelist`. + + The convention used for self-loop edges in graphs is to assign the + diagonal matrix entry value to the weight attribute of the edge + (or the number 1 if the edge has no weight attribute). If the + alternate convention of doubling the edge weight is desired the + resulting Pandas DataFrame can be modified as follows: + + >>> import pandas as pd + >>> pd.options.display.max_columns = 20 + >>> import numpy as np + >>> G = nx.Graph([(1, 1)]) + >>> df = nx.to_pandas_adjacency(G, dtype=int) + >>> df + 1 + 1 1 + >>> df.values[np.diag_indices_from(df)] *= 2 + >>> df + 1 + 1 2 + + Examples + -------- + >>> G = nx.MultiDiGraph() + >>> G.add_edge(0, 1, weight=2) + 0 + >>> G.add_edge(1, 0) + 0 + >>> G.add_edge(2, 2, weight=3) + 0 + >>> G.add_edge(2, 2) + 1 + >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int) + 0 1 2 + 0 0 2 0 + 1 1 0 0 + 2 0 0 4 + + """ + import pandas as pd + M = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order, + multigraph_weight=multigraph_weight, weight=weight, + nonedge=nonedge) + if nodelist is None: + nodelist = list(G) + return pd.DataFrame(data=M, index=nodelist, columns=nodelist) + + +def from_pandas_adjacency(df, create_using=None): + r"""Returns a graph from Pandas DataFrame. + + The Pandas DataFrame is interpreted as an adjacency matrix for the graph. + + Parameters + ---------- + df : Pandas DataFrame + An adjacency matrix representation of a graph + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Notes + ----- + For directed graphs, explicitly mention create_using=nx.Digraph, + and entry i,j of df corresponds to an edge from i to j. + + If the numpy matrix has a single data type for each matrix entry it + will be converted to an appropriate Python data type. + + If the numpy matrix has a user-specified compound data type the names + of the data fields will be used as attribute keys in the resulting + NetworkX graph. + + See Also + -------- + to_pandas_adjacency + + Examples + -------- + Simple integer weights on edges: + + >>> import pandas as pd + >>> pd.options.display.max_columns = 20 + >>> df = pd.DataFrame([[1, 1], [2, 1]]) + >>> df + 0 1 + 0 1 1 + 1 2 1 + >>> G = nx.from_pandas_adjacency(df) + >>> G.name = 'Graph from pandas adjacency matrix' + >>> print(nx.info(G)) + Name: Graph from pandas adjacency matrix + Type: Graph + Number of nodes: 2 + Number of edges: 3 + Average degree: 3.0000 + + """ + + try: + df = df[df.index] + except Exception: + msg = "%s not in columns" + missing = list(set(df.index).difference(set(df.columns))) + raise nx.NetworkXError("Columns must match Indices.", msg % missing) + + A = df.values + G = from_numpy_matrix(A, create_using=create_using) + + nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False) + return G + + +def to_pandas_edgelist(G, source='source', target='target', nodelist=None, + dtype=None, order=None): + """Returns the graph edge list as a Pandas DataFrame. + + Parameters + ---------- + G : graph + The NetworkX graph used to construct the Pandas DataFrame. + + source : str or int, optional + A valid column name (string or integer) for the source nodes (for the + directed case). + + target : str or int, optional + A valid column name (string or integer) for the target nodes (for the + directed case). + + nodelist : list, optional + Use only nodes specified in nodelist + + Returns + ------- + df : Pandas DataFrame + Graph edge list + + Examples + -------- + >>> G = nx.Graph([('A', 'B', {'cost': 1, 'weight': 7}), + ... ('C', 'E', {'cost': 9, 'weight': 10})]) + >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C']) + >>> df[['source', 'target', 'cost', 'weight']] + source target cost weight + 0 A B 1 7 + 1 C E 9 10 + + """ + import pandas as pd + if nodelist is None: + edgelist = G.edges(data=True) + else: + edgelist = G.edges(nodelist, data=True) + source_nodes = [s for s, t, d in edgelist] + target_nodes = [t for s, t, d in edgelist] + all_keys = set().union(*(d.keys() for s, t, d in edgelist)) + edge_attr = {k: [d.get(k, float("nan")) for s, t, d in edgelist] + for k in all_keys} + edgelistdict = {source: source_nodes, target: target_nodes} + edgelistdict.update(edge_attr) + return pd.DataFrame(edgelistdict) + + +def from_pandas_edgelist(df, source='source', target='target', edge_attr=None, + create_using=None): + """Returns a graph from Pandas DataFrame containing an edge list. + + The Pandas DataFrame should contain at least two columns of node names and + zero or more columns of edge attributes. Each row will be processed as one + edge instance. + + Note: This function iterates over DataFrame.values, which is not + guaranteed to retain the data type across columns in the row. This is only + a problem if your row is entirely numeric and a mix of ints and floats. In + that case, all values will be returned as floats. See the + DataFrame.iterrows documentation for an example. + + Parameters + ---------- + df : Pandas DataFrame + An edge list representation of a graph + + source : str or int + A valid column name (string or integer) for the source nodes (for the + directed case). + + target : str or int + A valid column name (string or integer) for the target nodes (for the + directed case). + + edge_attr : str or int, iterable, True, or None + A valid column name (str or int) or iterable of column names that are + used to retrieve items and add them to the graph as edge attributes. + If `True`, all of the remaining columns will be added. + If `None`, no edge attributes are added to the graph. + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + See Also + -------- + to_pandas_edgelist + + Examples + -------- + Simple integer weights on edges: + + >>> import pandas as pd + >>> pd.options.display.max_columns = 20 + >>> import numpy as np + >>> rng = np.random.RandomState(seed=5) + >>> ints = rng.randint(1, 11, size=(3,2)) + >>> a = ['A', 'B', 'C'] + >>> b = ['D', 'A', 'E'] + >>> df = pd.DataFrame(ints, columns=['weight', 'cost']) + >>> df[0] = a + >>> df['b'] = b + >>> df[['weight', 'cost', 0, 'b']] + weight cost 0 b + 0 4 7 A D + 1 7 1 B A + 2 10 9 C E + >>> G = nx.from_pandas_edgelist(df, 0, 'b', ['weight', 'cost']) + >>> G['E']['C']['weight'] + 10 + >>> G['E']['C']['cost'] + 9 + >>> edges = pd.DataFrame({'source': [0, 1, 2], + ... 'target': [2, 2, 3], + ... 'weight': [3, 4, 5], + ... 'color': ['red', 'blue', 'blue']}) + >>> G = nx.from_pandas_edgelist(edges, edge_attr=True) + >>> G[0][2]['color'] + 'red' + + """ + g = nx.empty_graph(0, create_using) + + if edge_attr is None: + g.add_edges_from(zip(df[source], df[target])) + return g + + # Additional columns requested + if edge_attr is True: + cols = [c for c in df.columns if c is not source and c is not target] + elif isinstance(edge_attr, (list, tuple)): + cols = edge_attr + else: + cols = [edge_attr] + if len(cols) == 0: + msg = "Invalid edge_attr argument. No columns found with name: %s" + raise nx.NetworkXError(msg % cols) + + try: + eattrs = zip(*[df[col] for col in cols]) + except (KeyError, TypeError) as e: + msg = "Invalid edge_attr argument: %s" % edge_attr + raise nx.NetworkXError(msg) + for s, t, attrs in zip(df[source], df[target], eattrs): + if g.is_multigraph(): + key = g.add_edge(s, t) + g[s][t][key].update(zip(cols, attrs)) + else: + g.add_edge(s, t) + g[s][t].update(zip(cols, attrs)) + + return g + + +def to_numpy_matrix(G, nodelist=None, dtype=None, order=None, + multigraph_weight=sum, weight='weight', nonedge=0.0): + """Returns the graph adjacency matrix as a NumPy matrix. + + Parameters + ---------- + G : graph + The NetworkX graph used to construct the NumPy matrix. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in `nodelist`. + If `nodelist` is None, then the ordering is produced by G.nodes(). + + dtype : NumPy data type, optional + A valid single NumPy data type used to initialize the array. + This must be a simple type such as int or numpy.float64 and + not a compound data type (see to_numpy_recarray) + If None, then the NumPy default is used. + + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. If None, then the NumPy default + is used. + + multigraph_weight : {sum, min, max}, optional + An operator that determines how weights in multigraphs are handled. + The default is to sum the weights of the multiple edges. + + weight : string or None optional (default = 'weight') + The edge attribute that holds the numerical value used for + the edge weight. If an edge does not have that attribute, then the + value 1 is used instead. + + nonedge : float (default = 0.0) + The matrix values corresponding to nonedges are typically set to zero. + However, this could be undesirable if there are matrix values + corresponding to actual edges that also have the value zero. If so, + one might prefer nonedges to have some other value, such as nan. + + Returns + ------- + M : NumPy matrix + Graph adjacency matrix + + See Also + -------- + to_numpy_recarray, from_numpy_matrix + + Notes + ----- + For directed graphs, entry i,j corresponds to an edge from i to j. + + The matrix entries are assigned to the weight edge attribute. When + an edge does not have a weight attribute, the value of the entry is set to + the number 1. For multiple (parallel) edges, the values of the entries + are determined by the `multigraph_weight` parameter. The default is to + sum the weight attributes for each of the parallel edges. + + When `nodelist` does not contain every node in `G`, the matrix is built + from the subgraph of `G` that is induced by the nodes in `nodelist`. + + The convention used for self-loop edges in graphs is to assign the + diagonal matrix entry value to the weight attribute of the edge + (or the number 1 if the edge has no weight attribute). If the + alternate convention of doubling the edge weight is desired the + resulting Numpy matrix can be modified as follows: + + >>> import numpy as np + >>> G = nx.Graph([(1, 1)]) + >>> A = nx.to_numpy_matrix(G) + >>> A + matrix([[1.]]) + >>> A[np.diag_indices_from(A)] *= 2 + >>> A + matrix([[2.]]) + + Examples + -------- + >>> G = nx.MultiDiGraph() + >>> G.add_edge(0, 1, weight=2) + 0 + >>> G.add_edge(1, 0) + 0 + >>> G.add_edge(2, 2, weight=3) + 0 + >>> G.add_edge(2, 2) + 1 + >>> nx.to_numpy_matrix(G, nodelist=[0, 1, 2]) + matrix([[0., 2., 0.], + [1., 0., 0.], + [0., 0., 4.]]) + + """ + import numpy as np + + A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order, + multigraph_weight=multigraph_weight, weight=weight, + nonedge=nonedge) + M = np.asmatrix(A, dtype=dtype) + return M + + +def from_numpy_matrix(A, parallel_edges=False, create_using=None): + """Returns a graph from numpy matrix. + + The numpy matrix is interpreted as an adjacency matrix for the graph. + + Parameters + ---------- + A : numpy matrix + An adjacency matrix representation of a graph + + parallel_edges : Boolean + If True, `create_using` is a multigraph, and `A` is an + integer matrix, then entry *(i, j)* in the matrix is interpreted as the + number of parallel edges joining vertices *i* and *j* in the graph. + If False, then the entries in the adjacency matrix are interpreted as + the weight of a single edge joining the vertices. + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Notes + ----- + For directed graphs, explicitly mention create_using=nx.Digraph, + and entry i,j of A corresponds to an edge from i to j. + + If `create_using` is :class:`networkx.MultiGraph` or + :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the + entries of `A` are of type :class:`int`, then this function returns a + multigraph (constructed from `create_using`) with parallel edges. + + If `create_using` indicates an undirected multigraph, then only the edges + indicated by the upper triangle of the matrix `A` will be added to the + graph. + + If the numpy matrix has a single data type for each matrix entry it + will be converted to an appropriate Python data type. + + If the numpy matrix has a user-specified compound data type the names + of the data fields will be used as attribute keys in the resulting + NetworkX graph. + + See Also + -------- + to_numpy_matrix, to_numpy_recarray + + Examples + -------- + Simple integer weights on edges: + + >>> import numpy as np + >>> A = np.array([[1, 1], [2, 1]]) + >>> G = nx.from_numpy_matrix(A) + + If `create_using` indicates a multigraph and the matrix has only integer + entries and `parallel_edges` is False, then the entries will be treated + as weights for edges joining the nodes (without creating parallel edges): + + >>> A = np.array([[1, 1], [1, 2]]) + >>> G = nx.from_numpy_matrix(A, create_using=nx.MultiGraph) + >>> G[1][1] + AtlasView({0: {'weight': 2}}) + + If `create_using` indicates a multigraph and the matrix has only integer + entries and `parallel_edges` is True, then the entries will be treated + as the number of parallel edges joining those two vertices: + + >>> A = np.array([[1, 1], [1, 2]]) + >>> temp = nx.MultiGraph() + >>> G = nx.from_numpy_matrix(A, parallel_edges=True, create_using=temp) + >>> G[1][1] + AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) + + User defined compound data type on edges: + + >>> dt = [('weight', float), ('cost', int)] + >>> A = np.array([[(1.0, 2)]], dtype=dt) + >>> G = nx.from_numpy_matrix(A) + >>> list(G.edges()) + [(0, 0)] + >>> G[0][0]['cost'] + 2 + >>> G[0][0]['weight'] + 1.0 + + """ + # This should never fail if you have created a numpy matrix with numpy... + import numpy as np + kind_to_python_type = {'f': float, + 'i': int, + 'u': int, + 'b': bool, + 'c': complex, + 'S': str, + 'V': 'void'} + try: # Python 3.x + blurb = chr(1245) # just to trigger the exception + kind_to_python_type['U'] = str + except ValueError: # Python 2.7 + kind_to_python_type['U'] = unicode + G = nx.empty_graph(0, create_using) + n, m = A.shape + if n != m: + raise nx.NetworkXError("Adjacency matrix is not square.", + "nx,ny=%s" % (A.shape,)) + dt = A.dtype + try: + python_type = kind_to_python_type[dt.kind] + except Exception: + raise TypeError("Unknown numpy data type: %s" % dt) + + # Make sure we get even the isolated nodes of the graph. + G.add_nodes_from(range(n)) + # Get a list of all the entries in the matrix with nonzero entries. These + # coordinates will become the edges in the graph. + edges = map(lambda e: (int(e[0]), int(e[1])), + zip(*(np.asarray(A).nonzero()))) + # handle numpy constructed data type + if python_type == 'void': + # Sort the fields by their offset, then by dtype, then by name. + fields = sorted((offset, dtype, name) for name, (dtype, offset) in + A.dtype.fields.items()) + triples = ((u, v, {name: kind_to_python_type[dtype.kind](val) + for (_, dtype, name), val in zip(fields, A[u, v])}) + for u, v in edges) + # If the entries in the adjacency matrix are integers, the graph is a + # multigraph, and parallel_edges is True, then create parallel edges, each + # with weight 1, for each entry in the adjacency matrix. Otherwise, create + # one edge for each positive entry in the adjacency matrix and set the + # weight of that edge to be the entry in the matrix. + elif python_type is int and G.is_multigraph() and parallel_edges: + chain = itertools.chain.from_iterable + # The following line is equivalent to: + # + # for (u, v) in edges: + # for d in range(A[u, v]): + # G.add_edge(u, v, weight=1) + # + triples = chain(((u, v, dict(weight=1)) for d in range(A[u, v])) + for (u, v) in edges) + else: # basic data type + triples = ((u, v, dict(weight=python_type(A[u, v]))) + for u, v in edges) + # If we are creating an undirected multigraph, only add the edges from the + # upper triangle of the matrix. Otherwise, add all the edges. This relies + # on the fact that the vertices created in the + # `_generated_weighted_edges()` function are actually the row/column + # indices for the matrix `A`. + # + # Without this check, we run into a problem where each edge is added twice + # when `G.add_edges_from()` is invoked below. + if G.is_multigraph() and not G.is_directed(): + triples = ((u, v, d) for u, v, d in triples if u <= v) + G.add_edges_from(triples) + return G + + +@not_implemented_for('multigraph') +def to_numpy_recarray(G, nodelist=None, dtype=None, order=None): + """Returns the graph adjacency matrix as a NumPy recarray. + + Parameters + ---------- + G : graph + The NetworkX graph used to construct the NumPy matrix. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in `nodelist`. + If `nodelist` is None, then the ordering is produced by G.nodes(). + + dtype : NumPy data-type, optional + A valid NumPy named dtype used to initialize the NumPy recarray. + The data type names are assumed to be keys in the graph edge attribute + dictionary. + + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. If None, then the NumPy default + is used. + + Returns + ------- + M : NumPy recarray + The graph with specified edge data as a Numpy recarray + + Notes + ----- + When `nodelist` does not contain every node in `G`, the matrix is built + from the subgraph of `G` that is induced by the nodes in `nodelist`. + + Examples + -------- + >>> G = nx.Graph() + >>> G.add_edge(1, 2, weight=7.0, cost=5) + >>> A = nx.to_numpy_recarray(G, dtype=[('weight', float), ('cost', int)]) + >>> print(A.weight) + [[0. 7.] + [7. 0.]] + >>> print(A.cost) + [[0 5] + [5 0]] + + """ + if dtype is None: + dtype = [('weight', float)] + import numpy as np + if nodelist is None: + nodelist = list(G) + nodeset = set(nodelist) + if len(nodelist) != len(nodeset): + msg = "Ambiguous ordering: `nodelist` contained duplicates." + raise nx.NetworkXError(msg) + nlen = len(nodelist) + undirected = not G.is_directed() + index = dict(zip(nodelist, range(nlen))) + M = np.zeros((nlen, nlen), dtype=dtype, order=order) + + names = M.dtype.names + for u, v, attrs in G.edges(data=True): + if (u in nodeset) and (v in nodeset): + i, j = index[u], index[v] + values = tuple([attrs[n] for n in names]) + M[i, j] = values + if undirected: + M[j, i] = M[i, j] + + return M.view(np.recarray) + + +def to_scipy_sparse_matrix(G, nodelist=None, dtype=None, + weight='weight', format='csr'): + """Returns the graph adjacency matrix as a SciPy sparse matrix. + + Parameters + ---------- + G : graph + The NetworkX graph used to construct the NumPy matrix. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in `nodelist`. + If `nodelist` is None, then the ordering is produced by G.nodes(). + + dtype : NumPy data-type, optional + A valid NumPy dtype used to initialize the array. If None, then the + NumPy default is used. + + weight : string or None optional (default='weight') + The edge attribute that holds the numerical value used for + the edge weight. If None then all edge weights are 1. + + format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'} + The type of the matrix to be returned (default 'csr'). For + some algorithms different implementations of sparse matrices + can perform better. See [1]_ for details. + + Returns + ------- + M : SciPy sparse matrix + Graph adjacency matrix. + + Notes + ----- + For directed graphs, matrix entry i,j corresponds to an edge from i to j. + + The matrix entries are populated using the edge attribute held in + parameter weight. When an edge does not have that attribute, the + value of the entry is 1. + + For multiple edges the matrix values are the sums of the edge weights. + + When `nodelist` does not contain every node in `G`, the matrix is built + from the subgraph of `G` that is induced by the nodes in `nodelist`. + + Uses coo_matrix format. To convert to other formats specify the + format= keyword. + + The convention used for self-loop edges in graphs is to assign the + diagonal matrix entry value to the weight attribute of the edge + (or the number 1 if the edge has no weight attribute). If the + alternate convention of doubling the edge weight is desired the + resulting Scipy sparse matrix can be modified as follows: + + >>> import scipy as sp + >>> G = nx.Graph([(1, 1)]) + >>> A = nx.to_scipy_sparse_matrix(G) + >>> print(A.todense()) + [[1]] + >>> A.setdiag(A.diagonal() * 2) + >>> print(A.todense()) + [[2]] + + Examples + -------- + >>> G = nx.MultiDiGraph() + >>> G.add_edge(0, 1, weight=2) + 0 + >>> G.add_edge(1, 0) + 0 + >>> G.add_edge(2, 2, weight=3) + 0 + >>> G.add_edge(2, 2) + 1 + >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2]) + >>> print(S.todense()) + [[0 2 0] + [1 0 0] + [0 0 4]] + + References + ---------- + .. [1] Scipy Dev. References, "Sparse Matrices", + https://docs.scipy.org/doc/scipy/reference/sparse.html + """ + from scipy import sparse + if nodelist is None: + nodelist = list(G) + nlen = len(nodelist) + if nlen == 0: + raise nx.NetworkXError("Graph has no nodes or edges") + + if len(nodelist) != len(set(nodelist)): + msg = "Ambiguous ordering: `nodelist` contained duplicates." + raise nx.NetworkXError(msg) + + index = dict(zip(nodelist, range(nlen))) + coefficients = zip(*((index[u], index[v], d.get(weight, 1)) + for u, v, d in G.edges(nodelist, data=True) + if u in index and v in index)) + try: + row, col, data = coefficients + except ValueError: + # there is no edge in the subgraph + row, col, data = [], [], [] + + if G.is_directed(): + M = sparse.coo_matrix((data, (row, col)), + shape=(nlen, nlen), dtype=dtype) + else: + # symmetrize matrix + d = data + data + r = row + col + c = col + row + # selfloop entries get double counted when symmetrizing + # so we subtract the data on the diagonal + selfloops = list(nx.selfloop_edges(G, data=True)) + if selfloops: + diag_index, diag_data = zip(*((index[u], -d.get(weight, 1)) + for u, v, d in selfloops + if u in index and v in index)) + d += diag_data + r += diag_index + c += diag_index + M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype) + try: + return M.asformat(format) + # From Scipy 1.1.0, asformat will throw a ValueError instead of an + # AttributeError if the format if not recognized. + except (AttributeError, ValueError): + raise nx.NetworkXError("Unknown sparse matrix format: %s" % format) + + +def _csr_gen_triples(A): + """Converts a SciPy sparse matrix in **Compressed Sparse Row** format to + an iterable of weighted edge triples. + + """ + nrows = A.shape[0] + data, indices, indptr = A.data, A.indices, A.indptr + for i in range(nrows): + for j in range(indptr[i], indptr[i + 1]): + yield i, indices[j], data[j] + + +def _csc_gen_triples(A): + """Converts a SciPy sparse matrix in **Compressed Sparse Column** format to + an iterable of weighted edge triples. + + """ + ncols = A.shape[1] + data, indices, indptr = A.data, A.indices, A.indptr + for i in range(ncols): + for j in range(indptr[i], indptr[i + 1]): + yield indices[j], i, data[j] + + +def _coo_gen_triples(A): + """Converts a SciPy sparse matrix in **Coordinate** format to an iterable + of weighted edge triples. + + """ + row, col, data = A.row, A.col, A.data + return zip(row, col, data) + + +def _dok_gen_triples(A): + """Converts a SciPy sparse matrix in **Dictionary of Keys** format to an + iterable of weighted edge triples. + + """ + for (r, c), v in A.items(): + yield r, c, v + + +def _generate_weighted_edges(A): + """Returns an iterable over (u, v, w) triples, where u and v are adjacent + vertices and w is the weight of the edge joining u and v. + + `A` is a SciPy sparse matrix (in any format). + + """ + if A.format == 'csr': + return _csr_gen_triples(A) + if A.format == 'csc': + return _csc_gen_triples(A) + if A.format == 'dok': + return _dok_gen_triples(A) + # If A is in any other format (including COO), convert it to COO format. + return _coo_gen_triples(A.tocoo()) + + +def from_scipy_sparse_matrix(A, parallel_edges=False, create_using=None, + edge_attribute='weight'): + """Creates a new graph from an adjacency matrix given as a SciPy sparse + matrix. + + Parameters + ---------- + A: scipy sparse matrix + An adjacency matrix representation of a graph + + parallel_edges : Boolean + If this is True, `create_using` is a multigraph, and `A` is an + integer matrix, then entry *(i, j)* in the matrix is interpreted as the + number of parallel edges joining vertices *i* and *j* in the graph. + If it is False, then the entries in the matrix are interpreted as + the weight of a single edge joining the vertices. + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + edge_attribute: string + Name of edge attribute to store matrix numeric value. The data will + have the same type as the matrix entry (int, float, (real,imag)). + + Notes + ----- + For directed graphs, explicitly mention create_using=nx.Digraph, + and entry i,j of A corresponds to an edge from i to j. + + If `create_using` is :class:`networkx.MultiGraph` or + :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the + entries of `A` are of type :class:`int`, then this function returns a + multigraph (constructed from `create_using`) with parallel edges. + In this case, `edge_attribute` will be ignored. + + If `create_using` indicates an undirected multigraph, then only the edges + indicated by the upper triangle of the matrix `A` will be added to the + graph. + + Examples + -------- + >>> import scipy as sp + >>> A = sp.sparse.eye(2, 2, 1) + >>> G = nx.from_scipy_sparse_matrix(A) + + If `create_using` indicates a multigraph and the matrix has only integer + entries and `parallel_edges` is False, then the entries will be treated + as weights for edges joining the nodes (without creating parallel edges): + + >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]]) + >>> G = nx.from_scipy_sparse_matrix(A, create_using=nx.MultiGraph) + >>> G[1][1] + AtlasView({0: {'weight': 2}}) + + If `create_using` indicates a multigraph and the matrix has only integer + entries and `parallel_edges` is True, then the entries will be treated + as the number of parallel edges joining those two vertices: + + >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]]) + >>> G = nx.from_scipy_sparse_matrix(A, parallel_edges=True, + ... create_using=nx.MultiGraph) + >>> G[1][1] + AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) + + """ + G = nx.empty_graph(0, create_using) + n, m = A.shape + if n != m: + raise nx.NetworkXError( + "Adjacency matrix is not square. nx,ny=%s" % (A.shape,)) + # Make sure we get even the isolated nodes of the graph. + G.add_nodes_from(range(n)) + # Create an iterable over (u, v, w) triples and for each triple, add an + # edge from u to v with weight w. + triples = _generate_weighted_edges(A) + # If the entries in the adjacency matrix are integers, the graph is a + # multigraph, and parallel_edges is True, then create parallel edges, each + # with weight 1, for each entry in the adjacency matrix. Otherwise, create + # one edge for each positive entry in the adjacency matrix and set the + # weight of that edge to be the entry in the matrix. + if A.dtype.kind in ('i', 'u') and G.is_multigraph() and parallel_edges: + chain = itertools.chain.from_iterable + # The following line is equivalent to: + # + # for (u, v) in edges: + # for d in range(A[u, v]): + # G.add_edge(u, v, weight=1) + # + triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples) + # If we are creating an undirected multigraph, only add the edges from the + # upper triangle of the matrix. Otherwise, add all the edges. This relies + # on the fact that the vertices created in the + # `_generated_weighted_edges()` function are actually the row/column + # indices for the matrix `A`. + # + # Without this check, we run into a problem where each edge is added twice + # when `G.add_weighted_edges_from()` is invoked below. + if G.is_multigraph() and not G.is_directed(): + triples = ((u, v, d) for u, v, d in triples if u <= v) + G.add_weighted_edges_from(triples, weight=edge_attribute) + return G + + +def to_numpy_array(G, nodelist=None, dtype=None, order=None, + multigraph_weight=sum, weight='weight', nonedge=0.0): + """Returns the graph adjacency matrix as a NumPy array. + + Parameters + ---------- + G : graph + The NetworkX graph used to construct the NumPy array. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in `nodelist`. + If `nodelist` is None, then the ordering is produced by G.nodes(). + + dtype : NumPy data type, optional + A valid single NumPy data type used to initialize the array. + This must be a simple type such as int or numpy.float64 and + not a compound data type (see to_numpy_recarray) + If None, then the NumPy default is used. + + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. If None, then the NumPy default + is used. + + multigraph_weight : {sum, min, max}, optional + An operator that determines how weights in multigraphs are handled. + The default is to sum the weights of the multiple edges. + + weight : string or None optional (default = 'weight') + The edge attribute that holds the numerical value used for + the edge weight. If an edge does not have that attribute, then the + value 1 is used instead. + + nonedge : float (default = 0.0) + The array values corresponding to nonedges are typically set to zero. + However, this could be undesirable if there are array values + corresponding to actual edges that also have the value zero. If so, + one might prefer nonedges to have some other value, such as nan. + + Returns + ------- + A : NumPy ndarray + Graph adjacency matrix + + See Also + -------- + from_numpy_array + + Notes + ----- + For directed graphs, entry i,j corresponds to an edge from i to j. + + Entries in the adjacency matrix are assigned to the weight edge attribute. + When an edge does not have a weight attribute, the value of the entry is + set to the number 1. For multiple (parallel) edges, the values of the + entries are determined by the `multigraph_weight` parameter. The default is + to sum the weight attributes for each of the parallel edges. + + When `nodelist` does not contain every node in `G`, the adjacency matrix is + built from the subgraph of `G` that is induced by the nodes in `nodelist`. + + The convention used for self-loop edges in graphs is to assign the + diagonal array entry value to the weight attribute of the edge + (or the number 1 if the edge has no weight attribute). If the + alternate convention of doubling the edge weight is desired the + resulting NumPy array can be modified as follows: + + >>> import numpy as np + >>> G = nx.Graph([(1, 1)]) + >>> A = nx.to_numpy_array(G) + >>> A + array([[1.]]) + >>> A[np.diag_indices_from(A)] *= 2 + >>> A + array([[2.]]) + + Examples + -------- + >>> G = nx.MultiDiGraph() + >>> G.add_edge(0, 1, weight=2) + 0 + >>> G.add_edge(1, 0) + 0 + >>> G.add_edge(2, 2, weight=3) + 0 + >>> G.add_edge(2, 2) + 1 + >>> nx.to_numpy_array(G, nodelist=[0, 1, 2]) + array([[0., 2., 0.], + [1., 0., 0.], + [0., 0., 4.]]) + + """ + import numpy as np + + if nodelist is None: + nodelist = list(G) + nodeset = set(nodelist) + if len(nodelist) != len(nodeset): + msg = "Ambiguous ordering: `nodelist` contained duplicates." + raise nx.NetworkXError(msg) + + nlen = len(nodelist) + undirected = not G.is_directed() + index = dict(zip(nodelist, range(nlen))) + + # Initially, we start with an array of nans. Then we populate the array + # using data from the graph. Afterwards, any leftover nans will be + # converted to the value of `nonedge`. Note, we use nans initially, + # instead of zero, for two reasons: + # + # 1) It can be important to distinguish a real edge with the value 0 + # from a nonedge with the value 0. + # + # 2) When working with multi(di)graphs, we must combine the values of all + # edges between any two nodes in some manner. This often takes the + # form of a sum, min, or max. Using the value 0 for a nonedge would + # have undesirable effects with min and max, but using nanmin and + # nanmax with initially nan values is not problematic at all. + # + # That said, there are still some drawbacks to this approach. Namely, if + # a real edge is nan, then that value is a) not distinguishable from + # nonedges and b) is ignored by the default combinator (nansum, nanmin, + # nanmax) functions used for multi(di)graphs. If this becomes an issue, + # an alternative approach is to use masked arrays. Initially, every + # element is masked and set to some `initial` value. As we populate the + # graph, elements are unmasked (automatically) when we combine the initial + # value with the values given by real edges. At the end, we convert all + # masked values to `nonedge`. Using masked arrays fully addresses reason 1, + # but for reason 2, we would still have the issue with min and max if the + # initial values were 0.0. Note: an initial value of +inf is appropriate + # for min, while an initial value of -inf is appropriate for max. When + # working with sum, an initial value of zero is appropriate. Ideally then, + # we'd want to allow users to specify both a value for nonedges and also + # an initial value. For multi(di)graphs, the choice of the initial value + # will, in general, depend on the combinator function---sensible defaults + # can be provided. + + if G.is_multigraph(): + # Handle MultiGraphs and MultiDiGraphs + A = np.full((nlen, nlen), np.nan, order=order) + # use numpy nan-aware operations + operator = {sum: np.nansum, min: np.nanmin, max: np.nanmax} + try: + op = operator[multigraph_weight] + except Exception: + raise ValueError('multigraph_weight must be sum, min, or max') + + for u, v, attrs in G.edges(data=True): + if (u in nodeset) and (v in nodeset): + i, j = index[u], index[v] + e_weight = attrs.get(weight, 1) + A[i, j] = op([e_weight, A[i, j]]) + if undirected: + A[j, i] = A[i, j] + else: + # Graph or DiGraph, this is much faster than above + A = np.full((nlen, nlen), np.nan, order=order) + for u, nbrdict in G.adjacency(): + for v, d in nbrdict.items(): + try: + A[index[u], index[v]] = d.get(weight, 1) + except KeyError: + # This occurs when there are fewer desired nodes than + # there are nodes in the graph: len(nodelist) < len(G) + pass + + A[np.isnan(A)] = nonedge + A = np.asarray(A, dtype=dtype) + return A + + +def from_numpy_array(A, parallel_edges=False, create_using=None): + """Returns a graph from NumPy array. + + The NumPy array is interpreted as an adjacency matrix for the graph. + + Parameters + ---------- + A : NumPy ndarray + An adjacency matrix representation of a graph + + parallel_edges : Boolean + If this is True, `create_using` is a multigraph, and `A` is an + integer array, then entry *(i, j)* in the array is interpreted as the + number of parallel edges joining vertices *i* and *j* in the graph. + If it is False, then the entries in the array are interpreted as + the weight of a single edge joining the vertices. + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Notes + ----- + For directed graphs, explicitly mention create_using=nx.Digraph, + and entry i,j of A corresponds to an edge from i to j. + + If `create_using` is :class:`networkx.MultiGraph` or + :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the + entries of `A` are of type :class:`int`, then this function returns a + multigraph (of the same type as `create_using`) with parallel edges. + + If `create_using` indicates an undirected multigraph, then only the edges + indicated by the upper triangle of the array `A` will be added to the + graph. + + If the NumPy array has a single data type for each array entry it + will be converted to an appropriate Python data type. + + If the NumPy array has a user-specified compound data type the names + of the data fields will be used as attribute keys in the resulting + NetworkX graph. + + See Also + -------- + to_numpy_array + + Examples + -------- + Simple integer weights on edges: + + >>> import numpy as np + >>> A = np.array([[1, 1], [2, 1]]) + >>> G = nx.from_numpy_array(A) + >>> G.edges(data=True) + EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), \ +(1, 1, {'weight': 1})]) + + If `create_using` indicates a multigraph and the array has only integer + entries and `parallel_edges` is False, then the entries will be treated + as weights for edges joining the nodes (without creating parallel edges): + + >>> A = np.array([[1, 1], [1, 2]]) + >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph) + >>> G[1][1] + AtlasView({0: {'weight': 2}}) + + If `create_using` indicates a multigraph and the array has only integer + entries and `parallel_edges` is True, then the entries will be treated + as the number of parallel edges joining those two vertices: + + >>> A = np.array([[1, 1], [1, 2]]) + >>> temp = nx.MultiGraph() + >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp) + >>> G[1][1] + AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) + + User defined compound data type on edges: + + >>> dt = [('weight', float), ('cost', int)] + >>> A = np.array([[(1.0, 2)]], dtype=dt) + >>> G = nx.from_numpy_array(A) + >>> G.edges() + EdgeView([(0, 0)]) + >>> G[0][0]['cost'] + 2 + >>> G[0][0]['weight'] + 1.0 + + """ + return from_numpy_matrix(A, parallel_edges=parallel_edges, + create_using=create_using) + + +# fixture for pytest +def setup_module(module): + import pytest + numpy = pytest.importorskip('numpy') + scipy = pytest.importorskip('scipy') + pandas = pytest.importorskip('pandas')