comparison env/lib/python3.7/site-packages/networkx/convert_matrix.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # Copyright (C) 2006-2019 by
2 # Aric Hagberg <hagberg@lanl.gov>
3 # Dan Schult <dschult@colgate.edu>
4 # Pieter Swart <swart@lanl.gov>
5 # All rights reserved.
6 # BSD license.
7 """Functions to convert NetworkX graphs to and from numpy/scipy matrices.
8
9 The preferred way of converting data to a NetworkX graph is through the
10 graph constructor. The constructor calls the to_networkx_graph() function
11 which attempts to guess the input type and convert it automatically.
12
13 Examples
14 --------
15 Create a 10 node random graph from a numpy matrix
16
17 >>> import numpy as np
18 >>> a = np.random.randint(0, 2, size=(10, 10))
19 >>> D = nx.DiGraph(a)
20
21 or equivalently
22
23 >>> D = nx.to_networkx_graph(a, create_using=nx.DiGraph)
24
25 See Also
26 --------
27 nx_agraph, nx_pydot
28 """
29
30 import itertools
31 import networkx as nx
32 from networkx.utils import not_implemented_for
33
34 __all__ = ['from_numpy_matrix', 'to_numpy_matrix',
35 'from_pandas_adjacency', 'to_pandas_adjacency',
36 'from_pandas_edgelist', 'to_pandas_edgelist',
37 'to_numpy_recarray',
38 'from_scipy_sparse_matrix', 'to_scipy_sparse_matrix',
39 'from_numpy_array', 'to_numpy_array']
40
41
42 def to_pandas_adjacency(G, nodelist=None, dtype=None, order=None,
43 multigraph_weight=sum, weight='weight', nonedge=0.0):
44 """Returns the graph adjacency matrix as a Pandas DataFrame.
45
46 Parameters
47 ----------
48 G : graph
49 The NetworkX graph used to construct the Pandas DataFrame.
50
51 nodelist : list, optional
52 The rows and columns are ordered according to the nodes in `nodelist`.
53 If `nodelist` is None, then the ordering is produced by G.nodes().
54
55 multigraph_weight : {sum, min, max}, optional
56 An operator that determines how weights in multigraphs are handled.
57 The default is to sum the weights of the multiple edges.
58
59 weight : string or None, optional
60 The edge attribute that holds the numerical value used for
61 the edge weight. If an edge does not have that attribute, then the
62 value 1 is used instead.
63
64 nonedge : float, optional
65 The matrix values corresponding to nonedges are typically set to zero.
66 However, this could be undesirable if there are matrix values
67 corresponding to actual edges that also have the value zero. If so,
68 one might prefer nonedges to have some other value, such as nan.
69
70 Returns
71 -------
72 df : Pandas DataFrame
73 Graph adjacency matrix
74
75 Notes
76 -----
77 For directed graphs, entry i,j corresponds to an edge from i to j.
78
79 The DataFrame entries are assigned to the weight edge attribute. When
80 an edge does not have a weight attribute, the value of the entry is set to
81 the number 1. For multiple (parallel) edges, the values of the entries
82 are determined by the 'multigraph_weight' parameter. The default is to
83 sum the weight attributes for each of the parallel edges.
84
85 When `nodelist` does not contain every node in `G`, the matrix is built
86 from the subgraph of `G` that is induced by the nodes in `nodelist`.
87
88 The convention used for self-loop edges in graphs is to assign the
89 diagonal matrix entry value to the weight attribute of the edge
90 (or the number 1 if the edge has no weight attribute). If the
91 alternate convention of doubling the edge weight is desired the
92 resulting Pandas DataFrame can be modified as follows:
93
94 >>> import pandas as pd
95 >>> pd.options.display.max_columns = 20
96 >>> import numpy as np
97 >>> G = nx.Graph([(1, 1)])
98 >>> df = nx.to_pandas_adjacency(G, dtype=int)
99 >>> df
100 1
101 1 1
102 >>> df.values[np.diag_indices_from(df)] *= 2
103 >>> df
104 1
105 1 2
106
107 Examples
108 --------
109 >>> G = nx.MultiDiGraph()
110 >>> G.add_edge(0, 1, weight=2)
111 0
112 >>> G.add_edge(1, 0)
113 0
114 >>> G.add_edge(2, 2, weight=3)
115 0
116 >>> G.add_edge(2, 2)
117 1
118 >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
119 0 1 2
120 0 0 2 0
121 1 1 0 0
122 2 0 0 4
123
124 """
125 import pandas as pd
126 M = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order,
127 multigraph_weight=multigraph_weight, weight=weight,
128 nonedge=nonedge)
129 if nodelist is None:
130 nodelist = list(G)
131 return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
132
133
134 def from_pandas_adjacency(df, create_using=None):
135 r"""Returns a graph from Pandas DataFrame.
136
137 The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
138
139 Parameters
140 ----------
141 df : Pandas DataFrame
142 An adjacency matrix representation of a graph
143
144 create_using : NetworkX graph constructor, optional (default=nx.Graph)
145 Graph type to create. If graph instance, then cleared before populated.
146
147 Notes
148 -----
149 For directed graphs, explicitly mention create_using=nx.Digraph,
150 and entry i,j of df corresponds to an edge from i to j.
151
152 If the numpy matrix has a single data type for each matrix entry it
153 will be converted to an appropriate Python data type.
154
155 If the numpy matrix has a user-specified compound data type the names
156 of the data fields will be used as attribute keys in the resulting
157 NetworkX graph.
158
159 See Also
160 --------
161 to_pandas_adjacency
162
163 Examples
164 --------
165 Simple integer weights on edges:
166
167 >>> import pandas as pd
168 >>> pd.options.display.max_columns = 20
169 >>> df = pd.DataFrame([[1, 1], [2, 1]])
170 >>> df
171 0 1
172 0 1 1
173 1 2 1
174 >>> G = nx.from_pandas_adjacency(df)
175 >>> G.name = 'Graph from pandas adjacency matrix'
176 >>> print(nx.info(G))
177 Name: Graph from pandas adjacency matrix
178 Type: Graph
179 Number of nodes: 2
180 Number of edges: 3
181 Average degree: 3.0000
182
183 """
184
185 try:
186 df = df[df.index]
187 except Exception:
188 msg = "%s not in columns"
189 missing = list(set(df.index).difference(set(df.columns)))
190 raise nx.NetworkXError("Columns must match Indices.", msg % missing)
191
192 A = df.values
193 G = from_numpy_matrix(A, create_using=create_using)
194
195 nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False)
196 return G
197
198
199 def to_pandas_edgelist(G, source='source', target='target', nodelist=None,
200 dtype=None, order=None):
201 """Returns the graph edge list as a Pandas DataFrame.
202
203 Parameters
204 ----------
205 G : graph
206 The NetworkX graph used to construct the Pandas DataFrame.
207
208 source : str or int, optional
209 A valid column name (string or integer) for the source nodes (for the
210 directed case).
211
212 target : str or int, optional
213 A valid column name (string or integer) for the target nodes (for the
214 directed case).
215
216 nodelist : list, optional
217 Use only nodes specified in nodelist
218
219 Returns
220 -------
221 df : Pandas DataFrame
222 Graph edge list
223
224 Examples
225 --------
226 >>> G = nx.Graph([('A', 'B', {'cost': 1, 'weight': 7}),
227 ... ('C', 'E', {'cost': 9, 'weight': 10})])
228 >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C'])
229 >>> df[['source', 'target', 'cost', 'weight']]
230 source target cost weight
231 0 A B 1 7
232 1 C E 9 10
233
234 """
235 import pandas as pd
236 if nodelist is None:
237 edgelist = G.edges(data=True)
238 else:
239 edgelist = G.edges(nodelist, data=True)
240 source_nodes = [s for s, t, d in edgelist]
241 target_nodes = [t for s, t, d in edgelist]
242 all_keys = set().union(*(d.keys() for s, t, d in edgelist))
243 edge_attr = {k: [d.get(k, float("nan")) for s, t, d in edgelist]
244 for k in all_keys}
245 edgelistdict = {source: source_nodes, target: target_nodes}
246 edgelistdict.update(edge_attr)
247 return pd.DataFrame(edgelistdict)
248
249
250 def from_pandas_edgelist(df, source='source', target='target', edge_attr=None,
251 create_using=None):
252 """Returns a graph from Pandas DataFrame containing an edge list.
253
254 The Pandas DataFrame should contain at least two columns of node names and
255 zero or more columns of edge attributes. Each row will be processed as one
256 edge instance.
257
258 Note: This function iterates over DataFrame.values, which is not
259 guaranteed to retain the data type across columns in the row. This is only
260 a problem if your row is entirely numeric and a mix of ints and floats. In
261 that case, all values will be returned as floats. See the
262 DataFrame.iterrows documentation for an example.
263
264 Parameters
265 ----------
266 df : Pandas DataFrame
267 An edge list representation of a graph
268
269 source : str or int
270 A valid column name (string or integer) for the source nodes (for the
271 directed case).
272
273 target : str or int
274 A valid column name (string or integer) for the target nodes (for the
275 directed case).
276
277 edge_attr : str or int, iterable, True, or None
278 A valid column name (str or int) or iterable of column names that are
279 used to retrieve items and add them to the graph as edge attributes.
280 If `True`, all of the remaining columns will be added.
281 If `None`, no edge attributes are added to the graph.
282
283 create_using : NetworkX graph constructor, optional (default=nx.Graph)
284 Graph type to create. If graph instance, then cleared before populated.
285
286 See Also
287 --------
288 to_pandas_edgelist
289
290 Examples
291 --------
292 Simple integer weights on edges:
293
294 >>> import pandas as pd
295 >>> pd.options.display.max_columns = 20
296 >>> import numpy as np
297 >>> rng = np.random.RandomState(seed=5)
298 >>> ints = rng.randint(1, 11, size=(3,2))
299 >>> a = ['A', 'B', 'C']
300 >>> b = ['D', 'A', 'E']
301 >>> df = pd.DataFrame(ints, columns=['weight', 'cost'])
302 >>> df[0] = a
303 >>> df['b'] = b
304 >>> df[['weight', 'cost', 0, 'b']]
305 weight cost 0 b
306 0 4 7 A D
307 1 7 1 B A
308 2 10 9 C E
309 >>> G = nx.from_pandas_edgelist(df, 0, 'b', ['weight', 'cost'])
310 >>> G['E']['C']['weight']
311 10
312 >>> G['E']['C']['cost']
313 9
314 >>> edges = pd.DataFrame({'source': [0, 1, 2],
315 ... 'target': [2, 2, 3],
316 ... 'weight': [3, 4, 5],
317 ... 'color': ['red', 'blue', 'blue']})
318 >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
319 >>> G[0][2]['color']
320 'red'
321
322 """
323 g = nx.empty_graph(0, create_using)
324
325 if edge_attr is None:
326 g.add_edges_from(zip(df[source], df[target]))
327 return g
328
329 # Additional columns requested
330 if edge_attr is True:
331 cols = [c for c in df.columns if c is not source and c is not target]
332 elif isinstance(edge_attr, (list, tuple)):
333 cols = edge_attr
334 else:
335 cols = [edge_attr]
336 if len(cols) == 0:
337 msg = "Invalid edge_attr argument. No columns found with name: %s"
338 raise nx.NetworkXError(msg % cols)
339
340 try:
341 eattrs = zip(*[df[col] for col in cols])
342 except (KeyError, TypeError) as e:
343 msg = "Invalid edge_attr argument: %s" % edge_attr
344 raise nx.NetworkXError(msg)
345 for s, t, attrs in zip(df[source], df[target], eattrs):
346 if g.is_multigraph():
347 key = g.add_edge(s, t)
348 g[s][t][key].update(zip(cols, attrs))
349 else:
350 g.add_edge(s, t)
351 g[s][t].update(zip(cols, attrs))
352
353 return g
354
355
356 def to_numpy_matrix(G, nodelist=None, dtype=None, order=None,
357 multigraph_weight=sum, weight='weight', nonedge=0.0):
358 """Returns the graph adjacency matrix as a NumPy matrix.
359
360 Parameters
361 ----------
362 G : graph
363 The NetworkX graph used to construct the NumPy matrix.
364
365 nodelist : list, optional
366 The rows and columns are ordered according to the nodes in `nodelist`.
367 If `nodelist` is None, then the ordering is produced by G.nodes().
368
369 dtype : NumPy data type, optional
370 A valid single NumPy data type used to initialize the array.
371 This must be a simple type such as int or numpy.float64 and
372 not a compound data type (see to_numpy_recarray)
373 If None, then the NumPy default is used.
374
375 order : {'C', 'F'}, optional
376 Whether to store multidimensional data in C- or Fortran-contiguous
377 (row- or column-wise) order in memory. If None, then the NumPy default
378 is used.
379
380 multigraph_weight : {sum, min, max}, optional
381 An operator that determines how weights in multigraphs are handled.
382 The default is to sum the weights of the multiple edges.
383
384 weight : string or None optional (default = 'weight')
385 The edge attribute that holds the numerical value used for
386 the edge weight. If an edge does not have that attribute, then the
387 value 1 is used instead.
388
389 nonedge : float (default = 0.0)
390 The matrix values corresponding to nonedges are typically set to zero.
391 However, this could be undesirable if there are matrix values
392 corresponding to actual edges that also have the value zero. If so,
393 one might prefer nonedges to have some other value, such as nan.
394
395 Returns
396 -------
397 M : NumPy matrix
398 Graph adjacency matrix
399
400 See Also
401 --------
402 to_numpy_recarray, from_numpy_matrix
403
404 Notes
405 -----
406 For directed graphs, entry i,j corresponds to an edge from i to j.
407
408 The matrix entries are assigned to the weight edge attribute. When
409 an edge does not have a weight attribute, the value of the entry is set to
410 the number 1. For multiple (parallel) edges, the values of the entries
411 are determined by the `multigraph_weight` parameter. The default is to
412 sum the weight attributes for each of the parallel edges.
413
414 When `nodelist` does not contain every node in `G`, the matrix is built
415 from the subgraph of `G` that is induced by the nodes in `nodelist`.
416
417 The convention used for self-loop edges in graphs is to assign the
418 diagonal matrix entry value to the weight attribute of the edge
419 (or the number 1 if the edge has no weight attribute). If the
420 alternate convention of doubling the edge weight is desired the
421 resulting Numpy matrix can be modified as follows:
422
423 >>> import numpy as np
424 >>> G = nx.Graph([(1, 1)])
425 >>> A = nx.to_numpy_matrix(G)
426 >>> A
427 matrix([[1.]])
428 >>> A[np.diag_indices_from(A)] *= 2
429 >>> A
430 matrix([[2.]])
431
432 Examples
433 --------
434 >>> G = nx.MultiDiGraph()
435 >>> G.add_edge(0, 1, weight=2)
436 0
437 >>> G.add_edge(1, 0)
438 0
439 >>> G.add_edge(2, 2, weight=3)
440 0
441 >>> G.add_edge(2, 2)
442 1
443 >>> nx.to_numpy_matrix(G, nodelist=[0, 1, 2])
444 matrix([[0., 2., 0.],
445 [1., 0., 0.],
446 [0., 0., 4.]])
447
448 """
449 import numpy as np
450
451 A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order,
452 multigraph_weight=multigraph_weight, weight=weight,
453 nonedge=nonedge)
454 M = np.asmatrix(A, dtype=dtype)
455 return M
456
457
458 def from_numpy_matrix(A, parallel_edges=False, create_using=None):
459 """Returns a graph from numpy matrix.
460
461 The numpy matrix is interpreted as an adjacency matrix for the graph.
462
463 Parameters
464 ----------
465 A : numpy matrix
466 An adjacency matrix representation of a graph
467
468 parallel_edges : Boolean
469 If True, `create_using` is a multigraph, and `A` is an
470 integer matrix, then entry *(i, j)* in the matrix is interpreted as the
471 number of parallel edges joining vertices *i* and *j* in the graph.
472 If False, then the entries in the adjacency matrix are interpreted as
473 the weight of a single edge joining the vertices.
474
475 create_using : NetworkX graph constructor, optional (default=nx.Graph)
476 Graph type to create. If graph instance, then cleared before populated.
477
478 Notes
479 -----
480 For directed graphs, explicitly mention create_using=nx.Digraph,
481 and entry i,j of A corresponds to an edge from i to j.
482
483 If `create_using` is :class:`networkx.MultiGraph` or
484 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
485 entries of `A` are of type :class:`int`, then this function returns a
486 multigraph (constructed from `create_using`) with parallel edges.
487
488 If `create_using` indicates an undirected multigraph, then only the edges
489 indicated by the upper triangle of the matrix `A` will be added to the
490 graph.
491
492 If the numpy matrix has a single data type for each matrix entry it
493 will be converted to an appropriate Python data type.
494
495 If the numpy matrix has a user-specified compound data type the names
496 of the data fields will be used as attribute keys in the resulting
497 NetworkX graph.
498
499 See Also
500 --------
501 to_numpy_matrix, to_numpy_recarray
502
503 Examples
504 --------
505 Simple integer weights on edges:
506
507 >>> import numpy as np
508 >>> A = np.array([[1, 1], [2, 1]])
509 >>> G = nx.from_numpy_matrix(A)
510
511 If `create_using` indicates a multigraph and the matrix has only integer
512 entries and `parallel_edges` is False, then the entries will be treated
513 as weights for edges joining the nodes (without creating parallel edges):
514
515 >>> A = np.array([[1, 1], [1, 2]])
516 >>> G = nx.from_numpy_matrix(A, create_using=nx.MultiGraph)
517 >>> G[1][1]
518 AtlasView({0: {'weight': 2}})
519
520 If `create_using` indicates a multigraph and the matrix has only integer
521 entries and `parallel_edges` is True, then the entries will be treated
522 as the number of parallel edges joining those two vertices:
523
524 >>> A = np.array([[1, 1], [1, 2]])
525 >>> temp = nx.MultiGraph()
526 >>> G = nx.from_numpy_matrix(A, parallel_edges=True, create_using=temp)
527 >>> G[1][1]
528 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
529
530 User defined compound data type on edges:
531
532 >>> dt = [('weight', float), ('cost', int)]
533 >>> A = np.array([[(1.0, 2)]], dtype=dt)
534 >>> G = nx.from_numpy_matrix(A)
535 >>> list(G.edges())
536 [(0, 0)]
537 >>> G[0][0]['cost']
538 2
539 >>> G[0][0]['weight']
540 1.0
541
542 """
543 # This should never fail if you have created a numpy matrix with numpy...
544 import numpy as np
545 kind_to_python_type = {'f': float,
546 'i': int,
547 'u': int,
548 'b': bool,
549 'c': complex,
550 'S': str,
551 'V': 'void'}
552 try: # Python 3.x
553 blurb = chr(1245) # just to trigger the exception
554 kind_to_python_type['U'] = str
555 except ValueError: # Python 2.7
556 kind_to_python_type['U'] = unicode
557 G = nx.empty_graph(0, create_using)
558 n, m = A.shape
559 if n != m:
560 raise nx.NetworkXError("Adjacency matrix is not square.",
561 "nx,ny=%s" % (A.shape,))
562 dt = A.dtype
563 try:
564 python_type = kind_to_python_type[dt.kind]
565 except Exception:
566 raise TypeError("Unknown numpy data type: %s" % dt)
567
568 # Make sure we get even the isolated nodes of the graph.
569 G.add_nodes_from(range(n))
570 # Get a list of all the entries in the matrix with nonzero entries. These
571 # coordinates will become the edges in the graph.
572 edges = map(lambda e: (int(e[0]), int(e[1])),
573 zip(*(np.asarray(A).nonzero())))
574 # handle numpy constructed data type
575 if python_type == 'void':
576 # Sort the fields by their offset, then by dtype, then by name.
577 fields = sorted((offset, dtype, name) for name, (dtype, offset) in
578 A.dtype.fields.items())
579 triples = ((u, v, {name: kind_to_python_type[dtype.kind](val)
580 for (_, dtype, name), val in zip(fields, A[u, v])})
581 for u, v in edges)
582 # If the entries in the adjacency matrix are integers, the graph is a
583 # multigraph, and parallel_edges is True, then create parallel edges, each
584 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
585 # one edge for each positive entry in the adjacency matrix and set the
586 # weight of that edge to be the entry in the matrix.
587 elif python_type is int and G.is_multigraph() and parallel_edges:
588 chain = itertools.chain.from_iterable
589 # The following line is equivalent to:
590 #
591 # for (u, v) in edges:
592 # for d in range(A[u, v]):
593 # G.add_edge(u, v, weight=1)
594 #
595 triples = chain(((u, v, dict(weight=1)) for d in range(A[u, v]))
596 for (u, v) in edges)
597 else: # basic data type
598 triples = ((u, v, dict(weight=python_type(A[u, v])))
599 for u, v in edges)
600 # If we are creating an undirected multigraph, only add the edges from the
601 # upper triangle of the matrix. Otherwise, add all the edges. This relies
602 # on the fact that the vertices created in the
603 # `_generated_weighted_edges()` function are actually the row/column
604 # indices for the matrix `A`.
605 #
606 # Without this check, we run into a problem where each edge is added twice
607 # when `G.add_edges_from()` is invoked below.
608 if G.is_multigraph() and not G.is_directed():
609 triples = ((u, v, d) for u, v, d in triples if u <= v)
610 G.add_edges_from(triples)
611 return G
612
613
614 @not_implemented_for('multigraph')
615 def to_numpy_recarray(G, nodelist=None, dtype=None, order=None):
616 """Returns the graph adjacency matrix as a NumPy recarray.
617
618 Parameters
619 ----------
620 G : graph
621 The NetworkX graph used to construct the NumPy matrix.
622
623 nodelist : list, optional
624 The rows and columns are ordered according to the nodes in `nodelist`.
625 If `nodelist` is None, then the ordering is produced by G.nodes().
626
627 dtype : NumPy data-type, optional
628 A valid NumPy named dtype used to initialize the NumPy recarray.
629 The data type names are assumed to be keys in the graph edge attribute
630 dictionary.
631
632 order : {'C', 'F'}, optional
633 Whether to store multidimensional data in C- or Fortran-contiguous
634 (row- or column-wise) order in memory. If None, then the NumPy default
635 is used.
636
637 Returns
638 -------
639 M : NumPy recarray
640 The graph with specified edge data as a Numpy recarray
641
642 Notes
643 -----
644 When `nodelist` does not contain every node in `G`, the matrix is built
645 from the subgraph of `G` that is induced by the nodes in `nodelist`.
646
647 Examples
648 --------
649 >>> G = nx.Graph()
650 >>> G.add_edge(1, 2, weight=7.0, cost=5)
651 >>> A = nx.to_numpy_recarray(G, dtype=[('weight', float), ('cost', int)])
652 >>> print(A.weight)
653 [[0. 7.]
654 [7. 0.]]
655 >>> print(A.cost)
656 [[0 5]
657 [5 0]]
658
659 """
660 if dtype is None:
661 dtype = [('weight', float)]
662 import numpy as np
663 if nodelist is None:
664 nodelist = list(G)
665 nodeset = set(nodelist)
666 if len(nodelist) != len(nodeset):
667 msg = "Ambiguous ordering: `nodelist` contained duplicates."
668 raise nx.NetworkXError(msg)
669 nlen = len(nodelist)
670 undirected = not G.is_directed()
671 index = dict(zip(nodelist, range(nlen)))
672 M = np.zeros((nlen, nlen), dtype=dtype, order=order)
673
674 names = M.dtype.names
675 for u, v, attrs in G.edges(data=True):
676 if (u in nodeset) and (v in nodeset):
677 i, j = index[u], index[v]
678 values = tuple([attrs[n] for n in names])
679 M[i, j] = values
680 if undirected:
681 M[j, i] = M[i, j]
682
683 return M.view(np.recarray)
684
685
686 def to_scipy_sparse_matrix(G, nodelist=None, dtype=None,
687 weight='weight', format='csr'):
688 """Returns the graph adjacency matrix as a SciPy sparse matrix.
689
690 Parameters
691 ----------
692 G : graph
693 The NetworkX graph used to construct the NumPy matrix.
694
695 nodelist : list, optional
696 The rows and columns are ordered according to the nodes in `nodelist`.
697 If `nodelist` is None, then the ordering is produced by G.nodes().
698
699 dtype : NumPy data-type, optional
700 A valid NumPy dtype used to initialize the array. If None, then the
701 NumPy default is used.
702
703 weight : string or None optional (default='weight')
704 The edge attribute that holds the numerical value used for
705 the edge weight. If None then all edge weights are 1.
706
707 format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
708 The type of the matrix to be returned (default 'csr'). For
709 some algorithms different implementations of sparse matrices
710 can perform better. See [1]_ for details.
711
712 Returns
713 -------
714 M : SciPy sparse matrix
715 Graph adjacency matrix.
716
717 Notes
718 -----
719 For directed graphs, matrix entry i,j corresponds to an edge from i to j.
720
721 The matrix entries are populated using the edge attribute held in
722 parameter weight. When an edge does not have that attribute, the
723 value of the entry is 1.
724
725 For multiple edges the matrix values are the sums of the edge weights.
726
727 When `nodelist` does not contain every node in `G`, the matrix is built
728 from the subgraph of `G` that is induced by the nodes in `nodelist`.
729
730 Uses coo_matrix format. To convert to other formats specify the
731 format= keyword.
732
733 The convention used for self-loop edges in graphs is to assign the
734 diagonal matrix entry value to the weight attribute of the edge
735 (or the number 1 if the edge has no weight attribute). If the
736 alternate convention of doubling the edge weight is desired the
737 resulting Scipy sparse matrix can be modified as follows:
738
739 >>> import scipy as sp
740 >>> G = nx.Graph([(1, 1)])
741 >>> A = nx.to_scipy_sparse_matrix(G)
742 >>> print(A.todense())
743 [[1]]
744 >>> A.setdiag(A.diagonal() * 2)
745 >>> print(A.todense())
746 [[2]]
747
748 Examples
749 --------
750 >>> G = nx.MultiDiGraph()
751 >>> G.add_edge(0, 1, weight=2)
752 0
753 >>> G.add_edge(1, 0)
754 0
755 >>> G.add_edge(2, 2, weight=3)
756 0
757 >>> G.add_edge(2, 2)
758 1
759 >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
760 >>> print(S.todense())
761 [[0 2 0]
762 [1 0 0]
763 [0 0 4]]
764
765 References
766 ----------
767 .. [1] Scipy Dev. References, "Sparse Matrices",
768 https://docs.scipy.org/doc/scipy/reference/sparse.html
769 """
770 from scipy import sparse
771 if nodelist is None:
772 nodelist = list(G)
773 nlen = len(nodelist)
774 if nlen == 0:
775 raise nx.NetworkXError("Graph has no nodes or edges")
776
777 if len(nodelist) != len(set(nodelist)):
778 msg = "Ambiguous ordering: `nodelist` contained duplicates."
779 raise nx.NetworkXError(msg)
780
781 index = dict(zip(nodelist, range(nlen)))
782 coefficients = zip(*((index[u], index[v], d.get(weight, 1))
783 for u, v, d in G.edges(nodelist, data=True)
784 if u in index and v in index))
785 try:
786 row, col, data = coefficients
787 except ValueError:
788 # there is no edge in the subgraph
789 row, col, data = [], [], []
790
791 if G.is_directed():
792 M = sparse.coo_matrix((data, (row, col)),
793 shape=(nlen, nlen), dtype=dtype)
794 else:
795 # symmetrize matrix
796 d = data + data
797 r = row + col
798 c = col + row
799 # selfloop entries get double counted when symmetrizing
800 # so we subtract the data on the diagonal
801 selfloops = list(nx.selfloop_edges(G, data=True))
802 if selfloops:
803 diag_index, diag_data = zip(*((index[u], -d.get(weight, 1))
804 for u, v, d in selfloops
805 if u in index and v in index))
806 d += diag_data
807 r += diag_index
808 c += diag_index
809 M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
810 try:
811 return M.asformat(format)
812 # From Scipy 1.1.0, asformat will throw a ValueError instead of an
813 # AttributeError if the format if not recognized.
814 except (AttributeError, ValueError):
815 raise nx.NetworkXError("Unknown sparse matrix format: %s" % format)
816
817
818 def _csr_gen_triples(A):
819 """Converts a SciPy sparse matrix in **Compressed Sparse Row** format to
820 an iterable of weighted edge triples.
821
822 """
823 nrows = A.shape[0]
824 data, indices, indptr = A.data, A.indices, A.indptr
825 for i in range(nrows):
826 for j in range(indptr[i], indptr[i + 1]):
827 yield i, indices[j], data[j]
828
829
830 def _csc_gen_triples(A):
831 """Converts a SciPy sparse matrix in **Compressed Sparse Column** format to
832 an iterable of weighted edge triples.
833
834 """
835 ncols = A.shape[1]
836 data, indices, indptr = A.data, A.indices, A.indptr
837 for i in range(ncols):
838 for j in range(indptr[i], indptr[i + 1]):
839 yield indices[j], i, data[j]
840
841
842 def _coo_gen_triples(A):
843 """Converts a SciPy sparse matrix in **Coordinate** format to an iterable
844 of weighted edge triples.
845
846 """
847 row, col, data = A.row, A.col, A.data
848 return zip(row, col, data)
849
850
851 def _dok_gen_triples(A):
852 """Converts a SciPy sparse matrix in **Dictionary of Keys** format to an
853 iterable of weighted edge triples.
854
855 """
856 for (r, c), v in A.items():
857 yield r, c, v
858
859
860 def _generate_weighted_edges(A):
861 """Returns an iterable over (u, v, w) triples, where u and v are adjacent
862 vertices and w is the weight of the edge joining u and v.
863
864 `A` is a SciPy sparse matrix (in any format).
865
866 """
867 if A.format == 'csr':
868 return _csr_gen_triples(A)
869 if A.format == 'csc':
870 return _csc_gen_triples(A)
871 if A.format == 'dok':
872 return _dok_gen_triples(A)
873 # If A is in any other format (including COO), convert it to COO format.
874 return _coo_gen_triples(A.tocoo())
875
876
877 def from_scipy_sparse_matrix(A, parallel_edges=False, create_using=None,
878 edge_attribute='weight'):
879 """Creates a new graph from an adjacency matrix given as a SciPy sparse
880 matrix.
881
882 Parameters
883 ----------
884 A: scipy sparse matrix
885 An adjacency matrix representation of a graph
886
887 parallel_edges : Boolean
888 If this is True, `create_using` is a multigraph, and `A` is an
889 integer matrix, then entry *(i, j)* in the matrix is interpreted as the
890 number of parallel edges joining vertices *i* and *j* in the graph.
891 If it is False, then the entries in the matrix are interpreted as
892 the weight of a single edge joining the vertices.
893
894 create_using : NetworkX graph constructor, optional (default=nx.Graph)
895 Graph type to create. If graph instance, then cleared before populated.
896
897 edge_attribute: string
898 Name of edge attribute to store matrix numeric value. The data will
899 have the same type as the matrix entry (int, float, (real,imag)).
900
901 Notes
902 -----
903 For directed graphs, explicitly mention create_using=nx.Digraph,
904 and entry i,j of A corresponds to an edge from i to j.
905
906 If `create_using` is :class:`networkx.MultiGraph` or
907 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
908 entries of `A` are of type :class:`int`, then this function returns a
909 multigraph (constructed from `create_using`) with parallel edges.
910 In this case, `edge_attribute` will be ignored.
911
912 If `create_using` indicates an undirected multigraph, then only the edges
913 indicated by the upper triangle of the matrix `A` will be added to the
914 graph.
915
916 Examples
917 --------
918 >>> import scipy as sp
919 >>> A = sp.sparse.eye(2, 2, 1)
920 >>> G = nx.from_scipy_sparse_matrix(A)
921
922 If `create_using` indicates a multigraph and the matrix has only integer
923 entries and `parallel_edges` is False, then the entries will be treated
924 as weights for edges joining the nodes (without creating parallel edges):
925
926 >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
927 >>> G = nx.from_scipy_sparse_matrix(A, create_using=nx.MultiGraph)
928 >>> G[1][1]
929 AtlasView({0: {'weight': 2}})
930
931 If `create_using` indicates a multigraph and the matrix has only integer
932 entries and `parallel_edges` is True, then the entries will be treated
933 as the number of parallel edges joining those two vertices:
934
935 >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
936 >>> G = nx.from_scipy_sparse_matrix(A, parallel_edges=True,
937 ... create_using=nx.MultiGraph)
938 >>> G[1][1]
939 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
940
941 """
942 G = nx.empty_graph(0, create_using)
943 n, m = A.shape
944 if n != m:
945 raise nx.NetworkXError(
946 "Adjacency matrix is not square. nx,ny=%s" % (A.shape,))
947 # Make sure we get even the isolated nodes of the graph.
948 G.add_nodes_from(range(n))
949 # Create an iterable over (u, v, w) triples and for each triple, add an
950 # edge from u to v with weight w.
951 triples = _generate_weighted_edges(A)
952 # If the entries in the adjacency matrix are integers, the graph is a
953 # multigraph, and parallel_edges is True, then create parallel edges, each
954 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
955 # one edge for each positive entry in the adjacency matrix and set the
956 # weight of that edge to be the entry in the matrix.
957 if A.dtype.kind in ('i', 'u') and G.is_multigraph() and parallel_edges:
958 chain = itertools.chain.from_iterable
959 # The following line is equivalent to:
960 #
961 # for (u, v) in edges:
962 # for d in range(A[u, v]):
963 # G.add_edge(u, v, weight=1)
964 #
965 triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
966 # If we are creating an undirected multigraph, only add the edges from the
967 # upper triangle of the matrix. Otherwise, add all the edges. This relies
968 # on the fact that the vertices created in the
969 # `_generated_weighted_edges()` function are actually the row/column
970 # indices for the matrix `A`.
971 #
972 # Without this check, we run into a problem where each edge is added twice
973 # when `G.add_weighted_edges_from()` is invoked below.
974 if G.is_multigraph() and not G.is_directed():
975 triples = ((u, v, d) for u, v, d in triples if u <= v)
976 G.add_weighted_edges_from(triples, weight=edge_attribute)
977 return G
978
979
980 def to_numpy_array(G, nodelist=None, dtype=None, order=None,
981 multigraph_weight=sum, weight='weight', nonedge=0.0):
982 """Returns the graph adjacency matrix as a NumPy array.
983
984 Parameters
985 ----------
986 G : graph
987 The NetworkX graph used to construct the NumPy array.
988
989 nodelist : list, optional
990 The rows and columns are ordered according to the nodes in `nodelist`.
991 If `nodelist` is None, then the ordering is produced by G.nodes().
992
993 dtype : NumPy data type, optional
994 A valid single NumPy data type used to initialize the array.
995 This must be a simple type such as int or numpy.float64 and
996 not a compound data type (see to_numpy_recarray)
997 If None, then the NumPy default is used.
998
999 order : {'C', 'F'}, optional
1000 Whether to store multidimensional data in C- or Fortran-contiguous
1001 (row- or column-wise) order in memory. If None, then the NumPy default
1002 is used.
1003
1004 multigraph_weight : {sum, min, max}, optional
1005 An operator that determines how weights in multigraphs are handled.
1006 The default is to sum the weights of the multiple edges.
1007
1008 weight : string or None optional (default = 'weight')
1009 The edge attribute that holds the numerical value used for
1010 the edge weight. If an edge does not have that attribute, then the
1011 value 1 is used instead.
1012
1013 nonedge : float (default = 0.0)
1014 The array values corresponding to nonedges are typically set to zero.
1015 However, this could be undesirable if there are array values
1016 corresponding to actual edges that also have the value zero. If so,
1017 one might prefer nonedges to have some other value, such as nan.
1018
1019 Returns
1020 -------
1021 A : NumPy ndarray
1022 Graph adjacency matrix
1023
1024 See Also
1025 --------
1026 from_numpy_array
1027
1028 Notes
1029 -----
1030 For directed graphs, entry i,j corresponds to an edge from i to j.
1031
1032 Entries in the adjacency matrix are assigned to the weight edge attribute.
1033 When an edge does not have a weight attribute, the value of the entry is
1034 set to the number 1. For multiple (parallel) edges, the values of the
1035 entries are determined by the `multigraph_weight` parameter. The default is
1036 to sum the weight attributes for each of the parallel edges.
1037
1038 When `nodelist` does not contain every node in `G`, the adjacency matrix is
1039 built from the subgraph of `G` that is induced by the nodes in `nodelist`.
1040
1041 The convention used for self-loop edges in graphs is to assign the
1042 diagonal array entry value to the weight attribute of the edge
1043 (or the number 1 if the edge has no weight attribute). If the
1044 alternate convention of doubling the edge weight is desired the
1045 resulting NumPy array can be modified as follows:
1046
1047 >>> import numpy as np
1048 >>> G = nx.Graph([(1, 1)])
1049 >>> A = nx.to_numpy_array(G)
1050 >>> A
1051 array([[1.]])
1052 >>> A[np.diag_indices_from(A)] *= 2
1053 >>> A
1054 array([[2.]])
1055
1056 Examples
1057 --------
1058 >>> G = nx.MultiDiGraph()
1059 >>> G.add_edge(0, 1, weight=2)
1060 0
1061 >>> G.add_edge(1, 0)
1062 0
1063 >>> G.add_edge(2, 2, weight=3)
1064 0
1065 >>> G.add_edge(2, 2)
1066 1
1067 >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
1068 array([[0., 2., 0.],
1069 [1., 0., 0.],
1070 [0., 0., 4.]])
1071
1072 """
1073 import numpy as np
1074
1075 if nodelist is None:
1076 nodelist = list(G)
1077 nodeset = set(nodelist)
1078 if len(nodelist) != len(nodeset):
1079 msg = "Ambiguous ordering: `nodelist` contained duplicates."
1080 raise nx.NetworkXError(msg)
1081
1082 nlen = len(nodelist)
1083 undirected = not G.is_directed()
1084 index = dict(zip(nodelist, range(nlen)))
1085
1086 # Initially, we start with an array of nans. Then we populate the array
1087 # using data from the graph. Afterwards, any leftover nans will be
1088 # converted to the value of `nonedge`. Note, we use nans initially,
1089 # instead of zero, for two reasons:
1090 #
1091 # 1) It can be important to distinguish a real edge with the value 0
1092 # from a nonedge with the value 0.
1093 #
1094 # 2) When working with multi(di)graphs, we must combine the values of all
1095 # edges between any two nodes in some manner. This often takes the
1096 # form of a sum, min, or max. Using the value 0 for a nonedge would
1097 # have undesirable effects with min and max, but using nanmin and
1098 # nanmax with initially nan values is not problematic at all.
1099 #
1100 # That said, there are still some drawbacks to this approach. Namely, if
1101 # a real edge is nan, then that value is a) not distinguishable from
1102 # nonedges and b) is ignored by the default combinator (nansum, nanmin,
1103 # nanmax) functions used for multi(di)graphs. If this becomes an issue,
1104 # an alternative approach is to use masked arrays. Initially, every
1105 # element is masked and set to some `initial` value. As we populate the
1106 # graph, elements are unmasked (automatically) when we combine the initial
1107 # value with the values given by real edges. At the end, we convert all
1108 # masked values to `nonedge`. Using masked arrays fully addresses reason 1,
1109 # but for reason 2, we would still have the issue with min and max if the
1110 # initial values were 0.0. Note: an initial value of +inf is appropriate
1111 # for min, while an initial value of -inf is appropriate for max. When
1112 # working with sum, an initial value of zero is appropriate. Ideally then,
1113 # we'd want to allow users to specify both a value for nonedges and also
1114 # an initial value. For multi(di)graphs, the choice of the initial value
1115 # will, in general, depend on the combinator function---sensible defaults
1116 # can be provided.
1117
1118 if G.is_multigraph():
1119 # Handle MultiGraphs and MultiDiGraphs
1120 A = np.full((nlen, nlen), np.nan, order=order)
1121 # use numpy nan-aware operations
1122 operator = {sum: np.nansum, min: np.nanmin, max: np.nanmax}
1123 try:
1124 op = operator[multigraph_weight]
1125 except Exception:
1126 raise ValueError('multigraph_weight must be sum, min, or max')
1127
1128 for u, v, attrs in G.edges(data=True):
1129 if (u in nodeset) and (v in nodeset):
1130 i, j = index[u], index[v]
1131 e_weight = attrs.get(weight, 1)
1132 A[i, j] = op([e_weight, A[i, j]])
1133 if undirected:
1134 A[j, i] = A[i, j]
1135 else:
1136 # Graph or DiGraph, this is much faster than above
1137 A = np.full((nlen, nlen), np.nan, order=order)
1138 for u, nbrdict in G.adjacency():
1139 for v, d in nbrdict.items():
1140 try:
1141 A[index[u], index[v]] = d.get(weight, 1)
1142 except KeyError:
1143 # This occurs when there are fewer desired nodes than
1144 # there are nodes in the graph: len(nodelist) < len(G)
1145 pass
1146
1147 A[np.isnan(A)] = nonedge
1148 A = np.asarray(A, dtype=dtype)
1149 return A
1150
1151
1152 def from_numpy_array(A, parallel_edges=False, create_using=None):
1153 """Returns a graph from NumPy array.
1154
1155 The NumPy array is interpreted as an adjacency matrix for the graph.
1156
1157 Parameters
1158 ----------
1159 A : NumPy ndarray
1160 An adjacency matrix representation of a graph
1161
1162 parallel_edges : Boolean
1163 If this is True, `create_using` is a multigraph, and `A` is an
1164 integer array, then entry *(i, j)* in the array is interpreted as the
1165 number of parallel edges joining vertices *i* and *j* in the graph.
1166 If it is False, then the entries in the array are interpreted as
1167 the weight of a single edge joining the vertices.
1168
1169 create_using : NetworkX graph constructor, optional (default=nx.Graph)
1170 Graph type to create. If graph instance, then cleared before populated.
1171
1172 Notes
1173 -----
1174 For directed graphs, explicitly mention create_using=nx.Digraph,
1175 and entry i,j of A corresponds to an edge from i to j.
1176
1177 If `create_using` is :class:`networkx.MultiGraph` or
1178 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
1179 entries of `A` are of type :class:`int`, then this function returns a
1180 multigraph (of the same type as `create_using`) with parallel edges.
1181
1182 If `create_using` indicates an undirected multigraph, then only the edges
1183 indicated by the upper triangle of the array `A` will be added to the
1184 graph.
1185
1186 If the NumPy array has a single data type for each array entry it
1187 will be converted to an appropriate Python data type.
1188
1189 If the NumPy array has a user-specified compound data type the names
1190 of the data fields will be used as attribute keys in the resulting
1191 NetworkX graph.
1192
1193 See Also
1194 --------
1195 to_numpy_array
1196
1197 Examples
1198 --------
1199 Simple integer weights on edges:
1200
1201 >>> import numpy as np
1202 >>> A = np.array([[1, 1], [2, 1]])
1203 >>> G = nx.from_numpy_array(A)
1204 >>> G.edges(data=True)
1205 EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), \
1206 (1, 1, {'weight': 1})])
1207
1208 If `create_using` indicates a multigraph and the array has only integer
1209 entries and `parallel_edges` is False, then the entries will be treated
1210 as weights for edges joining the nodes (without creating parallel edges):
1211
1212 >>> A = np.array([[1, 1], [1, 2]])
1213 >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
1214 >>> G[1][1]
1215 AtlasView({0: {'weight': 2}})
1216
1217 If `create_using` indicates a multigraph and the array has only integer
1218 entries and `parallel_edges` is True, then the entries will be treated
1219 as the number of parallel edges joining those two vertices:
1220
1221 >>> A = np.array([[1, 1], [1, 2]])
1222 >>> temp = nx.MultiGraph()
1223 >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
1224 >>> G[1][1]
1225 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
1226
1227 User defined compound data type on edges:
1228
1229 >>> dt = [('weight', float), ('cost', int)]
1230 >>> A = np.array([[(1.0, 2)]], dtype=dt)
1231 >>> G = nx.from_numpy_array(A)
1232 >>> G.edges()
1233 EdgeView([(0, 0)])
1234 >>> G[0][0]['cost']
1235 2
1236 >>> G[0][0]['weight']
1237 1.0
1238
1239 """
1240 return from_numpy_matrix(A, parallel_edges=parallel_edges,
1241 create_using=create_using)
1242
1243
1244 # fixture for pytest
1245 def setup_module(module):
1246 import pytest
1247 numpy = pytest.importorskip('numpy')
1248 scipy = pytest.importorskip('scipy')
1249 pandas = pytest.importorskip('pandas')