comparison planemo/lib/python3.7/site-packages/networkx/linalg/attrmatrix.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """
2 Functions for constructing matrix-like objects from graph attributes.
3 """
4
5 __all__ = ['attr_matrix', 'attr_sparse_matrix']
6
7 import networkx as nx
8
9
10 def _node_value(G, node_attr):
11 """Returns a function that returns a value from G.nodes[u].
12
13 We return a function expecting a node as its sole argument. Then, in the
14 simplest scenario, the returned function will return G.nodes[u][node_attr].
15 However, we also handle the case when `node_attr` is None or when it is a
16 function itself.
17
18 Parameters
19 ----------
20 G : graph
21 A NetworkX graph
22
23 node_attr : {None, str, callable}
24 Specification of how the value of the node attribute should be obtained
25 from the node attribute dictionary.
26
27 Returns
28 -------
29 value : function
30 A function expecting a node as its sole argument. The function will
31 returns a value from G.nodes[u] that depends on `edge_attr`.
32
33 """
34 if node_attr is None:
35 def value(u): return u
36 elif not hasattr(node_attr, '__call__'):
37 # assume it is a key for the node attribute dictionary
38 def value(u): return G.nodes[u][node_attr]
39 else:
40 # Advanced: Allow users to specify something else.
41 #
42 # For example,
43 # node_attr = lambda u: G.nodes[u].get('size', .5) * 3
44 #
45 value = node_attr
46
47 return value
48
49
50 def _edge_value(G, edge_attr):
51 """Returns a function that returns a value from G[u][v].
52
53 Suppose there exists an edge between u and v. Then we return a function
54 expecting u and v as arguments. For Graph and DiGraph, G[u][v] is
55 the edge attribute dictionary, and the function (essentially) returns
56 G[u][v][edge_attr]. However, we also handle cases when `edge_attr` is None
57 and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v]
58 is a dictionary of all edges between u and v. In this case, the returned
59 function sums the value of `edge_attr` for every edge between u and v.
60
61 Parameters
62 ----------
63 G : graph
64 A NetworkX graph
65
66 edge_attr : {None, str, callable}
67 Specification of how the value of the edge attribute should be obtained
68 from the edge attribute dictionary, G[u][v]. For multigraphs, G[u][v]
69 is a dictionary of all the edges between u and v. This allows for
70 special treatment of multiedges.
71
72 Returns
73 -------
74 value : function
75 A function expecting two nodes as parameters. The nodes should
76 represent the from- and to- node of an edge. The function will
77 return a value from G[u][v] that depends on `edge_attr`.
78
79 """
80
81 if edge_attr is None:
82 # topological count of edges
83
84 if G.is_multigraph():
85 def value(u, v): return len(G[u][v])
86 else:
87 def value(u, v): return 1
88
89 elif not hasattr(edge_attr, '__call__'):
90 # assume it is a key for the edge attribute dictionary
91
92 if edge_attr == 'weight':
93 # provide a default value
94 if G.is_multigraph():
95 def value(u, v): return sum([d.get(edge_attr, 1) for d in G[u][v].values()])
96 else:
97 def value(u, v): return G[u][v].get(edge_attr, 1)
98 else:
99 # otherwise, the edge attribute MUST exist for each edge
100 if G.is_multigraph():
101 def value(u, v): return sum([d[edge_attr] for d in G[u][v].values()])
102 else:
103 def value(u, v): return G[u][v][edge_attr]
104
105 else:
106 # Advanced: Allow users to specify something else.
107 #
108 # Alternative default value:
109 # edge_attr = lambda u,v: G[u][v].get('thickness', .5)
110 #
111 # Function on an attribute:
112 # edge_attr = lambda u,v: abs(G[u][v]['weight'])
113 #
114 # Handle Multi(Di)Graphs differently:
115 # edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()])
116 #
117 # Ignore multiple edges
118 # edge_attr = lambda u,v: 1 if len(G[u][v]) else 0
119 #
120 value = edge_attr
121
122 return value
123
124
125 def attr_matrix(G, edge_attr=None, node_attr=None, normalized=False,
126 rc_order=None, dtype=None, order=None):
127 """Returns a NumPy matrix using attributes from G.
128
129 If only `G` is passed in, then the adjacency matrix is constructed.
130
131 Let A be a discrete set of values for the node attribute `node_attr`. Then
132 the elements of A represent the rows and columns of the constructed matrix.
133 Now, iterate through every edge e=(u,v) in `G` and consider the value
134 of the edge attribute `edge_attr`. If ua and va are the values of the
135 node attribute `node_attr` for u and v, respectively, then the value of
136 the edge attribute is added to the matrix element at (ua, va).
137
138 Parameters
139 ----------
140 G : graph
141 The NetworkX graph used to construct the NumPy matrix.
142
143 edge_attr : str, optional
144 Each element of the matrix represents a running total of the
145 specified edge attribute for edges whose node attributes correspond
146 to the rows/cols of the matirx. The attribute must be present for
147 all edges in the graph. If no attribute is specified, then we
148 just count the number of edges whose node attributes correspond
149 to the matrix element.
150
151 node_attr : str, optional
152 Each row and column in the matrix represents a particular value
153 of the node attribute. The attribute must be present for all nodes
154 in the graph. Note, the values of this attribute should be reliably
155 hashable. So, float values are not recommended. If no attribute is
156 specified, then the rows and columns will be the nodes of the graph.
157
158 normalized : bool, optional
159 If True, then each row is normalized by the summation of its values.
160
161 rc_order : list, optional
162 A list of the node attribute values. This list specifies the ordering
163 of rows and columns of the array. If no ordering is provided, then
164 the ordering will be random (and also, a return value).
165
166 Other Parameters
167 ----------------
168 dtype : NumPy data-type, optional
169 A valid NumPy dtype used to initialize the array. Keep in mind certain
170 dtypes can yield unexpected results if the array is to be normalized.
171 The parameter is passed to numpy.zeros(). If unspecified, the NumPy
172 default is used.
173
174 order : {'C', 'F'}, optional
175 Whether to store multidimensional data in C- or Fortran-contiguous
176 (row- or column-wise) order in memory. This parameter is passed to
177 numpy.zeros(). If unspecified, the NumPy default is used.
178
179 Returns
180 -------
181 M : NumPy matrix
182 The attribute matrix.
183
184 ordering : list
185 If `rc_order` was specified, then only the matrix is returned.
186 However, if `rc_order` was None, then the ordering used to construct
187 the matrix is returned as well.
188
189 Examples
190 --------
191 Construct an adjacency matrix:
192
193 >>> G = nx.Graph()
194 >>> G.add_edge(0, 1, thickness=1, weight=3)
195 >>> G.add_edge(0, 2, thickness=2)
196 >>> G.add_edge(1, 2, thickness=3)
197 >>> nx.attr_matrix(G, rc_order=[0, 1, 2])
198 matrix([[0., 1., 1.],
199 [1., 0., 1.],
200 [1., 1., 0.]])
201
202 Alternatively, we can obtain the matrix describing edge thickness.
203
204 >>> nx.attr_matrix(G, edge_attr='thickness', rc_order=[0, 1, 2])
205 matrix([[0., 1., 2.],
206 [1., 0., 3.],
207 [2., 3., 0.]])
208
209 We can also color the nodes and ask for the probability distribution over
210 all edges (u,v) describing:
211
212 Pr(v has color Y | u has color X)
213
214 >>> G.nodes[0]['color'] = 'red'
215 >>> G.nodes[1]['color'] = 'red'
216 >>> G.nodes[2]['color'] = 'blue'
217 >>> rc = ['red', 'blue']
218 >>> nx.attr_matrix(G, node_attr='color', normalized=True, rc_order=rc)
219 matrix([[0.33333333, 0.66666667],
220 [1. , 0. ]])
221
222 For example, the above tells us that for all edges (u,v):
223
224 Pr( v is red | u is red) = 1/3
225 Pr( v is blue | u is red) = 2/3
226
227 Pr( v is red | u is blue) = 1
228 Pr( v is blue | u is blue) = 0
229
230 Finally, we can obtain the total weights listed by the node colors.
231
232 >>> nx.attr_matrix(G, edge_attr='weight', node_attr='color', rc_order=rc)
233 matrix([[3., 2.],
234 [2., 0.]])
235
236 Thus, the total weight over all edges (u,v) with u and v having colors:
237
238 (red, red) is 3 # the sole contribution is from edge (0,1)
239 (red, blue) is 2 # contributions from edges (0,2) and (1,2)
240 (blue, red) is 2 # same as (red, blue) since graph is undirected
241 (blue, blue) is 0 # there are no edges with blue endpoints
242
243 """
244 try:
245 import numpy as np
246 except ImportError:
247 raise ImportError(
248 "attr_matrix() requires numpy: http://scipy.org/ ")
249
250 edge_value = _edge_value(G, edge_attr)
251 node_value = _node_value(G, node_attr)
252
253 if rc_order is None:
254 ordering = list(set([node_value(n) for n in G]))
255 else:
256 ordering = rc_order
257
258 N = len(ordering)
259 undirected = not G.is_directed()
260 index = dict(zip(ordering, range(N)))
261 M = np.zeros((N, N), dtype=dtype, order=order)
262
263 seen = set([])
264 for u, nbrdict in G.adjacency():
265 for v in nbrdict:
266 # Obtain the node attribute values.
267 i, j = index[node_value(u)], index[node_value(v)]
268 if v not in seen:
269 M[i, j] += edge_value(u, v)
270 if undirected:
271 M[j, i] = M[i, j]
272
273 if undirected:
274 seen.add(u)
275
276 if normalized:
277 M /= M.sum(axis=1).reshape((N, 1))
278
279 M = np.asmatrix(M)
280
281 if rc_order is None:
282 return M, ordering
283 else:
284 return M
285
286
287 def attr_sparse_matrix(G, edge_attr=None, node_attr=None,
288 normalized=False, rc_order=None, dtype=None):
289 """Returns a SciPy sparse matrix using attributes from G.
290
291 If only `G` is passed in, then the adjacency matrix is constructed.
292
293 Let A be a discrete set of values for the node attribute `node_attr`. Then
294 the elements of A represent the rows and columns of the constructed matrix.
295 Now, iterate through every edge e=(u,v) in `G` and consider the value
296 of the edge attribute `edge_attr`. If ua and va are the values of the
297 node attribute `node_attr` for u and v, respectively, then the value of
298 the edge attribute is added to the matrix element at (ua, va).
299
300 Parameters
301 ----------
302 G : graph
303 The NetworkX graph used to construct the NumPy matrix.
304
305 edge_attr : str, optional
306 Each element of the matrix represents a running total of the
307 specified edge attribute for edges whose node attributes correspond
308 to the rows/cols of the matirx. The attribute must be present for
309 all edges in the graph. If no attribute is specified, then we
310 just count the number of edges whose node attributes correspond
311 to the matrix element.
312
313 node_attr : str, optional
314 Each row and column in the matrix represents a particular value
315 of the node attribute. The attribute must be present for all nodes
316 in the graph. Note, the values of this attribute should be reliably
317 hashable. So, float values are not recommended. If no attribute is
318 specified, then the rows and columns will be the nodes of the graph.
319
320 normalized : bool, optional
321 If True, then each row is normalized by the summation of its values.
322
323 rc_order : list, optional
324 A list of the node attribute values. This list specifies the ordering
325 of rows and columns of the array. If no ordering is provided, then
326 the ordering will be random (and also, a return value).
327
328 Other Parameters
329 ----------------
330 dtype : NumPy data-type, optional
331 A valid NumPy dtype used to initialize the array. Keep in mind certain
332 dtypes can yield unexpected results if the array is to be normalized.
333 The parameter is passed to numpy.zeros(). If unspecified, the NumPy
334 default is used.
335
336 Returns
337 -------
338 M : SciPy sparse matrix
339 The attribute matrix.
340
341 ordering : list
342 If `rc_order` was specified, then only the matrix is returned.
343 However, if `rc_order` was None, then the ordering used to construct
344 the matrix is returned as well.
345
346 Examples
347 --------
348 Construct an adjacency matrix:
349
350 >>> G = nx.Graph()
351 >>> G.add_edge(0,1,thickness=1,weight=3)
352 >>> G.add_edge(0,2,thickness=2)
353 >>> G.add_edge(1,2,thickness=3)
354 >>> M = nx.attr_sparse_matrix(G, rc_order=[0,1,2])
355 >>> M.todense()
356 matrix([[0., 1., 1.],
357 [1., 0., 1.],
358 [1., 1., 0.]])
359
360 Alternatively, we can obtain the matrix describing edge thickness.
361
362 >>> M = nx.attr_sparse_matrix(G, edge_attr='thickness', rc_order=[0,1,2])
363 >>> M.todense()
364 matrix([[0., 1., 2.],
365 [1., 0., 3.],
366 [2., 3., 0.]])
367
368 We can also color the nodes and ask for the probability distribution over
369 all edges (u,v) describing:
370
371 Pr(v has color Y | u has color X)
372
373 >>> G.nodes[0]['color'] = 'red'
374 >>> G.nodes[1]['color'] = 'red'
375 >>> G.nodes[2]['color'] = 'blue'
376 >>> rc = ['red', 'blue']
377 >>> M = nx.attr_sparse_matrix(G, node_attr='color', \
378 normalized=True, rc_order=rc)
379 >>> M.todense()
380 matrix([[0.33333333, 0.66666667],
381 [1. , 0. ]])
382
383 For example, the above tells us that for all edges (u,v):
384
385 Pr( v is red | u is red) = 1/3
386 Pr( v is blue | u is red) = 2/3
387
388 Pr( v is red | u is blue) = 1
389 Pr( v is blue | u is blue) = 0
390
391 Finally, we can obtain the total weights listed by the node colors.
392
393 >>> M = nx.attr_sparse_matrix(G, edge_attr='weight',\
394 node_attr='color', rc_order=rc)
395 >>> M.todense()
396 matrix([[3., 2.],
397 [2., 0.]])
398
399 Thus, the total weight over all edges (u,v) with u and v having colors:
400
401 (red, red) is 3 # the sole contribution is from edge (0,1)
402 (red, blue) is 2 # contributions from edges (0,2) and (1,2)
403 (blue, red) is 2 # same as (red, blue) since graph is undirected
404 (blue, blue) is 0 # there are no edges with blue endpoints
405
406 """
407 try:
408 import numpy as np
409 from scipy import sparse
410 except ImportError:
411 raise ImportError(
412 "attr_sparse_matrix() requires scipy: http://scipy.org/ ")
413
414 edge_value = _edge_value(G, edge_attr)
415 node_value = _node_value(G, node_attr)
416
417 if rc_order is None:
418 ordering = list(set([node_value(n) for n in G]))
419 else:
420 ordering = rc_order
421
422 N = len(ordering)
423 undirected = not G.is_directed()
424 index = dict(zip(ordering, range(N)))
425 M = sparse.lil_matrix((N, N), dtype=dtype)
426
427 seen = set([])
428 for u, nbrdict in G.adjacency():
429 for v in nbrdict:
430 # Obtain the node attribute values.
431 i, j = index[node_value(u)], index[node_value(v)]
432 if v not in seen:
433 M[i, j] += edge_value(u, v)
434 if undirected:
435 M[j, i] = M[i, j]
436
437 if undirected:
438 seen.add(u)
439
440 if normalized:
441 norms = np.asarray(M.sum(axis=1)).ravel()
442 for i, norm in enumerate(norms):
443 M[i, :] /= norm
444
445 if rc_order is None:
446 return M, ordering
447 else:
448 return M
449
450
451 # fixture for pytest
452 def setup_module(module):
453 import pytest
454 numpy = pytest.importorskip('numpy')
455 scipy = pytest.importorskip('scipy')