Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/networkx/algorithms/cluster.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # | |
3 # Copyright (C) 2004-2019 by | |
4 # Aric Hagberg <hagberg@lanl.gov> | |
5 # Dan Schult <dschult@colgate.edu> | |
6 # Pieter Swart <swart@lanl.gov> | |
7 # All rights reserved. | |
8 # BSD license. | |
9 """Algorithms to characterize the number of triangles in a graph.""" | |
10 | |
11 from itertools import chain | |
12 from itertools import combinations | |
13 from collections import Counter | |
14 | |
15 import networkx as nx | |
16 from networkx.utils import not_implemented_for | |
17 | |
18 __author__ = """\n""".join(['Aric Hagberg <aric.hagberg@gmail.com>', | |
19 'Dan Schult (dschult@colgate.edu)', | |
20 'Pieter Swart (swart@lanl.gov)', | |
21 'Jordi Torrents <jtorrents@milnou.net>']) | |
22 | |
23 __all__ = ['triangles', 'average_clustering', 'clustering', 'transitivity', | |
24 'square_clustering', 'generalized_degree'] | |
25 | |
26 | |
27 @not_implemented_for('directed') | |
28 def triangles(G, nodes=None): | |
29 """Compute the number of triangles. | |
30 | |
31 Finds the number of triangles that include a node as one vertex. | |
32 | |
33 Parameters | |
34 ---------- | |
35 G : graph | |
36 A networkx graph | |
37 nodes : container of nodes, optional (default= all nodes in G) | |
38 Compute triangles for nodes in this container. | |
39 | |
40 Returns | |
41 ------- | |
42 out : dictionary | |
43 Number of triangles keyed by node label. | |
44 | |
45 Examples | |
46 -------- | |
47 >>> G=nx.complete_graph(5) | |
48 >>> print(nx.triangles(G,0)) | |
49 6 | |
50 >>> print(nx.triangles(G)) | |
51 {0: 6, 1: 6, 2: 6, 3: 6, 4: 6} | |
52 >>> print(list(nx.triangles(G,(0,1)).values())) | |
53 [6, 6] | |
54 | |
55 Notes | |
56 ----- | |
57 When computing triangles for the entire graph each triangle is counted | |
58 three times, once at each node. Self loops are ignored. | |
59 | |
60 """ | |
61 # If `nodes` represents a single node in the graph, return only its number | |
62 # of triangles. | |
63 if nodes in G: | |
64 return next(_triangles_and_degree_iter(G, nodes))[2] // 2 | |
65 # Otherwise, `nodes` represents an iterable of nodes, so return a | |
66 # dictionary mapping node to number of triangles. | |
67 return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)} | |
68 | |
69 | |
70 @not_implemented_for('multigraph') | |
71 def _triangles_and_degree_iter(G, nodes=None): | |
72 """ Return an iterator of (node, degree, triangles, generalized degree). | |
73 | |
74 This double counts triangles so you may want to divide by 2. | |
75 See degree(), triangles() and generalized_degree() for definitions | |
76 and details. | |
77 | |
78 """ | |
79 if nodes is None: | |
80 nodes_nbrs = G.adj.items() | |
81 else: | |
82 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) | |
83 | |
84 for v, v_nbrs in nodes_nbrs: | |
85 vs = set(v_nbrs) - {v} | |
86 gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs) | |
87 ntriangles = sum(k * val for k, val in gen_degree.items()) | |
88 yield (v, len(vs), ntriangles, gen_degree) | |
89 | |
90 | |
91 @not_implemented_for('multigraph') | |
92 def _weighted_triangles_and_degree_iter(G, nodes=None, weight='weight'): | |
93 """ Return an iterator of (node, degree, weighted_triangles). | |
94 | |
95 Used for weighted clustering. | |
96 | |
97 """ | |
98 if weight is None or G.number_of_edges() == 0: | |
99 max_weight = 1 | |
100 else: | |
101 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) | |
102 if nodes is None: | |
103 nodes_nbrs = G.adj.items() | |
104 else: | |
105 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) | |
106 | |
107 def wt(u, v): | |
108 return G[u][v].get(weight, 1) / max_weight | |
109 | |
110 for i, nbrs in nodes_nbrs: | |
111 inbrs = set(nbrs) - {i} | |
112 weighted_triangles = 0 | |
113 seen = set() | |
114 for j in inbrs: | |
115 seen.add(j) | |
116 # This prevents double counting. | |
117 jnbrs = set(G[j]) - seen | |
118 # Only compute the edge weight once, before the inner inner | |
119 # loop. | |
120 wij = wt(i, j) | |
121 weighted_triangles += sum((wij * wt(j, k) * wt(k, i)) ** (1 / 3) | |
122 for k in inbrs & jnbrs) | |
123 yield (i, len(inbrs), 2 * weighted_triangles) | |
124 | |
125 | |
126 @not_implemented_for('multigraph') | |
127 def _directed_triangles_and_degree_iter(G, nodes=None): | |
128 """ Return an iterator of | |
129 (node, total_degree, reciprocal_degree, directed_triangles). | |
130 | |
131 Used for directed clustering. | |
132 | |
133 """ | |
134 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) | |
135 | |
136 for i, preds, succs in nodes_nbrs: | |
137 ipreds = set(preds) - {i} | |
138 isuccs = set(succs) - {i} | |
139 | |
140 directed_triangles = 0 | |
141 for j in chain(ipreds, isuccs): | |
142 jpreds = set(G._pred[j]) - {j} | |
143 jsuccs = set(G._succ[j]) - {j} | |
144 directed_triangles += sum((1 for k in | |
145 chain((ipreds & jpreds), | |
146 (ipreds & jsuccs), | |
147 (isuccs & jpreds), | |
148 (isuccs & jsuccs)))) | |
149 dtotal = len(ipreds) + len(isuccs) | |
150 dbidirectional = len(ipreds & isuccs) | |
151 yield (i, dtotal, dbidirectional, directed_triangles) | |
152 | |
153 | |
154 @not_implemented_for('multigraph') | |
155 def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight = 'weight'): | |
156 """ Return an iterator of | |
157 (node, total_degree, reciprocal_degree, directed_weighted_triangles). | |
158 | |
159 Used for directed weighted clustering. | |
160 | |
161 """ | |
162 if weight is None or G.number_of_edges() == 0: | |
163 max_weight = 1 | |
164 else: | |
165 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) | |
166 | |
167 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) | |
168 | |
169 def wt(u, v): | |
170 return G[u][v].get(weight, 1) / max_weight | |
171 | |
172 for i, preds, succs in nodes_nbrs: | |
173 ipreds = set(preds) - {i} | |
174 isuccs = set(succs) - {i} | |
175 | |
176 directed_triangles = 0 | |
177 for j in ipreds: | |
178 jpreds = set(G._pred[j]) - {j} | |
179 jsuccs = set(G._succ[j]) - {j} | |
180 directed_triangles += sum((wt(j, i) * wt(k, i) * wt(k, j))**(1 / 3) | |
181 for k in ipreds & jpreds) | |
182 directed_triangles += sum((wt(j, i) * wt(k, i) * wt(j, k))**(1 / 3) | |
183 for k in ipreds & jsuccs) | |
184 directed_triangles += sum((wt(j, i) * wt(i, k) * wt(k, j))**(1 / 3) | |
185 for k in isuccs & jpreds) | |
186 directed_triangles += sum((wt(j, i) * wt(i, k) * wt(j, k))**(1 / 3) | |
187 for k in isuccs & jsuccs) | |
188 | |
189 for j in isuccs: | |
190 jpreds = set(G._pred[j]) - {j} | |
191 jsuccs = set(G._succ[j]) - {j} | |
192 directed_triangles += sum((wt(i, j) * wt(k, i) * wt(k, j))**(1 / 3) | |
193 for k in ipreds & jpreds) | |
194 directed_triangles += sum((wt(i, j) * wt(k, i) * wt(j, k))**(1 / 3) | |
195 for k in ipreds & jsuccs) | |
196 directed_triangles += sum((wt(i, j) * wt(i, k) * wt(k, j))**(1 / 3) | |
197 for k in isuccs & jpreds) | |
198 directed_triangles += sum((wt(i, j) * wt(i, k) * wt(j, k))**(1 / 3) | |
199 for k in isuccs & jsuccs) | |
200 | |
201 dtotal = len(ipreds) + len(isuccs) | |
202 dbidirectional = len(ipreds & isuccs) | |
203 yield (i, dtotal, dbidirectional, directed_triangles) | |
204 | |
205 | |
206 def average_clustering(G, nodes=None, weight=None, count_zeros=True): | |
207 r"""Compute the average clustering coefficient for the graph G. | |
208 | |
209 The clustering coefficient for the graph is the average, | |
210 | |
211 .. math:: | |
212 | |
213 C = \frac{1}{n}\sum_{v \in G} c_v, | |
214 | |
215 where :math:`n` is the number of nodes in `G`. | |
216 | |
217 Parameters | |
218 ---------- | |
219 G : graph | |
220 | |
221 nodes : container of nodes, optional (default=all nodes in G) | |
222 Compute average clustering for nodes in this container. | |
223 | |
224 weight : string or None, optional (default=None) | |
225 The edge attribute that holds the numerical value used as a weight. | |
226 If None, then each edge has weight 1. | |
227 | |
228 count_zeros : bool | |
229 If False include only the nodes with nonzero clustering in the average. | |
230 | |
231 Returns | |
232 ------- | |
233 avg : float | |
234 Average clustering | |
235 | |
236 Examples | |
237 -------- | |
238 >>> G=nx.complete_graph(5) | |
239 >>> print(nx.average_clustering(G)) | |
240 1.0 | |
241 | |
242 Notes | |
243 ----- | |
244 This is a space saving routine; it might be faster | |
245 to use the clustering function to get a list and then take the average. | |
246 | |
247 Self loops are ignored. | |
248 | |
249 References | |
250 ---------- | |
251 .. [1] Generalizations of the clustering coefficient to weighted | |
252 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, | |
253 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). | |
254 http://jponnela.com/web_documents/a9.pdf | |
255 .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated | |
256 nodes and leafs on clustering measures for small-world networks. | |
257 https://arxiv.org/abs/0802.2512 | |
258 """ | |
259 c = clustering(G, nodes, weight=weight).values() | |
260 if not count_zeros: | |
261 c = [v for v in c if v > 0] | |
262 return sum(c) / len(c) | |
263 | |
264 | |
265 def clustering(G, nodes=None, weight=None): | |
266 r"""Compute the clustering coefficient for nodes. | |
267 | |
268 For unweighted graphs, the clustering of a node :math:`u` | |
269 is the fraction of possible triangles through that node that exist, | |
270 | |
271 .. math:: | |
272 | |
273 c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)}, | |
274 | |
275 where :math:`T(u)` is the number of triangles through node :math:`u` and | |
276 :math:`deg(u)` is the degree of :math:`u`. | |
277 | |
278 For weighted graphs, there are several ways to define clustering [1]_. | |
279 the one used here is defined | |
280 as the geometric average of the subgraph edge weights [2]_, | |
281 | |
282 .. math:: | |
283 | |
284 c_u = \frac{1}{deg(u)(deg(u)-1))} | |
285 \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}. | |
286 | |
287 The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight | |
288 in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`. | |
289 | |
290 The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`. | |
291 | |
292 For directed graphs, the clustering is similarly defined as the fraction | |
293 of all possible directed triangles or geometric average of the subgraph | |
294 edge weights for unweighted and weighted directed graph respectively [3]_. | |
295 | |
296 .. math:: | |
297 | |
298 c_u = \frac{1}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)} | |
299 T(u), | |
300 | |
301 where :math:`T(u)` is the number of directed triangles through node | |
302 :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of | |
303 :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of | |
304 :math:`u`. | |
305 | |
306 Parameters | |
307 ---------- | |
308 G : graph | |
309 | |
310 nodes : container of nodes, optional (default=all nodes in G) | |
311 Compute clustering for nodes in this container. | |
312 | |
313 weight : string or None, optional (default=None) | |
314 The edge attribute that holds the numerical value used as a weight. | |
315 If None, then each edge has weight 1. | |
316 | |
317 Returns | |
318 ------- | |
319 out : float, or dictionary | |
320 Clustering coefficient at specified nodes | |
321 | |
322 Examples | |
323 -------- | |
324 >>> G=nx.complete_graph(5) | |
325 >>> print(nx.clustering(G,0)) | |
326 1.0 | |
327 >>> print(nx.clustering(G)) | |
328 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} | |
329 | |
330 Notes | |
331 ----- | |
332 Self loops are ignored. | |
333 | |
334 References | |
335 ---------- | |
336 .. [1] Generalizations of the clustering coefficient to weighted | |
337 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, | |
338 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). | |
339 http://jponnela.com/web_documents/a9.pdf | |
340 .. [2] Intensity and coherence of motifs in weighted complex | |
341 networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski, | |
342 Physical Review E, 71(6), 065103 (2005). | |
343 .. [3] Clustering in complex directed networks by G. Fagiolo, | |
344 Physical Review E, 76(2), 026107 (2007). | |
345 """ | |
346 if G.is_directed(): | |
347 if weight is not None: | |
348 td_iter = _directed_weighted_triangles_and_degree_iter( | |
349 G, nodes, weight) | |
350 clusterc = {v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) | |
351 for v, dt, db, t in td_iter} | |
352 else: | |
353 td_iter = _directed_triangles_and_degree_iter(G, nodes) | |
354 clusterc = {v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) | |
355 for v, dt, db, t in td_iter} | |
356 else: | |
357 if weight is not None: | |
358 td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight) | |
359 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for | |
360 v, d, t in td_iter} | |
361 else: | |
362 td_iter = _triangles_and_degree_iter(G, nodes) | |
363 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for | |
364 v, d, t, _ in td_iter} | |
365 if nodes in G: | |
366 # Return the value of the sole entry in the dictionary. | |
367 return clusterc[nodes] | |
368 return clusterc | |
369 | |
370 | |
371 def transitivity(G): | |
372 r"""Compute graph transitivity, the fraction of all possible triangles | |
373 present in G. | |
374 | |
375 Possible triangles are identified by the number of "triads" | |
376 (two edges with a shared vertex). | |
377 | |
378 The transitivity is | |
379 | |
380 .. math:: | |
381 | |
382 T = 3\frac{\#triangles}{\#triads}. | |
383 | |
384 Parameters | |
385 ---------- | |
386 G : graph | |
387 | |
388 Returns | |
389 ------- | |
390 out : float | |
391 Transitivity | |
392 | |
393 Examples | |
394 -------- | |
395 >>> G = nx.complete_graph(5) | |
396 >>> print(nx.transitivity(G)) | |
397 1.0 | |
398 """ | |
399 triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G)) | |
400 contri = sum(d * (d - 1) for v, d, t, _ in _triangles_and_degree_iter(G)) | |
401 return 0 if triangles == 0 else triangles / contri | |
402 | |
403 | |
404 def square_clustering(G, nodes=None): | |
405 r""" Compute the squares clustering coefficient for nodes. | |
406 | |
407 For each node return the fraction of possible squares that exist at | |
408 the node [1]_ | |
409 | |
410 .. math:: | |
411 C_4(v) = \frac{ \sum_{u=1}^{k_v} | |
412 \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v} | |
413 \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]}, | |
414 | |
415 where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and | |
416 :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u - | |
417 (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw}))`, where | |
418 :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0 | |
419 otherwise. | |
420 | |
421 Parameters | |
422 ---------- | |
423 G : graph | |
424 | |
425 nodes : container of nodes, optional (default=all nodes in G) | |
426 Compute clustering for nodes in this container. | |
427 | |
428 Returns | |
429 ------- | |
430 c4 : dictionary | |
431 A dictionary keyed by node with the square clustering coefficient value. | |
432 | |
433 Examples | |
434 -------- | |
435 >>> G=nx.complete_graph(5) | |
436 >>> print(nx.square_clustering(G,0)) | |
437 1.0 | |
438 >>> print(nx.square_clustering(G)) | |
439 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} | |
440 | |
441 Notes | |
442 ----- | |
443 While :math:`C_3(v)` (triangle clustering) gives the probability that | |
444 two neighbors of node v are connected with each other, :math:`C_4(v)` is | |
445 the probability that two neighbors of node v share a common | |
446 neighbor different from v. This algorithm can be applied to both | |
447 bipartite and unipartite networks. | |
448 | |
449 References | |
450 ---------- | |
451 .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005 | |
452 Cycles and clustering in bipartite networks. | |
453 Physical Review E (72) 056127. | |
454 """ | |
455 if nodes is None: | |
456 node_iter = G | |
457 else: | |
458 node_iter = G.nbunch_iter(nodes) | |
459 clustering = {} | |
460 for v in node_iter: | |
461 clustering[v] = 0 | |
462 potential = 0 | |
463 for u, w in combinations(G[v], 2): | |
464 squares = len((set(G[u]) & set(G[w])) - set([v])) | |
465 clustering[v] += squares | |
466 degm = squares + 1 | |
467 if w in G[u]: | |
468 degm += 1 | |
469 potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares | |
470 if potential > 0: | |
471 clustering[v] /= potential | |
472 if nodes in G: | |
473 # Return the value of the sole entry in the dictionary. | |
474 return clustering[nodes] | |
475 return clustering | |
476 | |
477 | |
478 @not_implemented_for('directed') | |
479 def generalized_degree(G, nodes=None): | |
480 r""" Compute the generalized degree for nodes. | |
481 | |
482 For each node, the generalized degree shows how many edges of given | |
483 triangle multiplicity the node is connected to. The triangle multiplicity | |
484 of an edge is the number of triangles an edge participates in. The | |
485 generalized degree of node :math:`i` can be written as a vector | |
486 :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where | |
487 :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that | |
488 participate in :math:`j` triangles. | |
489 | |
490 Parameters | |
491 ---------- | |
492 G : graph | |
493 | |
494 nodes : container of nodes, optional (default=all nodes in G) | |
495 Compute the generalized degree for nodes in this container. | |
496 | |
497 Returns | |
498 ------- | |
499 out : Counter, or dictionary of Counters | |
500 Generalized degree of specified nodes. The Counter is keyed by edge | |
501 triangle multiplicity. | |
502 | |
503 Examples | |
504 -------- | |
505 >>> G=nx.complete_graph(5) | |
506 >>> print(nx.generalized_degree(G,0)) | |
507 Counter({3: 4}) | |
508 >>> print(nx.generalized_degree(G)) | |
509 {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})} | |
510 | |
511 To recover the number of triangles attached to a node: | |
512 | |
513 >>> k1 = nx.generalized_degree(G,0) | |
514 >>> sum([k*v for k,v in k1.items()])/2 == nx.triangles(G,0) | |
515 True | |
516 | |
517 Notes | |
518 ----- | |
519 In a network of N nodes, the highest triangle multiplicty an edge can have | |
520 is N-2. | |
521 | |
522 The return value does not include a `zero` entry if no edges of a | |
523 particular triangle multiplicity are present. | |
524 | |
525 The number of triangles node :math:`i` is attached to can be recovered from | |
526 the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, | |
527 k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`. | |
528 | |
529 References | |
530 ---------- | |
531 .. [1] Networks with arbitrary edge multiplicities by V. Zlatić, | |
532 D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters), | |
533 Volume 97, Number 2 (2012). | |
534 https://iopscience.iop.org/article/10.1209/0295-5075/97/28005 | |
535 """ | |
536 if nodes in G: | |
537 return next(_triangles_and_degree_iter(G, nodes))[3] | |
538 return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)} |