comparison planemo/lib/python3.7/site-packages/networkx/algorithms/cluster.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2004-2019 by
4 # Aric Hagberg <hagberg@lanl.gov>
5 # Dan Schult <dschult@colgate.edu>
6 # Pieter Swart <swart@lanl.gov>
7 # All rights reserved.
8 # BSD license.
9 """Algorithms to characterize the number of triangles in a graph."""
10
11 from itertools import chain
12 from itertools import combinations
13 from collections import Counter
14
15 import networkx as nx
16 from networkx.utils import not_implemented_for
17
18 __author__ = """\n""".join(['Aric Hagberg <aric.hagberg@gmail.com>',
19 'Dan Schult (dschult@colgate.edu)',
20 'Pieter Swart (swart@lanl.gov)',
21 'Jordi Torrents <jtorrents@milnou.net>'])
22
23 __all__ = ['triangles', 'average_clustering', 'clustering', 'transitivity',
24 'square_clustering', 'generalized_degree']
25
26
27 @not_implemented_for('directed')
28 def triangles(G, nodes=None):
29 """Compute the number of triangles.
30
31 Finds the number of triangles that include a node as one vertex.
32
33 Parameters
34 ----------
35 G : graph
36 A networkx graph
37 nodes : container of nodes, optional (default= all nodes in G)
38 Compute triangles for nodes in this container.
39
40 Returns
41 -------
42 out : dictionary
43 Number of triangles keyed by node label.
44
45 Examples
46 --------
47 >>> G=nx.complete_graph(5)
48 >>> print(nx.triangles(G,0))
49 6
50 >>> print(nx.triangles(G))
51 {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
52 >>> print(list(nx.triangles(G,(0,1)).values()))
53 [6, 6]
54
55 Notes
56 -----
57 When computing triangles for the entire graph each triangle is counted
58 three times, once at each node. Self loops are ignored.
59
60 """
61 # If `nodes` represents a single node in the graph, return only its number
62 # of triangles.
63 if nodes in G:
64 return next(_triangles_and_degree_iter(G, nodes))[2] // 2
65 # Otherwise, `nodes` represents an iterable of nodes, so return a
66 # dictionary mapping node to number of triangles.
67 return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)}
68
69
70 @not_implemented_for('multigraph')
71 def _triangles_and_degree_iter(G, nodes=None):
72 """ Return an iterator of (node, degree, triangles, generalized degree).
73
74 This double counts triangles so you may want to divide by 2.
75 See degree(), triangles() and generalized_degree() for definitions
76 and details.
77
78 """
79 if nodes is None:
80 nodes_nbrs = G.adj.items()
81 else:
82 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
83
84 for v, v_nbrs in nodes_nbrs:
85 vs = set(v_nbrs) - {v}
86 gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
87 ntriangles = sum(k * val for k, val in gen_degree.items())
88 yield (v, len(vs), ntriangles, gen_degree)
89
90
91 @not_implemented_for('multigraph')
92 def _weighted_triangles_and_degree_iter(G, nodes=None, weight='weight'):
93 """ Return an iterator of (node, degree, weighted_triangles).
94
95 Used for weighted clustering.
96
97 """
98 if weight is None or G.number_of_edges() == 0:
99 max_weight = 1
100 else:
101 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
102 if nodes is None:
103 nodes_nbrs = G.adj.items()
104 else:
105 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
106
107 def wt(u, v):
108 return G[u][v].get(weight, 1) / max_weight
109
110 for i, nbrs in nodes_nbrs:
111 inbrs = set(nbrs) - {i}
112 weighted_triangles = 0
113 seen = set()
114 for j in inbrs:
115 seen.add(j)
116 # This prevents double counting.
117 jnbrs = set(G[j]) - seen
118 # Only compute the edge weight once, before the inner inner
119 # loop.
120 wij = wt(i, j)
121 weighted_triangles += sum((wij * wt(j, k) * wt(k, i)) ** (1 / 3)
122 for k in inbrs & jnbrs)
123 yield (i, len(inbrs), 2 * weighted_triangles)
124
125
126 @not_implemented_for('multigraph')
127 def _directed_triangles_and_degree_iter(G, nodes=None):
128 """ Return an iterator of
129 (node, total_degree, reciprocal_degree, directed_triangles).
130
131 Used for directed clustering.
132
133 """
134 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
135
136 for i, preds, succs in nodes_nbrs:
137 ipreds = set(preds) - {i}
138 isuccs = set(succs) - {i}
139
140 directed_triangles = 0
141 for j in chain(ipreds, isuccs):
142 jpreds = set(G._pred[j]) - {j}
143 jsuccs = set(G._succ[j]) - {j}
144 directed_triangles += sum((1 for k in
145 chain((ipreds & jpreds),
146 (ipreds & jsuccs),
147 (isuccs & jpreds),
148 (isuccs & jsuccs))))
149 dtotal = len(ipreds) + len(isuccs)
150 dbidirectional = len(ipreds & isuccs)
151 yield (i, dtotal, dbidirectional, directed_triangles)
152
153
154 @not_implemented_for('multigraph')
155 def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight = 'weight'):
156 """ Return an iterator of
157 (node, total_degree, reciprocal_degree, directed_weighted_triangles).
158
159 Used for directed weighted clustering.
160
161 """
162 if weight is None or G.number_of_edges() == 0:
163 max_weight = 1
164 else:
165 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
166
167 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
168
169 def wt(u, v):
170 return G[u][v].get(weight, 1) / max_weight
171
172 for i, preds, succs in nodes_nbrs:
173 ipreds = set(preds) - {i}
174 isuccs = set(succs) - {i}
175
176 directed_triangles = 0
177 for j in ipreds:
178 jpreds = set(G._pred[j]) - {j}
179 jsuccs = set(G._succ[j]) - {j}
180 directed_triangles += sum((wt(j, i) * wt(k, i) * wt(k, j))**(1 / 3)
181 for k in ipreds & jpreds)
182 directed_triangles += sum((wt(j, i) * wt(k, i) * wt(j, k))**(1 / 3)
183 for k in ipreds & jsuccs)
184 directed_triangles += sum((wt(j, i) * wt(i, k) * wt(k, j))**(1 / 3)
185 for k in isuccs & jpreds)
186 directed_triangles += sum((wt(j, i) * wt(i, k) * wt(j, k))**(1 / 3)
187 for k in isuccs & jsuccs)
188
189 for j in isuccs:
190 jpreds = set(G._pred[j]) - {j}
191 jsuccs = set(G._succ[j]) - {j}
192 directed_triangles += sum((wt(i, j) * wt(k, i) * wt(k, j))**(1 / 3)
193 for k in ipreds & jpreds)
194 directed_triangles += sum((wt(i, j) * wt(k, i) * wt(j, k))**(1 / 3)
195 for k in ipreds & jsuccs)
196 directed_triangles += sum((wt(i, j) * wt(i, k) * wt(k, j))**(1 / 3)
197 for k in isuccs & jpreds)
198 directed_triangles += sum((wt(i, j) * wt(i, k) * wt(j, k))**(1 / 3)
199 for k in isuccs & jsuccs)
200
201 dtotal = len(ipreds) + len(isuccs)
202 dbidirectional = len(ipreds & isuccs)
203 yield (i, dtotal, dbidirectional, directed_triangles)
204
205
206 def average_clustering(G, nodes=None, weight=None, count_zeros=True):
207 r"""Compute the average clustering coefficient for the graph G.
208
209 The clustering coefficient for the graph is the average,
210
211 .. math::
212
213 C = \frac{1}{n}\sum_{v \in G} c_v,
214
215 where :math:`n` is the number of nodes in `G`.
216
217 Parameters
218 ----------
219 G : graph
220
221 nodes : container of nodes, optional (default=all nodes in G)
222 Compute average clustering for nodes in this container.
223
224 weight : string or None, optional (default=None)
225 The edge attribute that holds the numerical value used as a weight.
226 If None, then each edge has weight 1.
227
228 count_zeros : bool
229 If False include only the nodes with nonzero clustering in the average.
230
231 Returns
232 -------
233 avg : float
234 Average clustering
235
236 Examples
237 --------
238 >>> G=nx.complete_graph(5)
239 >>> print(nx.average_clustering(G))
240 1.0
241
242 Notes
243 -----
244 This is a space saving routine; it might be faster
245 to use the clustering function to get a list and then take the average.
246
247 Self loops are ignored.
248
249 References
250 ----------
251 .. [1] Generalizations of the clustering coefficient to weighted
252 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
253 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
254 http://jponnela.com/web_documents/a9.pdf
255 .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated
256 nodes and leafs on clustering measures for small-world networks.
257 https://arxiv.org/abs/0802.2512
258 """
259 c = clustering(G, nodes, weight=weight).values()
260 if not count_zeros:
261 c = [v for v in c if v > 0]
262 return sum(c) / len(c)
263
264
265 def clustering(G, nodes=None, weight=None):
266 r"""Compute the clustering coefficient for nodes.
267
268 For unweighted graphs, the clustering of a node :math:`u`
269 is the fraction of possible triangles through that node that exist,
270
271 .. math::
272
273 c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
274
275 where :math:`T(u)` is the number of triangles through node :math:`u` and
276 :math:`deg(u)` is the degree of :math:`u`.
277
278 For weighted graphs, there are several ways to define clustering [1]_.
279 the one used here is defined
280 as the geometric average of the subgraph edge weights [2]_,
281
282 .. math::
283
284 c_u = \frac{1}{deg(u)(deg(u)-1))}
285 \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
286
287 The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
288 in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
289
290 The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
291
292 For directed graphs, the clustering is similarly defined as the fraction
293 of all possible directed triangles or geometric average of the subgraph
294 edge weights for unweighted and weighted directed graph respectively [3]_.
295
296 .. math::
297
298 c_u = \frac{1}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)}
299 T(u),
300
301 where :math:`T(u)` is the number of directed triangles through node
302 :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
303 :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
304 :math:`u`.
305
306 Parameters
307 ----------
308 G : graph
309
310 nodes : container of nodes, optional (default=all nodes in G)
311 Compute clustering for nodes in this container.
312
313 weight : string or None, optional (default=None)
314 The edge attribute that holds the numerical value used as a weight.
315 If None, then each edge has weight 1.
316
317 Returns
318 -------
319 out : float, or dictionary
320 Clustering coefficient at specified nodes
321
322 Examples
323 --------
324 >>> G=nx.complete_graph(5)
325 >>> print(nx.clustering(G,0))
326 1.0
327 >>> print(nx.clustering(G))
328 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
329
330 Notes
331 -----
332 Self loops are ignored.
333
334 References
335 ----------
336 .. [1] Generalizations of the clustering coefficient to weighted
337 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
338 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
339 http://jponnela.com/web_documents/a9.pdf
340 .. [2] Intensity and coherence of motifs in weighted complex
341 networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
342 Physical Review E, 71(6), 065103 (2005).
343 .. [3] Clustering in complex directed networks by G. Fagiolo,
344 Physical Review E, 76(2), 026107 (2007).
345 """
346 if G.is_directed():
347 if weight is not None:
348 td_iter = _directed_weighted_triangles_and_degree_iter(
349 G, nodes, weight)
350 clusterc = {v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
351 for v, dt, db, t in td_iter}
352 else:
353 td_iter = _directed_triangles_and_degree_iter(G, nodes)
354 clusterc = {v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
355 for v, dt, db, t in td_iter}
356 else:
357 if weight is not None:
358 td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight)
359 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for
360 v, d, t in td_iter}
361 else:
362 td_iter = _triangles_and_degree_iter(G, nodes)
363 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for
364 v, d, t, _ in td_iter}
365 if nodes in G:
366 # Return the value of the sole entry in the dictionary.
367 return clusterc[nodes]
368 return clusterc
369
370
371 def transitivity(G):
372 r"""Compute graph transitivity, the fraction of all possible triangles
373 present in G.
374
375 Possible triangles are identified by the number of "triads"
376 (two edges with a shared vertex).
377
378 The transitivity is
379
380 .. math::
381
382 T = 3\frac{\#triangles}{\#triads}.
383
384 Parameters
385 ----------
386 G : graph
387
388 Returns
389 -------
390 out : float
391 Transitivity
392
393 Examples
394 --------
395 >>> G = nx.complete_graph(5)
396 >>> print(nx.transitivity(G))
397 1.0
398 """
399 triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
400 contri = sum(d * (d - 1) for v, d, t, _ in _triangles_and_degree_iter(G))
401 return 0 if triangles == 0 else triangles / contri
402
403
404 def square_clustering(G, nodes=None):
405 r""" Compute the squares clustering coefficient for nodes.
406
407 For each node return the fraction of possible squares that exist at
408 the node [1]_
409
410 .. math::
411 C_4(v) = \frac{ \sum_{u=1}^{k_v}
412 \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
413 \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},
414
415 where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and
416 :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u -
417 (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw}))`, where
418 :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0
419 otherwise.
420
421 Parameters
422 ----------
423 G : graph
424
425 nodes : container of nodes, optional (default=all nodes in G)
426 Compute clustering for nodes in this container.
427
428 Returns
429 -------
430 c4 : dictionary
431 A dictionary keyed by node with the square clustering coefficient value.
432
433 Examples
434 --------
435 >>> G=nx.complete_graph(5)
436 >>> print(nx.square_clustering(G,0))
437 1.0
438 >>> print(nx.square_clustering(G))
439 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
440
441 Notes
442 -----
443 While :math:`C_3(v)` (triangle clustering) gives the probability that
444 two neighbors of node v are connected with each other, :math:`C_4(v)` is
445 the probability that two neighbors of node v share a common
446 neighbor different from v. This algorithm can be applied to both
447 bipartite and unipartite networks.
448
449 References
450 ----------
451 .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
452 Cycles and clustering in bipartite networks.
453 Physical Review E (72) 056127.
454 """
455 if nodes is None:
456 node_iter = G
457 else:
458 node_iter = G.nbunch_iter(nodes)
459 clustering = {}
460 for v in node_iter:
461 clustering[v] = 0
462 potential = 0
463 for u, w in combinations(G[v], 2):
464 squares = len((set(G[u]) & set(G[w])) - set([v]))
465 clustering[v] += squares
466 degm = squares + 1
467 if w in G[u]:
468 degm += 1
469 potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares
470 if potential > 0:
471 clustering[v] /= potential
472 if nodes in G:
473 # Return the value of the sole entry in the dictionary.
474 return clustering[nodes]
475 return clustering
476
477
478 @not_implemented_for('directed')
479 def generalized_degree(G, nodes=None):
480 r""" Compute the generalized degree for nodes.
481
482 For each node, the generalized degree shows how many edges of given
483 triangle multiplicity the node is connected to. The triangle multiplicity
484 of an edge is the number of triangles an edge participates in. The
485 generalized degree of node :math:`i` can be written as a vector
486 :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where
487 :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that
488 participate in :math:`j` triangles.
489
490 Parameters
491 ----------
492 G : graph
493
494 nodes : container of nodes, optional (default=all nodes in G)
495 Compute the generalized degree for nodes in this container.
496
497 Returns
498 -------
499 out : Counter, or dictionary of Counters
500 Generalized degree of specified nodes. The Counter is keyed by edge
501 triangle multiplicity.
502
503 Examples
504 --------
505 >>> G=nx.complete_graph(5)
506 >>> print(nx.generalized_degree(G,0))
507 Counter({3: 4})
508 >>> print(nx.generalized_degree(G))
509 {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})}
510
511 To recover the number of triangles attached to a node:
512
513 >>> k1 = nx.generalized_degree(G,0)
514 >>> sum([k*v for k,v in k1.items()])/2 == nx.triangles(G,0)
515 True
516
517 Notes
518 -----
519 In a network of N nodes, the highest triangle multiplicty an edge can have
520 is N-2.
521
522 The return value does not include a `zero` entry if no edges of a
523 particular triangle multiplicity are present.
524
525 The number of triangles node :math:`i` is attached to can be recovered from
526 the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc,
527 k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`.
528
529 References
530 ----------
531 .. [1] Networks with arbitrary edge multiplicities by V. Zlatić,
532 D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters),
533 Volume 97, Number 2 (2012).
534 https://iopscience.iop.org/article/10.1209/0295-5075/97/28005
535 """
536 if nodes in G:
537 return next(_triangles_and_degree_iter(G, nodes))[3]
538 return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}