Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/networkx/generators/community.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # Copyright(C) 2011-2019 by | |
2 # Ben Edwards <bedwards@cs.unm.edu> | |
3 # Aric Hagberg <hagberg@lanl.gov> | |
4 # Konstantinos Karakatsanis <dinoskarakas@gmail.com> | |
5 # All rights reserved. | |
6 # BSD license. | |
7 # | |
8 # Authors: Ben Edwards (bedwards@cs.unm.edu) | |
9 # Aric Hagberg (hagberg@lanl.gov) | |
10 # Konstantinos Karakatsanis (dinoskarakas@gmail.com) | |
11 # Jean-Gabriel Young (jean.gabriel.young@gmail.com) | |
12 """Generators for classes of graphs used in studying social networks.""" | |
13 import itertools | |
14 import math | |
15 import networkx as nx | |
16 from networkx.utils import py_random_state | |
17 | |
18 # Accommodates for both SciPy and non-SciPy implementations.. | |
19 try: | |
20 from scipy.special import zeta as _zeta | |
21 | |
22 def zeta(x, q, tolerance): | |
23 return _zeta(x, q) | |
24 except ImportError: | |
25 def zeta(x, q, tolerance): | |
26 """The Hurwitz zeta function, or the Riemann zeta function of two | |
27 arguments. | |
28 | |
29 ``x`` must be greater than one and ``q`` must be positive. | |
30 | |
31 This function repeatedly computes subsequent partial sums until | |
32 convergence, as decided by ``tolerance``. | |
33 """ | |
34 z = 0 | |
35 z_prev = -float('inf') | |
36 k = 0 | |
37 while abs(z - z_prev) > tolerance: | |
38 z_prev = z | |
39 z += 1 / ((k + q) ** x) | |
40 k += 1 | |
41 return z | |
42 | |
43 __all__ = ['caveman_graph', 'connected_caveman_graph', | |
44 'relaxed_caveman_graph', 'random_partition_graph', | |
45 'planted_partition_graph', 'gaussian_random_partition_graph', | |
46 'ring_of_cliques', 'windmill_graph', 'stochastic_block_model', | |
47 'LFR_benchmark_graph'] | |
48 | |
49 | |
50 def caveman_graph(l, k): | |
51 """Returns a caveman graph of `l` cliques of size `k`. | |
52 | |
53 Parameters | |
54 ---------- | |
55 l : int | |
56 Number of cliques | |
57 k : int | |
58 Size of cliques | |
59 | |
60 Returns | |
61 ------- | |
62 G : NetworkX Graph | |
63 caveman graph | |
64 | |
65 Notes | |
66 ----- | |
67 This returns an undirected graph, it can be converted to a directed | |
68 graph using :func:`nx.to_directed`, or a multigraph using | |
69 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is | |
70 described in [1]_ and it is unclear which of the directed | |
71 generalizations is most useful. | |
72 | |
73 Examples | |
74 -------- | |
75 >>> G = nx.caveman_graph(3, 3) | |
76 | |
77 See also | |
78 -------- | |
79 | |
80 connected_caveman_graph | |
81 | |
82 References | |
83 ---------- | |
84 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' | |
85 Amer. J. Soc. 105, 493-527, 1999. | |
86 """ | |
87 # l disjoint cliques of size k | |
88 G = nx.empty_graph(l * k) | |
89 if k > 1: | |
90 for start in range(0, l * k, k): | |
91 edges = itertools.combinations(range(start, start + k), 2) | |
92 G.add_edges_from(edges) | |
93 return G | |
94 | |
95 | |
96 def connected_caveman_graph(l, k): | |
97 """Returns a connected caveman graph of `l` cliques of size `k`. | |
98 | |
99 The connected caveman graph is formed by creating `n` cliques of size | |
100 `k`, then a single edge in each clique is rewired to a node in an | |
101 adjacent clique. | |
102 | |
103 Parameters | |
104 ---------- | |
105 l : int | |
106 number of cliques | |
107 k : int | |
108 size of cliques | |
109 | |
110 Returns | |
111 ------- | |
112 G : NetworkX Graph | |
113 connected caveman graph | |
114 | |
115 Notes | |
116 ----- | |
117 This returns an undirected graph, it can be converted to a directed | |
118 graph using :func:`nx.to_directed`, or a multigraph using | |
119 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is | |
120 described in [1]_ and it is unclear which of the directed | |
121 generalizations is most useful. | |
122 | |
123 Examples | |
124 -------- | |
125 >>> G = nx.connected_caveman_graph(3, 3) | |
126 | |
127 References | |
128 ---------- | |
129 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' | |
130 Amer. J. Soc. 105, 493-527, 1999. | |
131 """ | |
132 G = nx.caveman_graph(l, k) | |
133 for start in range(0, l * k, k): | |
134 G.remove_edge(start, start + 1) | |
135 G.add_edge(start, (start - 1) % (l * k)) | |
136 return G | |
137 | |
138 | |
139 @py_random_state(3) | |
140 def relaxed_caveman_graph(l, k, p, seed=None): | |
141 """Returns a relaxed caveman graph. | |
142 | |
143 A relaxed caveman graph starts with `l` cliques of size `k`. Edges are | |
144 then randomly rewired with probability `p` to link different cliques. | |
145 | |
146 Parameters | |
147 ---------- | |
148 l : int | |
149 Number of groups | |
150 k : int | |
151 Size of cliques | |
152 p : float | |
153 Probabilty of rewiring each edge. | |
154 seed : integer, random_state, or None (default) | |
155 Indicator of random number generation state. | |
156 See :ref:`Randomness<randomness>`. | |
157 | |
158 Returns | |
159 ------- | |
160 G : NetworkX Graph | |
161 Relaxed Caveman Graph | |
162 | |
163 Raises | |
164 ------ | |
165 NetworkXError: | |
166 If p is not in [0,1] | |
167 | |
168 Examples | |
169 -------- | |
170 >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42) | |
171 | |
172 References | |
173 ---------- | |
174 .. [1] Santo Fortunato, Community Detection in Graphs, | |
175 Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174. | |
176 https://arxiv.org/abs/0906.0612 | |
177 """ | |
178 G = nx.caveman_graph(l, k) | |
179 nodes = list(G) | |
180 for (u, v) in G.edges(): | |
181 if seed.random() < p: # rewire the edge | |
182 x = seed.choice(nodes) | |
183 if G.has_edge(u, x): | |
184 continue | |
185 G.remove_edge(u, v) | |
186 G.add_edge(u, x) | |
187 return G | |
188 | |
189 | |
190 @py_random_state(3) | |
191 def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False): | |
192 """Returns the random partition graph with a partition of sizes. | |
193 | |
194 A partition graph is a graph of communities with sizes defined by | |
195 s in sizes. Nodes in the same group are connected with probability | |
196 p_in and nodes of different groups are connected with probability | |
197 p_out. | |
198 | |
199 Parameters | |
200 ---------- | |
201 sizes : list of ints | |
202 Sizes of groups | |
203 p_in : float | |
204 probability of edges with in groups | |
205 p_out : float | |
206 probability of edges between groups | |
207 directed : boolean optional, default=False | |
208 Whether to create a directed graph | |
209 seed : integer, random_state, or None (default) | |
210 Indicator of random number generation state. | |
211 See :ref:`Randomness<randomness>`. | |
212 | |
213 Returns | |
214 ------- | |
215 G : NetworkX Graph or DiGraph | |
216 random partition graph of size sum(gs) | |
217 | |
218 Raises | |
219 ------ | |
220 NetworkXError | |
221 If p_in or p_out is not in [0,1] | |
222 | |
223 Examples | |
224 -------- | |
225 >>> G = nx.random_partition_graph([10,10,10],.25,.01) | |
226 >>> len(G) | |
227 30 | |
228 >>> partition = G.graph['partition'] | |
229 >>> len(partition) | |
230 3 | |
231 | |
232 Notes | |
233 ----- | |
234 This is a generalization of the planted-l-partition described in | |
235 [1]_. It allows for the creation of groups of any size. | |
236 | |
237 The partition is store as a graph attribute 'partition'. | |
238 | |
239 References | |
240 ---------- | |
241 .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports | |
242 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 | |
243 """ | |
244 # Use geometric method for O(n+m) complexity algorithm | |
245 # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation')) | |
246 if not 0.0 <= p_in <= 1.0: | |
247 raise nx.NetworkXError("p_in must be in [0,1]") | |
248 if not 0.0 <= p_out <= 1.0: | |
249 raise nx.NetworkXError("p_out must be in [0,1]") | |
250 | |
251 # create connection matrix | |
252 num_blocks = len(sizes) | |
253 p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)] | |
254 for r in range(num_blocks): | |
255 p[r][r] = p_in | |
256 | |
257 return stochastic_block_model(sizes, p, nodelist=None, seed=seed, | |
258 directed=directed, selfloops=False, | |
259 sparse=True) | |
260 | |
261 | |
262 @py_random_state(4) | |
263 def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False): | |
264 """Returns the planted l-partition graph. | |
265 | |
266 This model partitions a graph with n=l*k vertices in | |
267 l groups with k vertices each. Vertices of the same | |
268 group are linked with a probability p_in, and vertices | |
269 of different groups are linked with probability p_out. | |
270 | |
271 Parameters | |
272 ---------- | |
273 l : int | |
274 Number of groups | |
275 k : int | |
276 Number of vertices in each group | |
277 p_in : float | |
278 probability of connecting vertices within a group | |
279 p_out : float | |
280 probability of connected vertices between groups | |
281 seed : integer, random_state, or None (default) | |
282 Indicator of random number generation state. | |
283 See :ref:`Randomness<randomness>`. | |
284 directed : bool,optional (default=False) | |
285 If True return a directed graph | |
286 | |
287 Returns | |
288 ------- | |
289 G : NetworkX Graph or DiGraph | |
290 planted l-partition graph | |
291 | |
292 Raises | |
293 ------ | |
294 NetworkXError: | |
295 If p_in,p_out are not in [0,1] or | |
296 | |
297 Examples | |
298 -------- | |
299 >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42) | |
300 | |
301 See Also | |
302 -------- | |
303 random_partition_model | |
304 | |
305 References | |
306 ---------- | |
307 .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning | |
308 on the planted partition model, | |
309 Random Struct. Algor. 18 (2001) 116-140. | |
310 | |
311 .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports | |
312 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 | |
313 """ | |
314 return random_partition_graph([k] * l, p_in, p_out, seed, directed) | |
315 | |
316 | |
317 @py_random_state(6) | |
318 def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, | |
319 seed=None): | |
320 """Generate a Gaussian random partition graph. | |
321 | |
322 A Gaussian random partition graph is created by creating k partitions | |
323 each with a size drawn from a normal distribution with mean s and variance | |
324 s/v. Nodes are connected within clusters with probability p_in and | |
325 between clusters with probability p_out[1] | |
326 | |
327 Parameters | |
328 ---------- | |
329 n : int | |
330 Number of nodes in the graph | |
331 s : float | |
332 Mean cluster size | |
333 v : float | |
334 Shape parameter. The variance of cluster size distribution is s/v. | |
335 p_in : float | |
336 Probabilty of intra cluster connection. | |
337 p_out : float | |
338 Probability of inter cluster connection. | |
339 directed : boolean, optional default=False | |
340 Whether to create a directed graph or not | |
341 seed : integer, random_state, or None (default) | |
342 Indicator of random number generation state. | |
343 See :ref:`Randomness<randomness>`. | |
344 | |
345 Returns | |
346 ------- | |
347 G : NetworkX Graph or DiGraph | |
348 gaussian random partition graph | |
349 | |
350 Raises | |
351 ------ | |
352 NetworkXError | |
353 If s is > n | |
354 If p_in or p_out is not in [0,1] | |
355 | |
356 Notes | |
357 ----- | |
358 Note the number of partitions is dependent on s,v and n, and that the | |
359 last partition may be considerably smaller, as it is sized to simply | |
360 fill out the nodes [1] | |
361 | |
362 See Also | |
363 -------- | |
364 random_partition_graph | |
365 | |
366 Examples | |
367 -------- | |
368 >>> G = nx.gaussian_random_partition_graph(100,10,10,.25,.1) | |
369 >>> len(G) | |
370 100 | |
371 | |
372 References | |
373 ---------- | |
374 .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner, | |
375 Experiments on Graph Clustering Algorithms, | |
376 In the proceedings of the 11th Europ. Symp. Algorithms, 2003. | |
377 """ | |
378 if s > n: | |
379 raise nx.NetworkXError("s must be <= n") | |
380 assigned = 0 | |
381 sizes = [] | |
382 while True: | |
383 size = int(seed.gauss(s, float(s) / v + 0.5)) | |
384 if size < 1: # how to handle 0 or negative sizes? | |
385 continue | |
386 if assigned + size >= n: | |
387 sizes.append(n - assigned) | |
388 break | |
389 assigned += size | |
390 sizes.append(size) | |
391 return random_partition_graph(sizes, p_in, p_out, directed, seed) | |
392 | |
393 | |
394 def ring_of_cliques(num_cliques, clique_size): | |
395 """Defines a "ring of cliques" graph. | |
396 | |
397 A ring of cliques graph is consisting of cliques, connected through single | |
398 links. Each clique is a complete graph. | |
399 | |
400 Parameters | |
401 ---------- | |
402 num_cliques : int | |
403 Number of cliques | |
404 clique_size : int | |
405 Size of cliques | |
406 | |
407 Returns | |
408 ------- | |
409 G : NetworkX Graph | |
410 ring of cliques graph | |
411 | |
412 Raises | |
413 ------ | |
414 NetworkXError | |
415 If the number of cliques is lower than 2 or | |
416 if the size of cliques is smaller than 2. | |
417 | |
418 Examples | |
419 -------- | |
420 >>> G = nx.ring_of_cliques(8, 4) | |
421 | |
422 See Also | |
423 -------- | |
424 connected_caveman_graph | |
425 | |
426 Notes | |
427 ----- | |
428 The `connected_caveman_graph` graph removes a link from each clique to | |
429 connect it with the next clique. Instead, the `ring_of_cliques` graph | |
430 simply adds the link without removing any link from the cliques. | |
431 """ | |
432 if num_cliques < 2: | |
433 raise nx.NetworkXError('A ring of cliques must have at least ' | |
434 'two cliques') | |
435 if clique_size < 2: | |
436 raise nx.NetworkXError('The cliques must have at least two nodes') | |
437 | |
438 G = nx.Graph() | |
439 for i in range(num_cliques): | |
440 edges = itertools.combinations(range(i * clique_size, i * clique_size + | |
441 clique_size), 2) | |
442 G.add_edges_from(edges) | |
443 G.add_edge(i * clique_size + 1, (i + 1) * clique_size % | |
444 (num_cliques * clique_size)) | |
445 return G | |
446 | |
447 | |
448 def windmill_graph(n, k): | |
449 """Generate a windmill graph. | |
450 A windmill graph is a graph of `n` cliques each of size `k` that are all | |
451 joined at one node. | |
452 It can be thought of as taking a disjoint union of `n` cliques of size `k`, | |
453 selecting one point from each, and contracting all of the selected points. | |
454 Alternatively, one could generate `n` cliques of size `k-1` and one node | |
455 that is connected to all other nodes in the graph. | |
456 | |
457 Parameters | |
458 ---------- | |
459 n : int | |
460 Number of cliques | |
461 k : int | |
462 Size of cliques | |
463 | |
464 Returns | |
465 ------- | |
466 G : NetworkX Graph | |
467 windmill graph with n cliques of size k | |
468 | |
469 Raises | |
470 ------ | |
471 NetworkXError | |
472 If the number of cliques is less than two | |
473 If the size of the cliques are less than two | |
474 | |
475 Examples | |
476 -------- | |
477 >>> G = nx.windmill_graph(4, 5) | |
478 | |
479 Notes | |
480 ----- | |
481 The node labeled `0` will be the node connected to all other nodes. | |
482 Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters | |
483 are in the opposite order as the parameters of this method. | |
484 """ | |
485 if n < 2: | |
486 msg = 'A windmill graph must have at least two cliques' | |
487 raise nx.NetworkXError(msg) | |
488 if k < 2: | |
489 raise nx.NetworkXError('The cliques must have at least two nodes') | |
490 | |
491 G = nx.disjoint_union_all(itertools.chain([nx.complete_graph(k)], | |
492 (nx.complete_graph(k - 1) | |
493 for _ in range(n - 1)))) | |
494 G.add_edges_from((0, i) for i in range(k, G.number_of_nodes())) | |
495 return G | |
496 | |
497 | |
498 @py_random_state(3) | |
499 def stochastic_block_model(sizes, p, nodelist=None, seed=None, | |
500 directed=False, selfloops=False, sparse=True): | |
501 """Returns a stochastic block model graph. | |
502 | |
503 This model partitions the nodes in blocks of arbitrary sizes, and places | |
504 edges between pairs of nodes independently, with a probability that depends | |
505 on the blocks. | |
506 | |
507 Parameters | |
508 ---------- | |
509 sizes : list of ints | |
510 Sizes of blocks | |
511 p : list of list of floats | |
512 Element (r,s) gives the density of edges going from the nodes | |
513 of group r to nodes of group s. | |
514 p must match the number of groups (len(sizes) == len(p)), | |
515 and it must be symmetric if the graph is undirected. | |
516 nodelist : list, optional | |
517 The block tags are assigned according to the node identifiers | |
518 in nodelist. If nodelist is None, then the ordering is the | |
519 range [0,sum(sizes)-1]. | |
520 seed : integer, random_state, or None (default) | |
521 Indicator of random number generation state. | |
522 See :ref:`Randomness<randomness>`. | |
523 directed : boolean optional, default=False | |
524 Whether to create a directed graph or not. | |
525 selfloops : boolean optional, default=False | |
526 Whether to include self-loops or not. | |
527 sparse: boolean optional, default=True | |
528 Use the sparse heuristic to speed up the generator. | |
529 | |
530 Returns | |
531 ------- | |
532 g : NetworkX Graph or DiGraph | |
533 Stochastic block model graph of size sum(sizes) | |
534 | |
535 Raises | |
536 ------ | |
537 NetworkXError | |
538 If probabilities are not in [0,1]. | |
539 If the probability matrix is not square (directed case). | |
540 If the probability matrix is not symmetric (undirected case). | |
541 If the sizes list does not match nodelist or the probability matrix. | |
542 If nodelist contains duplicate. | |
543 | |
544 Examples | |
545 -------- | |
546 >>> sizes = [75, 75, 300] | |
547 >>> probs = [[0.25, 0.05, 0.02], | |
548 ... [0.05, 0.35, 0.07], | |
549 ... [0.02, 0.07, 0.40]] | |
550 >>> g = nx.stochastic_block_model(sizes, probs, seed=0) | |
551 >>> len(g) | |
552 450 | |
553 >>> H = nx.quotient_graph(g, g.graph['partition'], relabel=True) | |
554 >>> for v in H.nodes(data=True): | |
555 ... print(round(v[1]['density'], 3)) | |
556 ... | |
557 0.245 | |
558 0.348 | |
559 0.405 | |
560 >>> for v in H.edges(data=True): | |
561 ... print(round(1.0 * v[2]['weight'] / (sizes[v[0]] * sizes[v[1]]), 3)) | |
562 ... | |
563 0.051 | |
564 0.022 | |
565 0.07 | |
566 | |
567 See Also | |
568 -------- | |
569 random_partition_graph | |
570 planted_partition_graph | |
571 gaussian_random_partition_graph | |
572 gnp_random_graph | |
573 | |
574 References | |
575 ---------- | |
576 .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S., | |
577 "Stochastic blockmodels: First steps", | |
578 Social networks, 5(2), 109-137, 1983. | |
579 """ | |
580 # Check if dimensions match | |
581 if len(sizes) != len(p): | |
582 raise nx.NetworkXException("'sizes' and 'p' do not match.") | |
583 # Check for probability symmetry (undirected) and shape (directed) | |
584 for row in p: | |
585 if len(p) != len(row): | |
586 raise nx.NetworkXException("'p' must be a square matrix.") | |
587 if not directed: | |
588 p_transpose = [list(i) for i in zip(*p)] | |
589 for i in zip(p, p_transpose): | |
590 for j in zip(i[0], i[1]): | |
591 if abs(j[0] - j[1]) > 1e-08: | |
592 raise nx.NetworkXException("'p' must be symmetric.") | |
593 # Check for probability range | |
594 for row in p: | |
595 for prob in row: | |
596 if prob < 0 or prob > 1: | |
597 raise nx.NetworkXException("Entries of 'p' not in [0,1].") | |
598 # Check for nodelist consistency | |
599 if nodelist is not None: | |
600 if len(nodelist) != sum(sizes): | |
601 raise nx.NetworkXException("'nodelist' and 'sizes' do not match.") | |
602 if len(nodelist) != len(set(nodelist)): | |
603 raise nx.NetworkXException("nodelist contains duplicate.") | |
604 else: | |
605 nodelist = range(0, sum(sizes)) | |
606 | |
607 # Setup the graph conditionally to the directed switch. | |
608 block_range = range(len(sizes)) | |
609 if directed: | |
610 g = nx.DiGraph() | |
611 block_iter = itertools.product(block_range, block_range) | |
612 else: | |
613 g = nx.Graph() | |
614 block_iter = itertools.combinations_with_replacement(block_range, 2) | |
615 # Split nodelist in a partition (list of sets). | |
616 size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)] | |
617 g.graph['partition'] = [set(nodelist[size_cumsum[x]:size_cumsum[x + 1]]) | |
618 for x in range(0, len(size_cumsum) - 1)] | |
619 # Setup nodes and graph name | |
620 for block_id, nodes in enumerate(g.graph['partition']): | |
621 for node in nodes: | |
622 g.add_node(node, block=block_id) | |
623 | |
624 g.name = "stochastic_block_model" | |
625 | |
626 # Test for edge existence | |
627 parts = g.graph['partition'] | |
628 for i, j in block_iter: | |
629 if i == j: | |
630 if directed: | |
631 if selfloops: | |
632 edges = itertools.product(parts[i], parts[i]) | |
633 else: | |
634 edges = itertools.permutations(parts[i], 2) | |
635 else: | |
636 edges = itertools.combinations(parts[i], 2) | |
637 if selfloops: | |
638 edges = itertools.chain(edges, zip(parts[i], parts[i])) | |
639 for e in edges: | |
640 if seed.random() < p[i][j]: | |
641 g.add_edge(*e) | |
642 else: | |
643 edges = itertools.product(parts[i], parts[j]) | |
644 if sparse: | |
645 if p[i][j] == 1: # Test edges cases p_ij = 0 or 1 | |
646 for e in edges: | |
647 g.add_edge(*e) | |
648 elif p[i][j] > 0: | |
649 while True: | |
650 try: | |
651 logrand = math.log(seed.random()) | |
652 skip = math.floor(logrand / math.log(1 - p[i][j])) | |
653 # consume "skip" edges | |
654 next(itertools.islice(edges, skip, skip), None) | |
655 e = next(edges) | |
656 g.add_edge(*e) # __safe | |
657 except StopIteration: | |
658 break | |
659 else: | |
660 for e in edges: | |
661 if seed.random() < p[i][j]: | |
662 g.add_edge(*e) # __safe | |
663 return g | |
664 | |
665 | |
666 def _zipf_rv_below(gamma, xmin, threshold, seed): | |
667 """Returns a random value chosen from the bounded Zipf distribution. | |
668 | |
669 Repeatedly draws values from the Zipf distribution until the | |
670 threshold is met, then returns that value. | |
671 """ | |
672 result = nx.utils.zipf_rv(gamma, xmin, seed) | |
673 while result > threshold: | |
674 result = nx.utils.zipf_rv(gamma, xmin, seed) | |
675 return result | |
676 | |
677 | |
678 def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed): | |
679 """Returns a list of numbers obeying a constrained power law distribution. | |
680 | |
681 ``gamma`` and ``low`` are the parameters for the Zipf distribution. | |
682 | |
683 ``high`` is the maximum allowed value for values draw from the Zipf | |
684 distribution. For more information, see :func:`_zipf_rv_below`. | |
685 | |
686 ``condition`` and ``length`` are Boolean-valued functions on | |
687 lists. While generating the list, random values are drawn and | |
688 appended to the list until ``length`` is satisfied by the created | |
689 list. Once ``condition`` is satisfied, the sequence generated in | |
690 this way is returned. | |
691 | |
692 ``max_iters`` indicates the number of times to generate a list | |
693 satisfying ``length``. If the number of iterations exceeds this | |
694 value, :exc:`~networkx.exception.ExceededMaxIterations` is raised. | |
695 | |
696 seed : integer, random_state, or None (default) | |
697 Indicator of random number generation state. | |
698 See :ref:`Randomness<randomness>`. | |
699 """ | |
700 for i in range(max_iters): | |
701 seq = [] | |
702 while not length(seq): | |
703 seq.append(_zipf_rv_below(gamma, low, high, seed)) | |
704 if condition(seq): | |
705 return seq | |
706 raise nx.ExceededMaxIterations("Could not create power law sequence") | |
707 | |
708 | |
709 # TODO Needs documentation. | |
710 def _generate_min_degree(gamma, average_degree, max_degree, tolerance, | |
711 max_iters): | |
712 """Returns a minimum degree from the given average degree.""" | |
713 min_deg_top = max_degree | |
714 min_deg_bot = 1 | |
715 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot | |
716 itrs = 0 | |
717 mid_avg_deg = 0 | |
718 while abs(mid_avg_deg - average_degree) > tolerance: | |
719 if itrs > max_iters: | |
720 raise nx.ExceededMaxIterations("Could not match average_degree") | |
721 mid_avg_deg = 0 | |
722 for x in range(int(min_deg_mid), max_degree + 1): | |
723 mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid, | |
724 tolerance) | |
725 if mid_avg_deg > average_degree: | |
726 min_deg_top = min_deg_mid | |
727 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot | |
728 else: | |
729 min_deg_bot = min_deg_mid | |
730 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot | |
731 itrs += 1 | |
732 # return int(min_deg_mid + 0.5) | |
733 return round(min_deg_mid) | |
734 | |
735 | |
736 def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed): | |
737 """Returns a list of sets, each of which represents a community. | |
738 | |
739 ``degree_seq`` is the degree sequence that must be met by the | |
740 graph. | |
741 | |
742 ``community_sizes`` is the community size distribution that must be | |
743 met by the generated list of sets. | |
744 | |
745 ``mu`` is a float in the interval [0, 1] indicating the fraction of | |
746 intra-community edges incident to each node. | |
747 | |
748 ``max_iters`` is the number of times to try to add a node to a | |
749 community. This must be greater than the length of | |
750 ``degree_seq``, otherwise this function will always fail. If | |
751 the number of iterations exceeds this value, | |
752 :exc:`~networkx.exception.ExceededMaxIterations` is raised. | |
753 | |
754 seed : integer, random_state, or None (default) | |
755 Indicator of random number generation state. | |
756 See :ref:`Randomness<randomness>`. | |
757 | |
758 The communities returned by this are sets of integers in the set {0, | |
759 ..., *n* - 1}, where *n* is the length of ``degree_seq``. | |
760 | |
761 """ | |
762 # This assumes the nodes in the graph will be natural numbers. | |
763 result = [set() for _ in community_sizes] | |
764 n = len(degree_seq) | |
765 free = list(range(n)) | |
766 for i in range(max_iters): | |
767 v = free.pop() | |
768 c = seed.choice(range(len(community_sizes))) | |
769 # s = int(degree_seq[v] * (1 - mu) + 0.5) | |
770 s = round(degree_seq[v] * (1 - mu)) | |
771 # If the community is large enough, add the node to the chosen | |
772 # community. Otherwise, return it to the list of unaffiliated | |
773 # nodes. | |
774 if s < community_sizes[c]: | |
775 result[c].add(v) | |
776 else: | |
777 free.append(v) | |
778 # If the community is too big, remove a node from it. | |
779 if len(result[c]) > community_sizes[c]: | |
780 free.append(result[c].pop()) | |
781 if not free: | |
782 return result | |
783 msg = 'Could not assign communities; try increasing min_community' | |
784 raise nx.ExceededMaxIterations(msg) | |
785 | |
786 | |
787 @py_random_state(11) | |
788 def LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=None, | |
789 min_degree=None, max_degree=None, min_community=None, | |
790 max_community=None, tol=1.0e-7, max_iters=500, | |
791 seed=None): | |
792 r"""Returns the LFR benchmark graph. | |
793 | |
794 This algorithm proceeds as follows: | |
795 | |
796 1) Find a degree sequence with a power law distribution, and minimum | |
797 value ``min_degree``, which has approximate average degree | |
798 ``average_degree``. This is accomplished by either | |
799 | |
800 a) specifying ``min_degree`` and not ``average_degree``, | |
801 b) specifying ``average_degree`` and not ``min_degree``, in which | |
802 case a suitable minimum degree will be found. | |
803 | |
804 ``max_degree`` can also be specified, otherwise it will be set to | |
805 ``n``. Each node *u* will have `\mu \mathrm{deg}(u)` edges | |
806 joining it to nodes in communities other than its own and `(1 - | |
807 \mu) \mathrm{deg}(u)` edges joining it to nodes in its own | |
808 community. | |
809 2) Generate community sizes according to a power law distribution | |
810 with exponent ``tau2``. If ``min_community`` and | |
811 ``max_community`` are not specified they will be selected to be | |
812 ``min_degree`` and ``max_degree``, respectively. Community sizes | |
813 are generated until the sum of their sizes equals ``n``. | |
814 3) Each node will be randomly assigned a community with the | |
815 condition that the community is large enough for the node's | |
816 intra-community degree, `(1 - \mu) \mathrm{deg}(u)` as | |
817 described in step 2. If a community grows too large, a random node | |
818 will be selected for reassignment to a new community, until all | |
819 nodes have been assigned a community. | |
820 4) Each node *u* then adds `(1 - \mu) \mathrm{deg}(u)` | |
821 intra-community edges and `\mu \mathrm{deg}(u)` inter-community | |
822 edges. | |
823 | |
824 Parameters | |
825 ---------- | |
826 n : int | |
827 Number of nodes in the created graph. | |
828 | |
829 tau1 : float | |
830 Power law exponent for the degree distribution of the created | |
831 graph. This value must be strictly greater than one. | |
832 | |
833 tau2 : float | |
834 Power law exponent for the community size distribution in the | |
835 created graph. This value must be strictly greater than one. | |
836 | |
837 mu : float | |
838 Fraction of intra-community edges incident to each node. This | |
839 value must be in the interval [0, 1]. | |
840 | |
841 average_degree : float | |
842 Desired average degree of nodes in the created graph. This value | |
843 must be in the interval [0, *n*]. Exactly one of this and | |
844 ``min_degree`` must be specified, otherwise a | |
845 :exc:`NetworkXError` is raised. | |
846 | |
847 min_degree : int | |
848 Minimum degree of nodes in the created graph. This value must be | |
849 in the interval [0, *n*]. Exactly one of this and | |
850 ``average_degree`` must be specified, otherwise a | |
851 :exc:`NetworkXError` is raised. | |
852 | |
853 max_degree : int | |
854 Maximum degree of nodes in the created graph. If not specified, | |
855 this is set to ``n``, the total number of nodes in the graph. | |
856 | |
857 min_community : int | |
858 Minimum size of communities in the graph. If not specified, this | |
859 is set to ``min_degree``. | |
860 | |
861 max_community : int | |
862 Maximum size of communities in the graph. If not specified, this | |
863 is set to ``n``, the total number of nodes in the graph. | |
864 | |
865 tol : float | |
866 Tolerance when comparing floats, specifically when comparing | |
867 average degree values. | |
868 | |
869 max_iters : int | |
870 Maximum number of iterations to try to create the community sizes, | |
871 degree distribution, and community affiliations. | |
872 | |
873 seed : integer, random_state, or None (default) | |
874 Indicator of random number generation state. | |
875 See :ref:`Randomness<randomness>`. | |
876 | |
877 Returns | |
878 ------- | |
879 G : NetworkX graph | |
880 The LFR benchmark graph generated according to the specified | |
881 parameters. | |
882 | |
883 Each node in the graph has a node attribute ``'community'`` that | |
884 stores the community (that is, the set of nodes) that includes | |
885 it. | |
886 | |
887 Raises | |
888 ------ | |
889 NetworkXError | |
890 If any of the parameters do not meet their upper and lower bounds: | |
891 | |
892 - ``tau1`` and ``tau2`` must be strictly greater than 1. | |
893 - ``mu`` must be in [0, 1]. | |
894 - ``max_degree`` must be in {1, ..., *n*}. | |
895 - ``min_community`` and ``max_community`` must be in {0, ..., | |
896 *n*}. | |
897 | |
898 If not exactly one of ``average_degree`` and ``min_degree`` is | |
899 specified. | |
900 | |
901 If ``min_degree`` is not specified and a suitable ``min_degree`` | |
902 cannot be found. | |
903 | |
904 ExceededMaxIterations | |
905 If a valid degree sequence cannot be created within | |
906 ``max_iters`` number of iterations. | |
907 | |
908 If a valid set of community sizes cannot be created within | |
909 ``max_iters`` number of iterations. | |
910 | |
911 If a valid community assignment cannot be created within ``10 * | |
912 n * max_iters`` number of iterations. | |
913 | |
914 Examples | |
915 -------- | |
916 Basic usage:: | |
917 | |
918 >>> from networkx.generators.community import LFR_benchmark_graph | |
919 >>> n = 250 | |
920 >>> tau1 = 3 | |
921 >>> tau2 = 1.5 | |
922 >>> mu = 0.1 | |
923 >>> G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5, | |
924 ... min_community=20, seed=10) | |
925 | |
926 Continuing the example above, you can get the communities from the | |
927 node attributes of the graph:: | |
928 | |
929 >>> communities = {frozenset(G.nodes[v]['community']) for v in G} | |
930 | |
931 Notes | |
932 ----- | |
933 This algorithm differs slightly from the original way it was | |
934 presented in [1]. | |
935 | |
936 1) Rather than connecting the graph via a configuration model then | |
937 rewiring to match the intra-community and inter-community | |
938 degrees, we do this wiring explicitly at the end, which should be | |
939 equivalent. | |
940 2) The code posted on the author's website [2] calculates the random | |
941 power law distributed variables and their average using | |
942 continuous approximations, whereas we use the discrete | |
943 distributions here as both degree and community size are | |
944 discrete. | |
945 | |
946 Though the authors describe the algorithm as quite robust, testing | |
947 during development indicates that a somewhat narrower parameter set | |
948 is likely to successfully produce a graph. Some suggestions have | |
949 been provided in the event of exceptions. | |
950 | |
951 References | |
952 ---------- | |
953 .. [1] "Benchmark graphs for testing community detection algorithms", | |
954 Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi, | |
955 Phys. Rev. E 78, 046110 2008 | |
956 .. [2] http://santo.fortunato.googlepages.com/inthepress2 | |
957 | |
958 """ | |
959 # Perform some basic parameter validation. | |
960 if not tau1 > 1: | |
961 raise nx.NetworkXError("tau1 must be greater than one") | |
962 if not tau2 > 1: | |
963 raise nx.NetworkXError("tau2 must be greater than one") | |
964 if not 0 <= mu <= 1: | |
965 raise nx.NetworkXError("mu must be in the interval [0, 1]") | |
966 | |
967 # Validate parameters for generating the degree sequence. | |
968 if max_degree is None: | |
969 max_degree = n | |
970 elif not 0 < max_degree <= n: | |
971 raise nx.NetworkXError("max_degree must be in the interval (0, n]") | |
972 if not ((min_degree is None) ^ (average_degree is None)): | |
973 raise nx.NetworkXError("Must assign exactly one of min_degree and" | |
974 " average_degree") | |
975 if min_degree is None: | |
976 min_degree = _generate_min_degree(tau1, average_degree, max_degree, | |
977 tol, max_iters) | |
978 | |
979 # Generate a degree sequence with a power law distribution. | |
980 low, high = min_degree, max_degree | |
981 | |
982 def condition(seq): return sum(seq) % 2 == 0 | |
983 | |
984 def length(seq): return len(seq) >= n | |
985 deg_seq = _powerlaw_sequence(tau1, low, high, condition, | |
986 length, max_iters, seed) | |
987 | |
988 # Validate parameters for generating the community size sequence. | |
989 if min_community is None: | |
990 min_community = min(deg_seq) | |
991 if max_community is None: | |
992 max_community = max(deg_seq) | |
993 | |
994 # Generate a community size sequence with a power law distribution. | |
995 # | |
996 # TODO The original code incremented the number of iterations each | |
997 # time a new Zipf random value was drawn from the distribution. This | |
998 # differed from the way the number of iterations was incremented in | |
999 # `_powerlaw_degree_sequence`, so this code was changed to match | |
1000 # that one. As a result, this code is allowed many more chances to | |
1001 # generate a valid community size sequence. | |
1002 low, high = min_community, max_community | |
1003 | |
1004 def condition(seq): return sum(seq) == n | |
1005 | |
1006 def length(seq): return sum(seq) >= n | |
1007 comms = _powerlaw_sequence(tau2, low, high, condition, | |
1008 length, max_iters, seed) | |
1009 | |
1010 # Generate the communities based on the given degree sequence and | |
1011 # community sizes. | |
1012 max_iters *= 10 * n | |
1013 communities = _generate_communities(deg_seq, comms, mu, max_iters, seed) | |
1014 | |
1015 # Finally, generate the benchmark graph based on the given | |
1016 # communities, joining nodes according to the intra- and | |
1017 # inter-community degrees. | |
1018 G = nx.Graph() | |
1019 G.add_nodes_from(range(n)) | |
1020 for c in communities: | |
1021 for u in c: | |
1022 while G.degree(u) < round(deg_seq[u] * (1 - mu)): | |
1023 v = seed.choice(list(c)) | |
1024 G.add_edge(u, v) | |
1025 while G.degree(u) < deg_seq[u]: | |
1026 v = seed.choice(range(n)) | |
1027 if v not in c: | |
1028 G.add_edge(u, v) | |
1029 G.nodes[u]['community'] = c | |
1030 return G |