comparison planemo/lib/python3.7/site-packages/networkx/generators/community.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # Copyright(C) 2011-2019 by
2 # Ben Edwards <bedwards@cs.unm.edu>
3 # Aric Hagberg <hagberg@lanl.gov>
4 # Konstantinos Karakatsanis <dinoskarakas@gmail.com>
5 # All rights reserved.
6 # BSD license.
7 #
8 # Authors: Ben Edwards (bedwards@cs.unm.edu)
9 # Aric Hagberg (hagberg@lanl.gov)
10 # Konstantinos Karakatsanis (dinoskarakas@gmail.com)
11 # Jean-Gabriel Young (jean.gabriel.young@gmail.com)
12 """Generators for classes of graphs used in studying social networks."""
13 import itertools
14 import math
15 import networkx as nx
16 from networkx.utils import py_random_state
17
18 # Accommodates for both SciPy and non-SciPy implementations..
19 try:
20 from scipy.special import zeta as _zeta
21
22 def zeta(x, q, tolerance):
23 return _zeta(x, q)
24 except ImportError:
25 def zeta(x, q, tolerance):
26 """The Hurwitz zeta function, or the Riemann zeta function of two
27 arguments.
28
29 ``x`` must be greater than one and ``q`` must be positive.
30
31 This function repeatedly computes subsequent partial sums until
32 convergence, as decided by ``tolerance``.
33 """
34 z = 0
35 z_prev = -float('inf')
36 k = 0
37 while abs(z - z_prev) > tolerance:
38 z_prev = z
39 z += 1 / ((k + q) ** x)
40 k += 1
41 return z
42
43 __all__ = ['caveman_graph', 'connected_caveman_graph',
44 'relaxed_caveman_graph', 'random_partition_graph',
45 'planted_partition_graph', 'gaussian_random_partition_graph',
46 'ring_of_cliques', 'windmill_graph', 'stochastic_block_model',
47 'LFR_benchmark_graph']
48
49
50 def caveman_graph(l, k):
51 """Returns a caveman graph of `l` cliques of size `k`.
52
53 Parameters
54 ----------
55 l : int
56 Number of cliques
57 k : int
58 Size of cliques
59
60 Returns
61 -------
62 G : NetworkX Graph
63 caveman graph
64
65 Notes
66 -----
67 This returns an undirected graph, it can be converted to a directed
68 graph using :func:`nx.to_directed`, or a multigraph using
69 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
70 described in [1]_ and it is unclear which of the directed
71 generalizations is most useful.
72
73 Examples
74 --------
75 >>> G = nx.caveman_graph(3, 3)
76
77 See also
78 --------
79
80 connected_caveman_graph
81
82 References
83 ----------
84 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
85 Amer. J. Soc. 105, 493-527, 1999.
86 """
87 # l disjoint cliques of size k
88 G = nx.empty_graph(l * k)
89 if k > 1:
90 for start in range(0, l * k, k):
91 edges = itertools.combinations(range(start, start + k), 2)
92 G.add_edges_from(edges)
93 return G
94
95
96 def connected_caveman_graph(l, k):
97 """Returns a connected caveman graph of `l` cliques of size `k`.
98
99 The connected caveman graph is formed by creating `n` cliques of size
100 `k`, then a single edge in each clique is rewired to a node in an
101 adjacent clique.
102
103 Parameters
104 ----------
105 l : int
106 number of cliques
107 k : int
108 size of cliques
109
110 Returns
111 -------
112 G : NetworkX Graph
113 connected caveman graph
114
115 Notes
116 -----
117 This returns an undirected graph, it can be converted to a directed
118 graph using :func:`nx.to_directed`, or a multigraph using
119 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
120 described in [1]_ and it is unclear which of the directed
121 generalizations is most useful.
122
123 Examples
124 --------
125 >>> G = nx.connected_caveman_graph(3, 3)
126
127 References
128 ----------
129 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
130 Amer. J. Soc. 105, 493-527, 1999.
131 """
132 G = nx.caveman_graph(l, k)
133 for start in range(0, l * k, k):
134 G.remove_edge(start, start + 1)
135 G.add_edge(start, (start - 1) % (l * k))
136 return G
137
138
139 @py_random_state(3)
140 def relaxed_caveman_graph(l, k, p, seed=None):
141 """Returns a relaxed caveman graph.
142
143 A relaxed caveman graph starts with `l` cliques of size `k`. Edges are
144 then randomly rewired with probability `p` to link different cliques.
145
146 Parameters
147 ----------
148 l : int
149 Number of groups
150 k : int
151 Size of cliques
152 p : float
153 Probabilty of rewiring each edge.
154 seed : integer, random_state, or None (default)
155 Indicator of random number generation state.
156 See :ref:`Randomness<randomness>`.
157
158 Returns
159 -------
160 G : NetworkX Graph
161 Relaxed Caveman Graph
162
163 Raises
164 ------
165 NetworkXError:
166 If p is not in [0,1]
167
168 Examples
169 --------
170 >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
171
172 References
173 ----------
174 .. [1] Santo Fortunato, Community Detection in Graphs,
175 Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
176 https://arxiv.org/abs/0906.0612
177 """
178 G = nx.caveman_graph(l, k)
179 nodes = list(G)
180 for (u, v) in G.edges():
181 if seed.random() < p: # rewire the edge
182 x = seed.choice(nodes)
183 if G.has_edge(u, x):
184 continue
185 G.remove_edge(u, v)
186 G.add_edge(u, x)
187 return G
188
189
190 @py_random_state(3)
191 def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
192 """Returns the random partition graph with a partition of sizes.
193
194 A partition graph is a graph of communities with sizes defined by
195 s in sizes. Nodes in the same group are connected with probability
196 p_in and nodes of different groups are connected with probability
197 p_out.
198
199 Parameters
200 ----------
201 sizes : list of ints
202 Sizes of groups
203 p_in : float
204 probability of edges with in groups
205 p_out : float
206 probability of edges between groups
207 directed : boolean optional, default=False
208 Whether to create a directed graph
209 seed : integer, random_state, or None (default)
210 Indicator of random number generation state.
211 See :ref:`Randomness<randomness>`.
212
213 Returns
214 -------
215 G : NetworkX Graph or DiGraph
216 random partition graph of size sum(gs)
217
218 Raises
219 ------
220 NetworkXError
221 If p_in or p_out is not in [0,1]
222
223 Examples
224 --------
225 >>> G = nx.random_partition_graph([10,10,10],.25,.01)
226 >>> len(G)
227 30
228 >>> partition = G.graph['partition']
229 >>> len(partition)
230 3
231
232 Notes
233 -----
234 This is a generalization of the planted-l-partition described in
235 [1]_. It allows for the creation of groups of any size.
236
237 The partition is store as a graph attribute 'partition'.
238
239 References
240 ----------
241 .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
242 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
243 """
244 # Use geometric method for O(n+m) complexity algorithm
245 # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
246 if not 0.0 <= p_in <= 1.0:
247 raise nx.NetworkXError("p_in must be in [0,1]")
248 if not 0.0 <= p_out <= 1.0:
249 raise nx.NetworkXError("p_out must be in [0,1]")
250
251 # create connection matrix
252 num_blocks = len(sizes)
253 p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
254 for r in range(num_blocks):
255 p[r][r] = p_in
256
257 return stochastic_block_model(sizes, p, nodelist=None, seed=seed,
258 directed=directed, selfloops=False,
259 sparse=True)
260
261
262 @py_random_state(4)
263 def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
264 """Returns the planted l-partition graph.
265
266 This model partitions a graph with n=l*k vertices in
267 l groups with k vertices each. Vertices of the same
268 group are linked with a probability p_in, and vertices
269 of different groups are linked with probability p_out.
270
271 Parameters
272 ----------
273 l : int
274 Number of groups
275 k : int
276 Number of vertices in each group
277 p_in : float
278 probability of connecting vertices within a group
279 p_out : float
280 probability of connected vertices between groups
281 seed : integer, random_state, or None (default)
282 Indicator of random number generation state.
283 See :ref:`Randomness<randomness>`.
284 directed : bool,optional (default=False)
285 If True return a directed graph
286
287 Returns
288 -------
289 G : NetworkX Graph or DiGraph
290 planted l-partition graph
291
292 Raises
293 ------
294 NetworkXError:
295 If p_in,p_out are not in [0,1] or
296
297 Examples
298 --------
299 >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
300
301 See Also
302 --------
303 random_partition_model
304
305 References
306 ----------
307 .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
308 on the planted partition model,
309 Random Struct. Algor. 18 (2001) 116-140.
310
311 .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
312 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
313 """
314 return random_partition_graph([k] * l, p_in, p_out, seed, directed)
315
316
317 @py_random_state(6)
318 def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False,
319 seed=None):
320 """Generate a Gaussian random partition graph.
321
322 A Gaussian random partition graph is created by creating k partitions
323 each with a size drawn from a normal distribution with mean s and variance
324 s/v. Nodes are connected within clusters with probability p_in and
325 between clusters with probability p_out[1]
326
327 Parameters
328 ----------
329 n : int
330 Number of nodes in the graph
331 s : float
332 Mean cluster size
333 v : float
334 Shape parameter. The variance of cluster size distribution is s/v.
335 p_in : float
336 Probabilty of intra cluster connection.
337 p_out : float
338 Probability of inter cluster connection.
339 directed : boolean, optional default=False
340 Whether to create a directed graph or not
341 seed : integer, random_state, or None (default)
342 Indicator of random number generation state.
343 See :ref:`Randomness<randomness>`.
344
345 Returns
346 -------
347 G : NetworkX Graph or DiGraph
348 gaussian random partition graph
349
350 Raises
351 ------
352 NetworkXError
353 If s is > n
354 If p_in or p_out is not in [0,1]
355
356 Notes
357 -----
358 Note the number of partitions is dependent on s,v and n, and that the
359 last partition may be considerably smaller, as it is sized to simply
360 fill out the nodes [1]
361
362 See Also
363 --------
364 random_partition_graph
365
366 Examples
367 --------
368 >>> G = nx.gaussian_random_partition_graph(100,10,10,.25,.1)
369 >>> len(G)
370 100
371
372 References
373 ----------
374 .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
375 Experiments on Graph Clustering Algorithms,
376 In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
377 """
378 if s > n:
379 raise nx.NetworkXError("s must be <= n")
380 assigned = 0
381 sizes = []
382 while True:
383 size = int(seed.gauss(s, float(s) / v + 0.5))
384 if size < 1: # how to handle 0 or negative sizes?
385 continue
386 if assigned + size >= n:
387 sizes.append(n - assigned)
388 break
389 assigned += size
390 sizes.append(size)
391 return random_partition_graph(sizes, p_in, p_out, directed, seed)
392
393
394 def ring_of_cliques(num_cliques, clique_size):
395 """Defines a "ring of cliques" graph.
396
397 A ring of cliques graph is consisting of cliques, connected through single
398 links. Each clique is a complete graph.
399
400 Parameters
401 ----------
402 num_cliques : int
403 Number of cliques
404 clique_size : int
405 Size of cliques
406
407 Returns
408 -------
409 G : NetworkX Graph
410 ring of cliques graph
411
412 Raises
413 ------
414 NetworkXError
415 If the number of cliques is lower than 2 or
416 if the size of cliques is smaller than 2.
417
418 Examples
419 --------
420 >>> G = nx.ring_of_cliques(8, 4)
421
422 See Also
423 --------
424 connected_caveman_graph
425
426 Notes
427 -----
428 The `connected_caveman_graph` graph removes a link from each clique to
429 connect it with the next clique. Instead, the `ring_of_cliques` graph
430 simply adds the link without removing any link from the cliques.
431 """
432 if num_cliques < 2:
433 raise nx.NetworkXError('A ring of cliques must have at least '
434 'two cliques')
435 if clique_size < 2:
436 raise nx.NetworkXError('The cliques must have at least two nodes')
437
438 G = nx.Graph()
439 for i in range(num_cliques):
440 edges = itertools.combinations(range(i * clique_size, i * clique_size +
441 clique_size), 2)
442 G.add_edges_from(edges)
443 G.add_edge(i * clique_size + 1, (i + 1) * clique_size %
444 (num_cliques * clique_size))
445 return G
446
447
448 def windmill_graph(n, k):
449 """Generate a windmill graph.
450 A windmill graph is a graph of `n` cliques each of size `k` that are all
451 joined at one node.
452 It can be thought of as taking a disjoint union of `n` cliques of size `k`,
453 selecting one point from each, and contracting all of the selected points.
454 Alternatively, one could generate `n` cliques of size `k-1` and one node
455 that is connected to all other nodes in the graph.
456
457 Parameters
458 ----------
459 n : int
460 Number of cliques
461 k : int
462 Size of cliques
463
464 Returns
465 -------
466 G : NetworkX Graph
467 windmill graph with n cliques of size k
468
469 Raises
470 ------
471 NetworkXError
472 If the number of cliques is less than two
473 If the size of the cliques are less than two
474
475 Examples
476 --------
477 >>> G = nx.windmill_graph(4, 5)
478
479 Notes
480 -----
481 The node labeled `0` will be the node connected to all other nodes.
482 Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
483 are in the opposite order as the parameters of this method.
484 """
485 if n < 2:
486 msg = 'A windmill graph must have at least two cliques'
487 raise nx.NetworkXError(msg)
488 if k < 2:
489 raise nx.NetworkXError('The cliques must have at least two nodes')
490
491 G = nx.disjoint_union_all(itertools.chain([nx.complete_graph(k)],
492 (nx.complete_graph(k - 1)
493 for _ in range(n - 1))))
494 G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
495 return G
496
497
498 @py_random_state(3)
499 def stochastic_block_model(sizes, p, nodelist=None, seed=None,
500 directed=False, selfloops=False, sparse=True):
501 """Returns a stochastic block model graph.
502
503 This model partitions the nodes in blocks of arbitrary sizes, and places
504 edges between pairs of nodes independently, with a probability that depends
505 on the blocks.
506
507 Parameters
508 ----------
509 sizes : list of ints
510 Sizes of blocks
511 p : list of list of floats
512 Element (r,s) gives the density of edges going from the nodes
513 of group r to nodes of group s.
514 p must match the number of groups (len(sizes) == len(p)),
515 and it must be symmetric if the graph is undirected.
516 nodelist : list, optional
517 The block tags are assigned according to the node identifiers
518 in nodelist. If nodelist is None, then the ordering is the
519 range [0,sum(sizes)-1].
520 seed : integer, random_state, or None (default)
521 Indicator of random number generation state.
522 See :ref:`Randomness<randomness>`.
523 directed : boolean optional, default=False
524 Whether to create a directed graph or not.
525 selfloops : boolean optional, default=False
526 Whether to include self-loops or not.
527 sparse: boolean optional, default=True
528 Use the sparse heuristic to speed up the generator.
529
530 Returns
531 -------
532 g : NetworkX Graph or DiGraph
533 Stochastic block model graph of size sum(sizes)
534
535 Raises
536 ------
537 NetworkXError
538 If probabilities are not in [0,1].
539 If the probability matrix is not square (directed case).
540 If the probability matrix is not symmetric (undirected case).
541 If the sizes list does not match nodelist or the probability matrix.
542 If nodelist contains duplicate.
543
544 Examples
545 --------
546 >>> sizes = [75, 75, 300]
547 >>> probs = [[0.25, 0.05, 0.02],
548 ... [0.05, 0.35, 0.07],
549 ... [0.02, 0.07, 0.40]]
550 >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
551 >>> len(g)
552 450
553 >>> H = nx.quotient_graph(g, g.graph['partition'], relabel=True)
554 >>> for v in H.nodes(data=True):
555 ... print(round(v[1]['density'], 3))
556 ...
557 0.245
558 0.348
559 0.405
560 >>> for v in H.edges(data=True):
561 ... print(round(1.0 * v[2]['weight'] / (sizes[v[0]] * sizes[v[1]]), 3))
562 ...
563 0.051
564 0.022
565 0.07
566
567 See Also
568 --------
569 random_partition_graph
570 planted_partition_graph
571 gaussian_random_partition_graph
572 gnp_random_graph
573
574 References
575 ----------
576 .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
577 "Stochastic blockmodels: First steps",
578 Social networks, 5(2), 109-137, 1983.
579 """
580 # Check if dimensions match
581 if len(sizes) != len(p):
582 raise nx.NetworkXException("'sizes' and 'p' do not match.")
583 # Check for probability symmetry (undirected) and shape (directed)
584 for row in p:
585 if len(p) != len(row):
586 raise nx.NetworkXException("'p' must be a square matrix.")
587 if not directed:
588 p_transpose = [list(i) for i in zip(*p)]
589 for i in zip(p, p_transpose):
590 for j in zip(i[0], i[1]):
591 if abs(j[0] - j[1]) > 1e-08:
592 raise nx.NetworkXException("'p' must be symmetric.")
593 # Check for probability range
594 for row in p:
595 for prob in row:
596 if prob < 0 or prob > 1:
597 raise nx.NetworkXException("Entries of 'p' not in [0,1].")
598 # Check for nodelist consistency
599 if nodelist is not None:
600 if len(nodelist) != sum(sizes):
601 raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
602 if len(nodelist) != len(set(nodelist)):
603 raise nx.NetworkXException("nodelist contains duplicate.")
604 else:
605 nodelist = range(0, sum(sizes))
606
607 # Setup the graph conditionally to the directed switch.
608 block_range = range(len(sizes))
609 if directed:
610 g = nx.DiGraph()
611 block_iter = itertools.product(block_range, block_range)
612 else:
613 g = nx.Graph()
614 block_iter = itertools.combinations_with_replacement(block_range, 2)
615 # Split nodelist in a partition (list of sets).
616 size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)]
617 g.graph['partition'] = [set(nodelist[size_cumsum[x]:size_cumsum[x + 1]])
618 for x in range(0, len(size_cumsum) - 1)]
619 # Setup nodes and graph name
620 for block_id, nodes in enumerate(g.graph['partition']):
621 for node in nodes:
622 g.add_node(node, block=block_id)
623
624 g.name = "stochastic_block_model"
625
626 # Test for edge existence
627 parts = g.graph['partition']
628 for i, j in block_iter:
629 if i == j:
630 if directed:
631 if selfloops:
632 edges = itertools.product(parts[i], parts[i])
633 else:
634 edges = itertools.permutations(parts[i], 2)
635 else:
636 edges = itertools.combinations(parts[i], 2)
637 if selfloops:
638 edges = itertools.chain(edges, zip(parts[i], parts[i]))
639 for e in edges:
640 if seed.random() < p[i][j]:
641 g.add_edge(*e)
642 else:
643 edges = itertools.product(parts[i], parts[j])
644 if sparse:
645 if p[i][j] == 1: # Test edges cases p_ij = 0 or 1
646 for e in edges:
647 g.add_edge(*e)
648 elif p[i][j] > 0:
649 while True:
650 try:
651 logrand = math.log(seed.random())
652 skip = math.floor(logrand / math.log(1 - p[i][j]))
653 # consume "skip" edges
654 next(itertools.islice(edges, skip, skip), None)
655 e = next(edges)
656 g.add_edge(*e) # __safe
657 except StopIteration:
658 break
659 else:
660 for e in edges:
661 if seed.random() < p[i][j]:
662 g.add_edge(*e) # __safe
663 return g
664
665
666 def _zipf_rv_below(gamma, xmin, threshold, seed):
667 """Returns a random value chosen from the bounded Zipf distribution.
668
669 Repeatedly draws values from the Zipf distribution until the
670 threshold is met, then returns that value.
671 """
672 result = nx.utils.zipf_rv(gamma, xmin, seed)
673 while result > threshold:
674 result = nx.utils.zipf_rv(gamma, xmin, seed)
675 return result
676
677
678 def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
679 """Returns a list of numbers obeying a constrained power law distribution.
680
681 ``gamma`` and ``low`` are the parameters for the Zipf distribution.
682
683 ``high`` is the maximum allowed value for values draw from the Zipf
684 distribution. For more information, see :func:`_zipf_rv_below`.
685
686 ``condition`` and ``length`` are Boolean-valued functions on
687 lists. While generating the list, random values are drawn and
688 appended to the list until ``length`` is satisfied by the created
689 list. Once ``condition`` is satisfied, the sequence generated in
690 this way is returned.
691
692 ``max_iters`` indicates the number of times to generate a list
693 satisfying ``length``. If the number of iterations exceeds this
694 value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
695
696 seed : integer, random_state, or None (default)
697 Indicator of random number generation state.
698 See :ref:`Randomness<randomness>`.
699 """
700 for i in range(max_iters):
701 seq = []
702 while not length(seq):
703 seq.append(_zipf_rv_below(gamma, low, high, seed))
704 if condition(seq):
705 return seq
706 raise nx.ExceededMaxIterations("Could not create power law sequence")
707
708
709 # TODO Needs documentation.
710 def _generate_min_degree(gamma, average_degree, max_degree, tolerance,
711 max_iters):
712 """Returns a minimum degree from the given average degree."""
713 min_deg_top = max_degree
714 min_deg_bot = 1
715 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
716 itrs = 0
717 mid_avg_deg = 0
718 while abs(mid_avg_deg - average_degree) > tolerance:
719 if itrs > max_iters:
720 raise nx.ExceededMaxIterations("Could not match average_degree")
721 mid_avg_deg = 0
722 for x in range(int(min_deg_mid), max_degree + 1):
723 mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid,
724 tolerance)
725 if mid_avg_deg > average_degree:
726 min_deg_top = min_deg_mid
727 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
728 else:
729 min_deg_bot = min_deg_mid
730 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
731 itrs += 1
732 # return int(min_deg_mid + 0.5)
733 return round(min_deg_mid)
734
735
736 def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
737 """Returns a list of sets, each of which represents a community.
738
739 ``degree_seq`` is the degree sequence that must be met by the
740 graph.
741
742 ``community_sizes`` is the community size distribution that must be
743 met by the generated list of sets.
744
745 ``mu`` is a float in the interval [0, 1] indicating the fraction of
746 intra-community edges incident to each node.
747
748 ``max_iters`` is the number of times to try to add a node to a
749 community. This must be greater than the length of
750 ``degree_seq``, otherwise this function will always fail. If
751 the number of iterations exceeds this value,
752 :exc:`~networkx.exception.ExceededMaxIterations` is raised.
753
754 seed : integer, random_state, or None (default)
755 Indicator of random number generation state.
756 See :ref:`Randomness<randomness>`.
757
758 The communities returned by this are sets of integers in the set {0,
759 ..., *n* - 1}, where *n* is the length of ``degree_seq``.
760
761 """
762 # This assumes the nodes in the graph will be natural numbers.
763 result = [set() for _ in community_sizes]
764 n = len(degree_seq)
765 free = list(range(n))
766 for i in range(max_iters):
767 v = free.pop()
768 c = seed.choice(range(len(community_sizes)))
769 # s = int(degree_seq[v] * (1 - mu) + 0.5)
770 s = round(degree_seq[v] * (1 - mu))
771 # If the community is large enough, add the node to the chosen
772 # community. Otherwise, return it to the list of unaffiliated
773 # nodes.
774 if s < community_sizes[c]:
775 result[c].add(v)
776 else:
777 free.append(v)
778 # If the community is too big, remove a node from it.
779 if len(result[c]) > community_sizes[c]:
780 free.append(result[c].pop())
781 if not free:
782 return result
783 msg = 'Could not assign communities; try increasing min_community'
784 raise nx.ExceededMaxIterations(msg)
785
786
787 @py_random_state(11)
788 def LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=None,
789 min_degree=None, max_degree=None, min_community=None,
790 max_community=None, tol=1.0e-7, max_iters=500,
791 seed=None):
792 r"""Returns the LFR benchmark graph.
793
794 This algorithm proceeds as follows:
795
796 1) Find a degree sequence with a power law distribution, and minimum
797 value ``min_degree``, which has approximate average degree
798 ``average_degree``. This is accomplished by either
799
800 a) specifying ``min_degree`` and not ``average_degree``,
801 b) specifying ``average_degree`` and not ``min_degree``, in which
802 case a suitable minimum degree will be found.
803
804 ``max_degree`` can also be specified, otherwise it will be set to
805 ``n``. Each node *u* will have `\mu \mathrm{deg}(u)` edges
806 joining it to nodes in communities other than its own and `(1 -
807 \mu) \mathrm{deg}(u)` edges joining it to nodes in its own
808 community.
809 2) Generate community sizes according to a power law distribution
810 with exponent ``tau2``. If ``min_community`` and
811 ``max_community`` are not specified they will be selected to be
812 ``min_degree`` and ``max_degree``, respectively. Community sizes
813 are generated until the sum of their sizes equals ``n``.
814 3) Each node will be randomly assigned a community with the
815 condition that the community is large enough for the node's
816 intra-community degree, `(1 - \mu) \mathrm{deg}(u)` as
817 described in step 2. If a community grows too large, a random node
818 will be selected for reassignment to a new community, until all
819 nodes have been assigned a community.
820 4) Each node *u* then adds `(1 - \mu) \mathrm{deg}(u)`
821 intra-community edges and `\mu \mathrm{deg}(u)` inter-community
822 edges.
823
824 Parameters
825 ----------
826 n : int
827 Number of nodes in the created graph.
828
829 tau1 : float
830 Power law exponent for the degree distribution of the created
831 graph. This value must be strictly greater than one.
832
833 tau2 : float
834 Power law exponent for the community size distribution in the
835 created graph. This value must be strictly greater than one.
836
837 mu : float
838 Fraction of intra-community edges incident to each node. This
839 value must be in the interval [0, 1].
840
841 average_degree : float
842 Desired average degree of nodes in the created graph. This value
843 must be in the interval [0, *n*]. Exactly one of this and
844 ``min_degree`` must be specified, otherwise a
845 :exc:`NetworkXError` is raised.
846
847 min_degree : int
848 Minimum degree of nodes in the created graph. This value must be
849 in the interval [0, *n*]. Exactly one of this and
850 ``average_degree`` must be specified, otherwise a
851 :exc:`NetworkXError` is raised.
852
853 max_degree : int
854 Maximum degree of nodes in the created graph. If not specified,
855 this is set to ``n``, the total number of nodes in the graph.
856
857 min_community : int
858 Minimum size of communities in the graph. If not specified, this
859 is set to ``min_degree``.
860
861 max_community : int
862 Maximum size of communities in the graph. If not specified, this
863 is set to ``n``, the total number of nodes in the graph.
864
865 tol : float
866 Tolerance when comparing floats, specifically when comparing
867 average degree values.
868
869 max_iters : int
870 Maximum number of iterations to try to create the community sizes,
871 degree distribution, and community affiliations.
872
873 seed : integer, random_state, or None (default)
874 Indicator of random number generation state.
875 See :ref:`Randomness<randomness>`.
876
877 Returns
878 -------
879 G : NetworkX graph
880 The LFR benchmark graph generated according to the specified
881 parameters.
882
883 Each node in the graph has a node attribute ``'community'`` that
884 stores the community (that is, the set of nodes) that includes
885 it.
886
887 Raises
888 ------
889 NetworkXError
890 If any of the parameters do not meet their upper and lower bounds:
891
892 - ``tau1`` and ``tau2`` must be strictly greater than 1.
893 - ``mu`` must be in [0, 1].
894 - ``max_degree`` must be in {1, ..., *n*}.
895 - ``min_community`` and ``max_community`` must be in {0, ...,
896 *n*}.
897
898 If not exactly one of ``average_degree`` and ``min_degree`` is
899 specified.
900
901 If ``min_degree`` is not specified and a suitable ``min_degree``
902 cannot be found.
903
904 ExceededMaxIterations
905 If a valid degree sequence cannot be created within
906 ``max_iters`` number of iterations.
907
908 If a valid set of community sizes cannot be created within
909 ``max_iters`` number of iterations.
910
911 If a valid community assignment cannot be created within ``10 *
912 n * max_iters`` number of iterations.
913
914 Examples
915 --------
916 Basic usage::
917
918 >>> from networkx.generators.community import LFR_benchmark_graph
919 >>> n = 250
920 >>> tau1 = 3
921 >>> tau2 = 1.5
922 >>> mu = 0.1
923 >>> G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
924 ... min_community=20, seed=10)
925
926 Continuing the example above, you can get the communities from the
927 node attributes of the graph::
928
929 >>> communities = {frozenset(G.nodes[v]['community']) for v in G}
930
931 Notes
932 -----
933 This algorithm differs slightly from the original way it was
934 presented in [1].
935
936 1) Rather than connecting the graph via a configuration model then
937 rewiring to match the intra-community and inter-community
938 degrees, we do this wiring explicitly at the end, which should be
939 equivalent.
940 2) The code posted on the author's website [2] calculates the random
941 power law distributed variables and their average using
942 continuous approximations, whereas we use the discrete
943 distributions here as both degree and community size are
944 discrete.
945
946 Though the authors describe the algorithm as quite robust, testing
947 during development indicates that a somewhat narrower parameter set
948 is likely to successfully produce a graph. Some suggestions have
949 been provided in the event of exceptions.
950
951 References
952 ----------
953 .. [1] "Benchmark graphs for testing community detection algorithms",
954 Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
955 Phys. Rev. E 78, 046110 2008
956 .. [2] http://santo.fortunato.googlepages.com/inthepress2
957
958 """
959 # Perform some basic parameter validation.
960 if not tau1 > 1:
961 raise nx.NetworkXError("tau1 must be greater than one")
962 if not tau2 > 1:
963 raise nx.NetworkXError("tau2 must be greater than one")
964 if not 0 <= mu <= 1:
965 raise nx.NetworkXError("mu must be in the interval [0, 1]")
966
967 # Validate parameters for generating the degree sequence.
968 if max_degree is None:
969 max_degree = n
970 elif not 0 < max_degree <= n:
971 raise nx.NetworkXError("max_degree must be in the interval (0, n]")
972 if not ((min_degree is None) ^ (average_degree is None)):
973 raise nx.NetworkXError("Must assign exactly one of min_degree and"
974 " average_degree")
975 if min_degree is None:
976 min_degree = _generate_min_degree(tau1, average_degree, max_degree,
977 tol, max_iters)
978
979 # Generate a degree sequence with a power law distribution.
980 low, high = min_degree, max_degree
981
982 def condition(seq): return sum(seq) % 2 == 0
983
984 def length(seq): return len(seq) >= n
985 deg_seq = _powerlaw_sequence(tau1, low, high, condition,
986 length, max_iters, seed)
987
988 # Validate parameters for generating the community size sequence.
989 if min_community is None:
990 min_community = min(deg_seq)
991 if max_community is None:
992 max_community = max(deg_seq)
993
994 # Generate a community size sequence with a power law distribution.
995 #
996 # TODO The original code incremented the number of iterations each
997 # time a new Zipf random value was drawn from the distribution. This
998 # differed from the way the number of iterations was incremented in
999 # `_powerlaw_degree_sequence`, so this code was changed to match
1000 # that one. As a result, this code is allowed many more chances to
1001 # generate a valid community size sequence.
1002 low, high = min_community, max_community
1003
1004 def condition(seq): return sum(seq) == n
1005
1006 def length(seq): return sum(seq) >= n
1007 comms = _powerlaw_sequence(tau2, low, high, condition,
1008 length, max_iters, seed)
1009
1010 # Generate the communities based on the given degree sequence and
1011 # community sizes.
1012 max_iters *= 10 * n
1013 communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
1014
1015 # Finally, generate the benchmark graph based on the given
1016 # communities, joining nodes according to the intra- and
1017 # inter-community degrees.
1018 G = nx.Graph()
1019 G.add_nodes_from(range(n))
1020 for c in communities:
1021 for u in c:
1022 while G.degree(u) < round(deg_seq[u] * (1 - mu)):
1023 v = seed.choice(list(c))
1024 G.add_edge(u, v)
1025 while G.degree(u) < deg_seq[u]:
1026 v = seed.choice(range(n))
1027 if v not in c:
1028 G.add_edge(u, v)
1029 G.nodes[u]['community'] = c
1030 return G