Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/networkx/generators/community.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo/lib/python3.7/site-packages/networkx/generators/community.py Fri Jul 31 00:32:28 2020 -0400 @@ -0,0 +1,1030 @@ +# Copyright(C) 2011-2019 by +# Ben Edwards <bedwards@cs.unm.edu> +# Aric Hagberg <hagberg@lanl.gov> +# Konstantinos Karakatsanis <dinoskarakas@gmail.com> +# All rights reserved. +# BSD license. +# +# Authors: Ben Edwards (bedwards@cs.unm.edu) +# Aric Hagberg (hagberg@lanl.gov) +# Konstantinos Karakatsanis (dinoskarakas@gmail.com) +# Jean-Gabriel Young (jean.gabriel.young@gmail.com) +"""Generators for classes of graphs used in studying social networks.""" +import itertools +import math +import networkx as nx +from networkx.utils import py_random_state + +# Accommodates for both SciPy and non-SciPy implementations.. +try: + from scipy.special import zeta as _zeta + + def zeta(x, q, tolerance): + return _zeta(x, q) +except ImportError: + def zeta(x, q, tolerance): + """The Hurwitz zeta function, or the Riemann zeta function of two + arguments. + + ``x`` must be greater than one and ``q`` must be positive. + + This function repeatedly computes subsequent partial sums until + convergence, as decided by ``tolerance``. + """ + z = 0 + z_prev = -float('inf') + k = 0 + while abs(z - z_prev) > tolerance: + z_prev = z + z += 1 / ((k + q) ** x) + k += 1 + return z + +__all__ = ['caveman_graph', 'connected_caveman_graph', + 'relaxed_caveman_graph', 'random_partition_graph', + 'planted_partition_graph', 'gaussian_random_partition_graph', + 'ring_of_cliques', 'windmill_graph', 'stochastic_block_model', + 'LFR_benchmark_graph'] + + +def caveman_graph(l, k): + """Returns a caveman graph of `l` cliques of size `k`. + + Parameters + ---------- + l : int + Number of cliques + k : int + Size of cliques + + Returns + ------- + G : NetworkX Graph + caveman graph + + Notes + ----- + This returns an undirected graph, it can be converted to a directed + graph using :func:`nx.to_directed`, or a multigraph using + ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is + described in [1]_ and it is unclear which of the directed + generalizations is most useful. + + Examples + -------- + >>> G = nx.caveman_graph(3, 3) + + See also + -------- + + connected_caveman_graph + + References + ---------- + .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' + Amer. J. Soc. 105, 493-527, 1999. + """ + # l disjoint cliques of size k + G = nx.empty_graph(l * k) + if k > 1: + for start in range(0, l * k, k): + edges = itertools.combinations(range(start, start + k), 2) + G.add_edges_from(edges) + return G + + +def connected_caveman_graph(l, k): + """Returns a connected caveman graph of `l` cliques of size `k`. + + The connected caveman graph is formed by creating `n` cliques of size + `k`, then a single edge in each clique is rewired to a node in an + adjacent clique. + + Parameters + ---------- + l : int + number of cliques + k : int + size of cliques + + Returns + ------- + G : NetworkX Graph + connected caveman graph + + Notes + ----- + This returns an undirected graph, it can be converted to a directed + graph using :func:`nx.to_directed`, or a multigraph using + ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is + described in [1]_ and it is unclear which of the directed + generalizations is most useful. + + Examples + -------- + >>> G = nx.connected_caveman_graph(3, 3) + + References + ---------- + .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' + Amer. J. Soc. 105, 493-527, 1999. + """ + G = nx.caveman_graph(l, k) + for start in range(0, l * k, k): + G.remove_edge(start, start + 1) + G.add_edge(start, (start - 1) % (l * k)) + return G + + +@py_random_state(3) +def relaxed_caveman_graph(l, k, p, seed=None): + """Returns a relaxed caveman graph. + + A relaxed caveman graph starts with `l` cliques of size `k`. Edges are + then randomly rewired with probability `p` to link different cliques. + + Parameters + ---------- + l : int + Number of groups + k : int + Size of cliques + p : float + Probabilty of rewiring each edge. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + G : NetworkX Graph + Relaxed Caveman Graph + + Raises + ------ + NetworkXError: + If p is not in [0,1] + + Examples + -------- + >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42) + + References + ---------- + .. [1] Santo Fortunato, Community Detection in Graphs, + Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174. + https://arxiv.org/abs/0906.0612 + """ + G = nx.caveman_graph(l, k) + nodes = list(G) + for (u, v) in G.edges(): + if seed.random() < p: # rewire the edge + x = seed.choice(nodes) + if G.has_edge(u, x): + continue + G.remove_edge(u, v) + G.add_edge(u, x) + return G + + +@py_random_state(3) +def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False): + """Returns the random partition graph with a partition of sizes. + + A partition graph is a graph of communities with sizes defined by + s in sizes. Nodes in the same group are connected with probability + p_in and nodes of different groups are connected with probability + p_out. + + Parameters + ---------- + sizes : list of ints + Sizes of groups + p_in : float + probability of edges with in groups + p_out : float + probability of edges between groups + directed : boolean optional, default=False + Whether to create a directed graph + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + G : NetworkX Graph or DiGraph + random partition graph of size sum(gs) + + Raises + ------ + NetworkXError + If p_in or p_out is not in [0,1] + + Examples + -------- + >>> G = nx.random_partition_graph([10,10,10],.25,.01) + >>> len(G) + 30 + >>> partition = G.graph['partition'] + >>> len(partition) + 3 + + Notes + ----- + This is a generalization of the planted-l-partition described in + [1]_. It allows for the creation of groups of any size. + + The partition is store as a graph attribute 'partition'. + + References + ---------- + .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports + Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 + """ + # Use geometric method for O(n+m) complexity algorithm + # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation')) + if not 0.0 <= p_in <= 1.0: + raise nx.NetworkXError("p_in must be in [0,1]") + if not 0.0 <= p_out <= 1.0: + raise nx.NetworkXError("p_out must be in [0,1]") + + # create connection matrix + num_blocks = len(sizes) + p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)] + for r in range(num_blocks): + p[r][r] = p_in + + return stochastic_block_model(sizes, p, nodelist=None, seed=seed, + directed=directed, selfloops=False, + sparse=True) + + +@py_random_state(4) +def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False): + """Returns the planted l-partition graph. + + This model partitions a graph with n=l*k vertices in + l groups with k vertices each. Vertices of the same + group are linked with a probability p_in, and vertices + of different groups are linked with probability p_out. + + Parameters + ---------- + l : int + Number of groups + k : int + Number of vertices in each group + p_in : float + probability of connecting vertices within a group + p_out : float + probability of connected vertices between groups + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + directed : bool,optional (default=False) + If True return a directed graph + + Returns + ------- + G : NetworkX Graph or DiGraph + planted l-partition graph + + Raises + ------ + NetworkXError: + If p_in,p_out are not in [0,1] or + + Examples + -------- + >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42) + + See Also + -------- + random_partition_model + + References + ---------- + .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning + on the planted partition model, + Random Struct. Algor. 18 (2001) 116-140. + + .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports + Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 + """ + return random_partition_graph([k] * l, p_in, p_out, seed, directed) + + +@py_random_state(6) +def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, + seed=None): + """Generate a Gaussian random partition graph. + + A Gaussian random partition graph is created by creating k partitions + each with a size drawn from a normal distribution with mean s and variance + s/v. Nodes are connected within clusters with probability p_in and + between clusters with probability p_out[1] + + Parameters + ---------- + n : int + Number of nodes in the graph + s : float + Mean cluster size + v : float + Shape parameter. The variance of cluster size distribution is s/v. + p_in : float + Probabilty of intra cluster connection. + p_out : float + Probability of inter cluster connection. + directed : boolean, optional default=False + Whether to create a directed graph or not + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + G : NetworkX Graph or DiGraph + gaussian random partition graph + + Raises + ------ + NetworkXError + If s is > n + If p_in or p_out is not in [0,1] + + Notes + ----- + Note the number of partitions is dependent on s,v and n, and that the + last partition may be considerably smaller, as it is sized to simply + fill out the nodes [1] + + See Also + -------- + random_partition_graph + + Examples + -------- + >>> G = nx.gaussian_random_partition_graph(100,10,10,.25,.1) + >>> len(G) + 100 + + References + ---------- + .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner, + Experiments on Graph Clustering Algorithms, + In the proceedings of the 11th Europ. Symp. Algorithms, 2003. + """ + if s > n: + raise nx.NetworkXError("s must be <= n") + assigned = 0 + sizes = [] + while True: + size = int(seed.gauss(s, float(s) / v + 0.5)) + if size < 1: # how to handle 0 or negative sizes? + continue + if assigned + size >= n: + sizes.append(n - assigned) + break + assigned += size + sizes.append(size) + return random_partition_graph(sizes, p_in, p_out, directed, seed) + + +def ring_of_cliques(num_cliques, clique_size): + """Defines a "ring of cliques" graph. + + A ring of cliques graph is consisting of cliques, connected through single + links. Each clique is a complete graph. + + Parameters + ---------- + num_cliques : int + Number of cliques + clique_size : int + Size of cliques + + Returns + ------- + G : NetworkX Graph + ring of cliques graph + + Raises + ------ + NetworkXError + If the number of cliques is lower than 2 or + if the size of cliques is smaller than 2. + + Examples + -------- + >>> G = nx.ring_of_cliques(8, 4) + + See Also + -------- + connected_caveman_graph + + Notes + ----- + The `connected_caveman_graph` graph removes a link from each clique to + connect it with the next clique. Instead, the `ring_of_cliques` graph + simply adds the link without removing any link from the cliques. + """ + if num_cliques < 2: + raise nx.NetworkXError('A ring of cliques must have at least ' + 'two cliques') + if clique_size < 2: + raise nx.NetworkXError('The cliques must have at least two nodes') + + G = nx.Graph() + for i in range(num_cliques): + edges = itertools.combinations(range(i * clique_size, i * clique_size + + clique_size), 2) + G.add_edges_from(edges) + G.add_edge(i * clique_size + 1, (i + 1) * clique_size % + (num_cliques * clique_size)) + return G + + +def windmill_graph(n, k): + """Generate a windmill graph. + A windmill graph is a graph of `n` cliques each of size `k` that are all + joined at one node. + It can be thought of as taking a disjoint union of `n` cliques of size `k`, + selecting one point from each, and contracting all of the selected points. + Alternatively, one could generate `n` cliques of size `k-1` and one node + that is connected to all other nodes in the graph. + + Parameters + ---------- + n : int + Number of cliques + k : int + Size of cliques + + Returns + ------- + G : NetworkX Graph + windmill graph with n cliques of size k + + Raises + ------ + NetworkXError + If the number of cliques is less than two + If the size of the cliques are less than two + + Examples + -------- + >>> G = nx.windmill_graph(4, 5) + + Notes + ----- + The node labeled `0` will be the node connected to all other nodes. + Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters + are in the opposite order as the parameters of this method. + """ + if n < 2: + msg = 'A windmill graph must have at least two cliques' + raise nx.NetworkXError(msg) + if k < 2: + raise nx.NetworkXError('The cliques must have at least two nodes') + + G = nx.disjoint_union_all(itertools.chain([nx.complete_graph(k)], + (nx.complete_graph(k - 1) + for _ in range(n - 1)))) + G.add_edges_from((0, i) for i in range(k, G.number_of_nodes())) + return G + + +@py_random_state(3) +def stochastic_block_model(sizes, p, nodelist=None, seed=None, + directed=False, selfloops=False, sparse=True): + """Returns a stochastic block model graph. + + This model partitions the nodes in blocks of arbitrary sizes, and places + edges between pairs of nodes independently, with a probability that depends + on the blocks. + + Parameters + ---------- + sizes : list of ints + Sizes of blocks + p : list of list of floats + Element (r,s) gives the density of edges going from the nodes + of group r to nodes of group s. + p must match the number of groups (len(sizes) == len(p)), + and it must be symmetric if the graph is undirected. + nodelist : list, optional + The block tags are assigned according to the node identifiers + in nodelist. If nodelist is None, then the ordering is the + range [0,sum(sizes)-1]. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + directed : boolean optional, default=False + Whether to create a directed graph or not. + selfloops : boolean optional, default=False + Whether to include self-loops or not. + sparse: boolean optional, default=True + Use the sparse heuristic to speed up the generator. + + Returns + ------- + g : NetworkX Graph or DiGraph + Stochastic block model graph of size sum(sizes) + + Raises + ------ + NetworkXError + If probabilities are not in [0,1]. + If the probability matrix is not square (directed case). + If the probability matrix is not symmetric (undirected case). + If the sizes list does not match nodelist or the probability matrix. + If nodelist contains duplicate. + + Examples + -------- + >>> sizes = [75, 75, 300] + >>> probs = [[0.25, 0.05, 0.02], + ... [0.05, 0.35, 0.07], + ... [0.02, 0.07, 0.40]] + >>> g = nx.stochastic_block_model(sizes, probs, seed=0) + >>> len(g) + 450 + >>> H = nx.quotient_graph(g, g.graph['partition'], relabel=True) + >>> for v in H.nodes(data=True): + ... print(round(v[1]['density'], 3)) + ... + 0.245 + 0.348 + 0.405 + >>> for v in H.edges(data=True): + ... print(round(1.0 * v[2]['weight'] / (sizes[v[0]] * sizes[v[1]]), 3)) + ... + 0.051 + 0.022 + 0.07 + + See Also + -------- + random_partition_graph + planted_partition_graph + gaussian_random_partition_graph + gnp_random_graph + + References + ---------- + .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S., + "Stochastic blockmodels: First steps", + Social networks, 5(2), 109-137, 1983. + """ + # Check if dimensions match + if len(sizes) != len(p): + raise nx.NetworkXException("'sizes' and 'p' do not match.") + # Check for probability symmetry (undirected) and shape (directed) + for row in p: + if len(p) != len(row): + raise nx.NetworkXException("'p' must be a square matrix.") + if not directed: + p_transpose = [list(i) for i in zip(*p)] + for i in zip(p, p_transpose): + for j in zip(i[0], i[1]): + if abs(j[0] - j[1]) > 1e-08: + raise nx.NetworkXException("'p' must be symmetric.") + # Check for probability range + for row in p: + for prob in row: + if prob < 0 or prob > 1: + raise nx.NetworkXException("Entries of 'p' not in [0,1].") + # Check for nodelist consistency + if nodelist is not None: + if len(nodelist) != sum(sizes): + raise nx.NetworkXException("'nodelist' and 'sizes' do not match.") + if len(nodelist) != len(set(nodelist)): + raise nx.NetworkXException("nodelist contains duplicate.") + else: + nodelist = range(0, sum(sizes)) + + # Setup the graph conditionally to the directed switch. + block_range = range(len(sizes)) + if directed: + g = nx.DiGraph() + block_iter = itertools.product(block_range, block_range) + else: + g = nx.Graph() + block_iter = itertools.combinations_with_replacement(block_range, 2) + # Split nodelist in a partition (list of sets). + size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)] + g.graph['partition'] = [set(nodelist[size_cumsum[x]:size_cumsum[x + 1]]) + for x in range(0, len(size_cumsum) - 1)] + # Setup nodes and graph name + for block_id, nodes in enumerate(g.graph['partition']): + for node in nodes: + g.add_node(node, block=block_id) + + g.name = "stochastic_block_model" + + # Test for edge existence + parts = g.graph['partition'] + for i, j in block_iter: + if i == j: + if directed: + if selfloops: + edges = itertools.product(parts[i], parts[i]) + else: + edges = itertools.permutations(parts[i], 2) + else: + edges = itertools.combinations(parts[i], 2) + if selfloops: + edges = itertools.chain(edges, zip(parts[i], parts[i])) + for e in edges: + if seed.random() < p[i][j]: + g.add_edge(*e) + else: + edges = itertools.product(parts[i], parts[j]) + if sparse: + if p[i][j] == 1: # Test edges cases p_ij = 0 or 1 + for e in edges: + g.add_edge(*e) + elif p[i][j] > 0: + while True: + try: + logrand = math.log(seed.random()) + skip = math.floor(logrand / math.log(1 - p[i][j])) + # consume "skip" edges + next(itertools.islice(edges, skip, skip), None) + e = next(edges) + g.add_edge(*e) # __safe + except StopIteration: + break + else: + for e in edges: + if seed.random() < p[i][j]: + g.add_edge(*e) # __safe + return g + + +def _zipf_rv_below(gamma, xmin, threshold, seed): + """Returns a random value chosen from the bounded Zipf distribution. + + Repeatedly draws values from the Zipf distribution until the + threshold is met, then returns that value. + """ + result = nx.utils.zipf_rv(gamma, xmin, seed) + while result > threshold: + result = nx.utils.zipf_rv(gamma, xmin, seed) + return result + + +def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed): + """Returns a list of numbers obeying a constrained power law distribution. + + ``gamma`` and ``low`` are the parameters for the Zipf distribution. + + ``high`` is the maximum allowed value for values draw from the Zipf + distribution. For more information, see :func:`_zipf_rv_below`. + + ``condition`` and ``length`` are Boolean-valued functions on + lists. While generating the list, random values are drawn and + appended to the list until ``length`` is satisfied by the created + list. Once ``condition`` is satisfied, the sequence generated in + this way is returned. + + ``max_iters`` indicates the number of times to generate a list + satisfying ``length``. If the number of iterations exceeds this + value, :exc:`~networkx.exception.ExceededMaxIterations` is raised. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + """ + for i in range(max_iters): + seq = [] + while not length(seq): + seq.append(_zipf_rv_below(gamma, low, high, seed)) + if condition(seq): + return seq + raise nx.ExceededMaxIterations("Could not create power law sequence") + + +# TODO Needs documentation. +def _generate_min_degree(gamma, average_degree, max_degree, tolerance, + max_iters): + """Returns a minimum degree from the given average degree.""" + min_deg_top = max_degree + min_deg_bot = 1 + min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot + itrs = 0 + mid_avg_deg = 0 + while abs(mid_avg_deg - average_degree) > tolerance: + if itrs > max_iters: + raise nx.ExceededMaxIterations("Could not match average_degree") + mid_avg_deg = 0 + for x in range(int(min_deg_mid), max_degree + 1): + mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid, + tolerance) + if mid_avg_deg > average_degree: + min_deg_top = min_deg_mid + min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot + else: + min_deg_bot = min_deg_mid + min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot + itrs += 1 + # return int(min_deg_mid + 0.5) + return round(min_deg_mid) + + +def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed): + """Returns a list of sets, each of which represents a community. + + ``degree_seq`` is the degree sequence that must be met by the + graph. + + ``community_sizes`` is the community size distribution that must be + met by the generated list of sets. + + ``mu`` is a float in the interval [0, 1] indicating the fraction of + intra-community edges incident to each node. + + ``max_iters`` is the number of times to try to add a node to a + community. This must be greater than the length of + ``degree_seq``, otherwise this function will always fail. If + the number of iterations exceeds this value, + :exc:`~networkx.exception.ExceededMaxIterations` is raised. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + The communities returned by this are sets of integers in the set {0, + ..., *n* - 1}, where *n* is the length of ``degree_seq``. + + """ + # This assumes the nodes in the graph will be natural numbers. + result = [set() for _ in community_sizes] + n = len(degree_seq) + free = list(range(n)) + for i in range(max_iters): + v = free.pop() + c = seed.choice(range(len(community_sizes))) + # s = int(degree_seq[v] * (1 - mu) + 0.5) + s = round(degree_seq[v] * (1 - mu)) + # If the community is large enough, add the node to the chosen + # community. Otherwise, return it to the list of unaffiliated + # nodes. + if s < community_sizes[c]: + result[c].add(v) + else: + free.append(v) + # If the community is too big, remove a node from it. + if len(result[c]) > community_sizes[c]: + free.append(result[c].pop()) + if not free: + return result + msg = 'Could not assign communities; try increasing min_community' + raise nx.ExceededMaxIterations(msg) + + +@py_random_state(11) +def LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=None, + min_degree=None, max_degree=None, min_community=None, + max_community=None, tol=1.0e-7, max_iters=500, + seed=None): + r"""Returns the LFR benchmark graph. + + This algorithm proceeds as follows: + + 1) Find a degree sequence with a power law distribution, and minimum + value ``min_degree``, which has approximate average degree + ``average_degree``. This is accomplished by either + + a) specifying ``min_degree`` and not ``average_degree``, + b) specifying ``average_degree`` and not ``min_degree``, in which + case a suitable minimum degree will be found. + + ``max_degree`` can also be specified, otherwise it will be set to + ``n``. Each node *u* will have `\mu \mathrm{deg}(u)` edges + joining it to nodes in communities other than its own and `(1 - + \mu) \mathrm{deg}(u)` edges joining it to nodes in its own + community. + 2) Generate community sizes according to a power law distribution + with exponent ``tau2``. If ``min_community`` and + ``max_community`` are not specified they will be selected to be + ``min_degree`` and ``max_degree``, respectively. Community sizes + are generated until the sum of their sizes equals ``n``. + 3) Each node will be randomly assigned a community with the + condition that the community is large enough for the node's + intra-community degree, `(1 - \mu) \mathrm{deg}(u)` as + described in step 2. If a community grows too large, a random node + will be selected for reassignment to a new community, until all + nodes have been assigned a community. + 4) Each node *u* then adds `(1 - \mu) \mathrm{deg}(u)` + intra-community edges and `\mu \mathrm{deg}(u)` inter-community + edges. + + Parameters + ---------- + n : int + Number of nodes in the created graph. + + tau1 : float + Power law exponent for the degree distribution of the created + graph. This value must be strictly greater than one. + + tau2 : float + Power law exponent for the community size distribution in the + created graph. This value must be strictly greater than one. + + mu : float + Fraction of intra-community edges incident to each node. This + value must be in the interval [0, 1]. + + average_degree : float + Desired average degree of nodes in the created graph. This value + must be in the interval [0, *n*]. Exactly one of this and + ``min_degree`` must be specified, otherwise a + :exc:`NetworkXError` is raised. + + min_degree : int + Minimum degree of nodes in the created graph. This value must be + in the interval [0, *n*]. Exactly one of this and + ``average_degree`` must be specified, otherwise a + :exc:`NetworkXError` is raised. + + max_degree : int + Maximum degree of nodes in the created graph. If not specified, + this is set to ``n``, the total number of nodes in the graph. + + min_community : int + Minimum size of communities in the graph. If not specified, this + is set to ``min_degree``. + + max_community : int + Maximum size of communities in the graph. If not specified, this + is set to ``n``, the total number of nodes in the graph. + + tol : float + Tolerance when comparing floats, specifically when comparing + average degree values. + + max_iters : int + Maximum number of iterations to try to create the community sizes, + degree distribution, and community affiliations. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + G : NetworkX graph + The LFR benchmark graph generated according to the specified + parameters. + + Each node in the graph has a node attribute ``'community'`` that + stores the community (that is, the set of nodes) that includes + it. + + Raises + ------ + NetworkXError + If any of the parameters do not meet their upper and lower bounds: + + - ``tau1`` and ``tau2`` must be strictly greater than 1. + - ``mu`` must be in [0, 1]. + - ``max_degree`` must be in {1, ..., *n*}. + - ``min_community`` and ``max_community`` must be in {0, ..., + *n*}. + + If not exactly one of ``average_degree`` and ``min_degree`` is + specified. + + If ``min_degree`` is not specified and a suitable ``min_degree`` + cannot be found. + + ExceededMaxIterations + If a valid degree sequence cannot be created within + ``max_iters`` number of iterations. + + If a valid set of community sizes cannot be created within + ``max_iters`` number of iterations. + + If a valid community assignment cannot be created within ``10 * + n * max_iters`` number of iterations. + + Examples + -------- + Basic usage:: + + >>> from networkx.generators.community import LFR_benchmark_graph + >>> n = 250 + >>> tau1 = 3 + >>> tau2 = 1.5 + >>> mu = 0.1 + >>> G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5, + ... min_community=20, seed=10) + + Continuing the example above, you can get the communities from the + node attributes of the graph:: + + >>> communities = {frozenset(G.nodes[v]['community']) for v in G} + + Notes + ----- + This algorithm differs slightly from the original way it was + presented in [1]. + + 1) Rather than connecting the graph via a configuration model then + rewiring to match the intra-community and inter-community + degrees, we do this wiring explicitly at the end, which should be + equivalent. + 2) The code posted on the author's website [2] calculates the random + power law distributed variables and their average using + continuous approximations, whereas we use the discrete + distributions here as both degree and community size are + discrete. + + Though the authors describe the algorithm as quite robust, testing + during development indicates that a somewhat narrower parameter set + is likely to successfully produce a graph. Some suggestions have + been provided in the event of exceptions. + + References + ---------- + .. [1] "Benchmark graphs for testing community detection algorithms", + Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi, + Phys. Rev. E 78, 046110 2008 + .. [2] http://santo.fortunato.googlepages.com/inthepress2 + + """ + # Perform some basic parameter validation. + if not tau1 > 1: + raise nx.NetworkXError("tau1 must be greater than one") + if not tau2 > 1: + raise nx.NetworkXError("tau2 must be greater than one") + if not 0 <= mu <= 1: + raise nx.NetworkXError("mu must be in the interval [0, 1]") + + # Validate parameters for generating the degree sequence. + if max_degree is None: + max_degree = n + elif not 0 < max_degree <= n: + raise nx.NetworkXError("max_degree must be in the interval (0, n]") + if not ((min_degree is None) ^ (average_degree is None)): + raise nx.NetworkXError("Must assign exactly one of min_degree and" + " average_degree") + if min_degree is None: + min_degree = _generate_min_degree(tau1, average_degree, max_degree, + tol, max_iters) + + # Generate a degree sequence with a power law distribution. + low, high = min_degree, max_degree + + def condition(seq): return sum(seq) % 2 == 0 + + def length(seq): return len(seq) >= n + deg_seq = _powerlaw_sequence(tau1, low, high, condition, + length, max_iters, seed) + + # Validate parameters for generating the community size sequence. + if min_community is None: + min_community = min(deg_seq) + if max_community is None: + max_community = max(deg_seq) + + # Generate a community size sequence with a power law distribution. + # + # TODO The original code incremented the number of iterations each + # time a new Zipf random value was drawn from the distribution. This + # differed from the way the number of iterations was incremented in + # `_powerlaw_degree_sequence`, so this code was changed to match + # that one. As a result, this code is allowed many more chances to + # generate a valid community size sequence. + low, high = min_community, max_community + + def condition(seq): return sum(seq) == n + + def length(seq): return sum(seq) >= n + comms = _powerlaw_sequence(tau2, low, high, condition, + length, max_iters, seed) + + # Generate the communities based on the given degree sequence and + # community sizes. + max_iters *= 10 * n + communities = _generate_communities(deg_seq, comms, mu, max_iters, seed) + + # Finally, generate the benchmark graph based on the given + # communities, joining nodes according to the intra- and + # inter-community degrees. + G = nx.Graph() + G.add_nodes_from(range(n)) + for c in communities: + for u in c: + while G.degree(u) < round(deg_seq[u] * (1 - mu)): + v = seed.choice(list(c)) + G.add_edge(u, v) + while G.degree(u) < deg_seq[u]: + v = seed.choice(range(n)) + if v not in c: + G.add_edge(u, v) + G.nodes[u]['community'] = c + return G