Mercurial > repos > guerler > springsuite

diff planemo/lib/python3.7/site-packages/networkx/generators/community.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author: guerler
date: Fri, 31 Jul 2020 00:32:28 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo/lib/python3.7/site-packages/networkx/generators/community.py	Fri Jul 31 00:32:28 2020 -0400
@@ -0,0 +1,1030 @@
+#    Copyright(C) 2011-2019 by
+#    Ben Edwards <bedwards@cs.unm.edu>
+#    Aric Hagberg <hagberg@lanl.gov>
+#    Konstantinos Karakatsanis <dinoskarakas@gmail.com>
+#    All rights reserved.
+#    BSD license.
+#
+# Authors:  Ben Edwards (bedwards@cs.unm.edu)
+#           Aric Hagberg (hagberg@lanl.gov)
+#           Konstantinos Karakatsanis (dinoskarakas@gmail.com)
+#           Jean-Gabriel Young (jean.gabriel.young@gmail.com)
+"""Generators for classes of graphs used in studying social networks."""
+import itertools
+import math
+import networkx as nx
+from networkx.utils import py_random_state
+
+# Accommodates for both SciPy and non-SciPy implementations..
+try:
+    from scipy.special import zeta as _zeta
+
+    def zeta(x, q, tolerance):
+        return _zeta(x, q)
+except ImportError:
+    def zeta(x, q, tolerance):
+        """The Hurwitz zeta function, or the Riemann zeta function of two
+        arguments.
+
+        ``x`` must be greater than one and ``q`` must be positive.
+
+        This function repeatedly computes subsequent partial sums until
+        convergence, as decided by ``tolerance``.
+        """
+        z = 0
+        z_prev = -float('inf')
+        k = 0
+        while abs(z - z_prev) > tolerance:
+            z_prev = z
+            z += 1 / ((k + q) ** x)
+            k += 1
+        return z
+
+__all__ = ['caveman_graph', 'connected_caveman_graph',
+           'relaxed_caveman_graph', 'random_partition_graph',
+           'planted_partition_graph', 'gaussian_random_partition_graph',
+           'ring_of_cliques', 'windmill_graph', 'stochastic_block_model',
+           'LFR_benchmark_graph']
+
+
+def caveman_graph(l, k):
+    """Returns a caveman graph of `l` cliques of size `k`.
+
+    Parameters
+    ----------
+    l : int
+      Number of cliques
+    k : int
+      Size of cliques
+
+    Returns
+    -------
+    G : NetworkX Graph
+      caveman graph
+
+    Notes
+    -----
+    This returns an undirected graph, it can be converted to a directed
+    graph using :func:`nx.to_directed`, or a multigraph using
+    ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
+    described in [1]_ and it is unclear which of the directed
+    generalizations is most useful.
+
+    Examples
+    --------
+    >>> G = nx.caveman_graph(3, 3)
+
+    See also
+    --------
+
+    connected_caveman_graph
+
+    References
+    ----------
+    .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
+       Amer. J. Soc. 105, 493-527, 1999.
+    """
+    # l disjoint cliques of size k
+    G = nx.empty_graph(l * k)
+    if k > 1:
+        for start in range(0, l * k, k):
+            edges = itertools.combinations(range(start, start + k), 2)
+            G.add_edges_from(edges)
+    return G
+
+
+def connected_caveman_graph(l, k):
+    """Returns a connected caveman graph of `l` cliques of size `k`.
+
+    The connected caveman graph is formed by creating `n` cliques of size
+    `k`, then a single edge in each clique is rewired to a node in an
+    adjacent clique.
+
+    Parameters
+    ----------
+    l : int
+      number of cliques
+    k : int
+      size of cliques
+
+    Returns
+    -------
+    G : NetworkX Graph
+      connected caveman graph
+
+    Notes
+    -----
+    This returns an undirected graph, it can be converted to a directed
+    graph using :func:`nx.to_directed`, or a multigraph using
+    ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
+    described in [1]_ and it is unclear which of the directed
+    generalizations is most useful.
+
+    Examples
+    --------
+    >>> G = nx.connected_caveman_graph(3, 3)
+
+    References
+    ----------
+    .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
+       Amer. J. Soc. 105, 493-527, 1999.
+    """
+    G = nx.caveman_graph(l, k)
+    for start in range(0, l * k, k):
+        G.remove_edge(start, start + 1)
+        G.add_edge(start, (start - 1) % (l * k))
+    return G
+
+
+@py_random_state(3)
+def relaxed_caveman_graph(l, k, p, seed=None):
+    """Returns a relaxed caveman graph.
+
+    A relaxed caveman graph starts with `l` cliques of size `k`.  Edges are
+    then randomly rewired with probability `p` to link different cliques.
+
+    Parameters
+    ----------
+    l : int
+      Number of groups
+    k : int
+      Size of cliques
+    p : float
+      Probabilty of rewiring each edge.
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    G : NetworkX Graph
+      Relaxed Caveman Graph
+
+    Raises
+    ------
+    NetworkXError:
+     If p is not in [0,1]
+
+    Examples
+    --------
+    >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
+
+    References
+    ----------
+    .. [1] Santo Fortunato, Community Detection in Graphs,
+       Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
+       https://arxiv.org/abs/0906.0612
+    """
+    G = nx.caveman_graph(l, k)
+    nodes = list(G)
+    for (u, v) in G.edges():
+        if seed.random() < p:  # rewire the edge
+            x = seed.choice(nodes)
+            if G.has_edge(u, x):
+                continue
+            G.remove_edge(u, v)
+            G.add_edge(u, x)
+    return G
+
+
+@py_random_state(3)
+def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
+    """Returns the random partition graph with a partition of sizes.
+
+    A partition graph is a graph of communities with sizes defined by
+    s in sizes. Nodes in the same group are connected with probability
+    p_in and nodes of different groups are connected with probability
+    p_out.
+
+    Parameters
+    ----------
+    sizes : list of ints
+      Sizes of groups
+    p_in : float
+      probability of edges with in groups
+    p_out : float
+      probability of edges between groups
+    directed : boolean optional, default=False
+      Whether to create a directed graph
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    G : NetworkX Graph or DiGraph
+      random partition graph of size sum(gs)
+
+    Raises
+    ------
+    NetworkXError
+      If p_in or p_out is not in [0,1]
+
+    Examples
+    --------
+    >>> G = nx.random_partition_graph([10,10,10],.25,.01)
+    >>> len(G)
+    30
+    >>> partition = G.graph['partition']
+    >>> len(partition)
+    3
+
+    Notes
+    -----
+    This is a generalization of the planted-l-partition described in
+    [1]_.  It allows for the creation of groups of any size.
+
+    The partition is store as a graph attribute 'partition'.
+
+    References
+    ----------
+    .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
+       Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
+    """
+    # Use geometric method for O(n+m) complexity algorithm
+    # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
+    if not 0.0 <= p_in <= 1.0:
+        raise nx.NetworkXError("p_in must be in [0,1]")
+    if not 0.0 <= p_out <= 1.0:
+        raise nx.NetworkXError("p_out must be in [0,1]")
+
+    # create connection matrix
+    num_blocks = len(sizes)
+    p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
+    for r in range(num_blocks):
+        p[r][r] = p_in
+
+    return stochastic_block_model(sizes, p, nodelist=None, seed=seed,
+                                  directed=directed, selfloops=False,
+                                  sparse=True)
+
+
+@py_random_state(4)
+def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
+    """Returns the planted l-partition graph.
+
+    This model partitions a graph with n=l*k vertices in
+    l groups with k vertices each. Vertices of the same
+    group are linked with a probability p_in, and vertices
+    of different groups are linked with probability p_out.
+
+    Parameters
+    ----------
+    l : int
+      Number of groups
+    k : int
+      Number of vertices in each group
+    p_in : float
+      probability of connecting vertices within a group
+    p_out : float
+      probability of connected vertices between groups
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+    directed : bool,optional (default=False)
+      If True return a directed graph
+
+    Returns
+    -------
+    G : NetworkX Graph or DiGraph
+      planted l-partition graph
+
+    Raises
+    ------
+    NetworkXError:
+      If p_in,p_out are not in [0,1] or
+
+    Examples
+    --------
+    >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
+
+    See Also
+    --------
+    random_partition_model
+
+    References
+    ----------
+    .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
+        on the planted partition model,
+        Random Struct. Algor. 18 (2001) 116-140.
+
+    .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
+       Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
+    """
+    return random_partition_graph([k] * l, p_in, p_out, seed, directed)
+
+
+@py_random_state(6)
+def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False,
+                                    seed=None):
+    """Generate a Gaussian random partition graph.
+
+    A Gaussian random partition graph is created by creating k partitions
+    each with a size drawn from a normal distribution with mean s and variance
+    s/v. Nodes are connected within clusters with probability p_in and
+    between clusters with probability p_out[1]
+
+    Parameters
+    ----------
+    n : int
+      Number of nodes in the graph
+    s : float
+      Mean cluster size
+    v : float
+      Shape parameter. The variance of cluster size distribution is s/v.
+    p_in : float
+      Probabilty of intra cluster connection.
+    p_out : float
+      Probability of inter cluster connection.
+    directed : boolean, optional default=False
+      Whether to create a directed graph or not
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    G : NetworkX Graph or DiGraph
+      gaussian random partition graph
+
+    Raises
+    ------
+    NetworkXError
+      If s is > n
+      If p_in or p_out is not in [0,1]
+
+    Notes
+    -----
+    Note the number of partitions is dependent on s,v and n, and that the
+    last partition may be considerably smaller, as it is sized to simply
+    fill out the nodes [1]
+
+    See Also
+    --------
+    random_partition_graph
+
+    Examples
+    --------
+    >>> G = nx.gaussian_random_partition_graph(100,10,10,.25,.1)
+    >>> len(G)
+    100
+
+    References
+    ----------
+    .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
+       Experiments on Graph Clustering Algorithms,
+       In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
+    """
+    if s > n:
+        raise nx.NetworkXError("s must be <= n")
+    assigned = 0
+    sizes = []
+    while True:
+        size = int(seed.gauss(s, float(s) / v + 0.5))
+        if size < 1:  # how to handle 0 or negative sizes?
+            continue
+        if assigned + size >= n:
+            sizes.append(n - assigned)
+            break
+        assigned += size
+        sizes.append(size)
+    return random_partition_graph(sizes, p_in, p_out, directed, seed)
+
+
+def ring_of_cliques(num_cliques, clique_size):
+    """Defines a "ring of cliques" graph.
+
+    A ring of cliques graph is consisting of cliques, connected through single
+    links. Each clique is a complete graph.
+
+    Parameters
+    ----------
+    num_cliques : int
+        Number of cliques
+    clique_size : int
+        Size of cliques
+
+    Returns
+    -------
+    G : NetworkX Graph
+        ring of cliques graph
+
+    Raises
+    ------
+    NetworkXError
+        If the number of cliques is lower than 2 or
+        if the size of cliques is smaller than 2.
+
+    Examples
+    --------
+    >>> G = nx.ring_of_cliques(8, 4)
+
+    See Also
+    --------
+    connected_caveman_graph
+
+    Notes
+    -----
+    The `connected_caveman_graph` graph removes a link from each clique to
+    connect it with the next clique. Instead, the `ring_of_cliques` graph
+    simply adds the link without removing any link from the cliques.
+    """
+    if num_cliques < 2:
+        raise nx.NetworkXError('A ring of cliques must have at least '
+                               'two cliques')
+    if clique_size < 2:
+        raise nx.NetworkXError('The cliques must have at least two nodes')
+
+    G = nx.Graph()
+    for i in range(num_cliques):
+        edges = itertools.combinations(range(i * clique_size, i * clique_size +
+                                             clique_size), 2)
+        G.add_edges_from(edges)
+        G.add_edge(i * clique_size + 1, (i + 1) * clique_size %
+                   (num_cliques * clique_size))
+    return G
+
+
+def windmill_graph(n, k):
+    """Generate a windmill graph.
+    A windmill graph is a graph of `n` cliques each of size `k` that are all
+    joined at one node.
+    It can be thought of as taking a disjoint union of `n` cliques of size `k`,
+    selecting one point from each, and contracting all of the selected points.
+    Alternatively, one could generate `n` cliques of size `k-1` and one node
+    that is connected to all other nodes in the graph.
+
+    Parameters
+    ----------
+    n : int
+        Number of cliques
+    k : int
+        Size of cliques
+
+    Returns
+    -------
+    G : NetworkX Graph
+        windmill graph with n cliques of size k
+
+    Raises
+    ------
+    NetworkXError
+        If the number of cliques is less than two
+        If the size of the cliques are less than two
+
+    Examples
+    --------
+    >>> G = nx.windmill_graph(4, 5)
+
+    Notes
+    -----
+    The node labeled `0` will be the node connected to all other nodes.
+    Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
+    are in the opposite order as the parameters of this method.
+    """
+    if n < 2:
+        msg = 'A windmill graph must have at least two cliques'
+        raise nx.NetworkXError(msg)
+    if k < 2:
+        raise nx.NetworkXError('The cliques must have at least two nodes')
+
+    G = nx.disjoint_union_all(itertools.chain([nx.complete_graph(k)],
+                                              (nx.complete_graph(k - 1)
+                                               for _ in range(n - 1))))
+    G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
+    return G
+
+
+@py_random_state(3)
+def stochastic_block_model(sizes, p, nodelist=None, seed=None,
+                           directed=False, selfloops=False, sparse=True):
+    """Returns a stochastic block model graph.
+
+    This model partitions the nodes in blocks of arbitrary sizes, and places
+    edges between pairs of nodes independently, with a probability that depends
+    on the blocks.
+
+    Parameters
+    ----------
+    sizes : list of ints
+        Sizes of blocks
+    p : list of list of floats
+        Element (r,s) gives the density of edges going from the nodes
+        of group r to nodes of group s.
+        p must match the number of groups (len(sizes) == len(p)),
+        and it must be symmetric if the graph is undirected.
+    nodelist : list, optional
+        The block tags are assigned according to the node identifiers
+        in nodelist. If nodelist is None, then the ordering is the
+        range [0,sum(sizes)-1].
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+    directed : boolean optional, default=False
+        Whether to create a directed graph or not.
+    selfloops : boolean optional, default=False
+        Whether to include self-loops or not.
+    sparse: boolean optional, default=True
+        Use the sparse heuristic to speed up the generator.
+
+    Returns
+    -------
+    g : NetworkX Graph or DiGraph
+        Stochastic block model graph of size sum(sizes)
+
+    Raises
+    ------
+    NetworkXError
+      If probabilities are not in [0,1].
+      If the probability matrix is not square (directed case).
+      If the probability matrix is not symmetric (undirected case).
+      If the sizes list does not match nodelist or the probability matrix.
+      If nodelist contains duplicate.
+
+    Examples
+    --------
+    >>> sizes = [75, 75, 300]
+    >>> probs = [[0.25, 0.05, 0.02],
+    ...          [0.05, 0.35, 0.07],
+    ...          [0.02, 0.07, 0.40]]
+    >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
+    >>> len(g)
+    450
+    >>> H = nx.quotient_graph(g, g.graph['partition'], relabel=True)
+    >>> for v in H.nodes(data=True):
+    ...     print(round(v[1]['density'], 3))
+    ...
+    0.245
+    0.348
+    0.405
+    >>> for v in H.edges(data=True):
+    ...     print(round(1.0 * v[2]['weight'] / (sizes[v[0]] * sizes[v[1]]), 3))
+    ...
+    0.051
+    0.022
+    0.07
+
+    See Also
+    --------
+    random_partition_graph
+    planted_partition_graph
+    gaussian_random_partition_graph
+    gnp_random_graph
+
+    References
+    ----------
+    .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
+           "Stochastic blockmodels: First steps",
+           Social networks, 5(2), 109-137, 1983.
+    """
+    # Check if dimensions match
+    if len(sizes) != len(p):
+        raise nx.NetworkXException("'sizes' and 'p' do not match.")
+    # Check for probability symmetry (undirected) and shape (directed)
+    for row in p:
+        if len(p) != len(row):
+            raise nx.NetworkXException("'p' must be a square matrix.")
+    if not directed:
+        p_transpose = [list(i) for i in zip(*p)]
+        for i in zip(p, p_transpose):
+            for j in zip(i[0], i[1]):
+                if abs(j[0] - j[1]) > 1e-08:
+                    raise nx.NetworkXException("'p' must be symmetric.")
+    # Check for probability range
+    for row in p:
+        for prob in row:
+            if prob < 0 or prob > 1:
+                raise nx.NetworkXException("Entries of 'p' not in [0,1].")
+    # Check for nodelist consistency
+    if nodelist is not None:
+        if len(nodelist) != sum(sizes):
+            raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
+        if len(nodelist) != len(set(nodelist)):
+            raise nx.NetworkXException("nodelist contains duplicate.")
+    else:
+        nodelist = range(0, sum(sizes))
+
+    # Setup the graph conditionally to the directed switch.
+    block_range = range(len(sizes))
+    if directed:
+        g = nx.DiGraph()
+        block_iter = itertools.product(block_range, block_range)
+    else:
+        g = nx.Graph()
+        block_iter = itertools.combinations_with_replacement(block_range, 2)
+    # Split nodelist in a partition (list of sets).
+    size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)]
+    g.graph['partition'] = [set(nodelist[size_cumsum[x]:size_cumsum[x + 1]])
+                            for x in range(0, len(size_cumsum) - 1)]
+    # Setup nodes and graph name
+    for block_id, nodes in enumerate(g.graph['partition']):
+        for node in nodes:
+            g.add_node(node, block=block_id)
+
+    g.name = "stochastic_block_model"
+
+    # Test for edge existence
+    parts = g.graph['partition']
+    for i, j in block_iter:
+        if i == j:
+            if directed:
+                if selfloops:
+                    edges = itertools.product(parts[i], parts[i])
+                else:
+                    edges = itertools.permutations(parts[i], 2)
+            else:
+                edges = itertools.combinations(parts[i], 2)
+                if selfloops:
+                    edges = itertools.chain(edges, zip(parts[i], parts[i]))
+            for e in edges:
+                if seed.random() < p[i][j]:
+                    g.add_edge(*e)
+        else:
+            edges = itertools.product(parts[i], parts[j])
+        if sparse:
+            if p[i][j] == 1:  # Test edges cases p_ij = 0 or 1
+                for e in edges:
+                    g.add_edge(*e)
+            elif p[i][j] > 0:
+                while True:
+                    try:
+                        logrand = math.log(seed.random())
+                        skip = math.floor(logrand / math.log(1 - p[i][j]))
+                        # consume "skip" edges
+                        next(itertools.islice(edges, skip, skip), None)
+                        e = next(edges)
+                        g.add_edge(*e)  # __safe
+                    except StopIteration:
+                        break
+        else:
+            for e in edges:
+                if seed.random() < p[i][j]:
+                    g.add_edge(*e)  # __safe
+    return g
+
+
+def _zipf_rv_below(gamma, xmin, threshold, seed):
+    """Returns a random value chosen from the bounded Zipf distribution.
+
+    Repeatedly draws values from the Zipf distribution until the
+    threshold is met, then returns that value.
+    """
+    result = nx.utils.zipf_rv(gamma, xmin, seed)
+    while result > threshold:
+        result = nx.utils.zipf_rv(gamma, xmin, seed)
+    return result
+
+
+def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
+    """Returns a list of numbers obeying a constrained power law distribution.
+
+    ``gamma`` and ``low`` are the parameters for the Zipf distribution.
+
+    ``high`` is the maximum allowed value for values draw from the Zipf
+    distribution. For more information, see :func:`_zipf_rv_below`.
+
+    ``condition`` and ``length`` are Boolean-valued functions on
+    lists. While generating the list, random values are drawn and
+    appended to the list until ``length`` is satisfied by the created
+    list. Once ``condition`` is satisfied, the sequence generated in
+    this way is returned.
+
+    ``max_iters`` indicates the number of times to generate a list
+    satisfying ``length``. If the number of iterations exceeds this
+    value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+    """
+    for i in range(max_iters):
+        seq = []
+        while not length(seq):
+            seq.append(_zipf_rv_below(gamma, low, high, seed))
+        if condition(seq):
+            return seq
+    raise nx.ExceededMaxIterations("Could not create power law sequence")
+
+
+# TODO Needs documentation.
+def _generate_min_degree(gamma, average_degree, max_degree, tolerance,
+                         max_iters):
+    """Returns a minimum degree from the given average degree."""
+    min_deg_top = max_degree
+    min_deg_bot = 1
+    min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
+    itrs = 0
+    mid_avg_deg = 0
+    while abs(mid_avg_deg - average_degree) > tolerance:
+        if itrs > max_iters:
+            raise nx.ExceededMaxIterations("Could not match average_degree")
+        mid_avg_deg = 0
+        for x in range(int(min_deg_mid), max_degree + 1):
+            mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid,
+                                                      tolerance)
+        if mid_avg_deg > average_degree:
+            min_deg_top = min_deg_mid
+            min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
+        else:
+            min_deg_bot = min_deg_mid
+            min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
+        itrs += 1
+    # return int(min_deg_mid + 0.5)
+    return round(min_deg_mid)
+
+
+def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
+    """Returns a list of sets, each of which represents a community.
+
+    ``degree_seq`` is the degree sequence that must be met by the
+    graph.
+
+    ``community_sizes`` is the community size distribution that must be
+    met by the generated list of sets.
+
+    ``mu`` is a float in the interval [0, 1] indicating the fraction of
+    intra-community edges incident to each node.
+
+    ``max_iters`` is the number of times to try to add a node to a
+    community. This must be greater than the length of
+    ``degree_seq``, otherwise this function will always fail. If
+    the number of iterations exceeds this value,
+    :exc:`~networkx.exception.ExceededMaxIterations` is raised.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    The communities returned by this are sets of integers in the set {0,
+    ..., *n* - 1}, where *n* is the length of ``degree_seq``.
+
+    """
+    # This assumes the nodes in the graph will be natural numbers.
+    result = [set() for _ in community_sizes]
+    n = len(degree_seq)
+    free = list(range(n))
+    for i in range(max_iters):
+        v = free.pop()
+        c = seed.choice(range(len(community_sizes)))
+        # s = int(degree_seq[v] * (1 - mu) + 0.5)
+        s = round(degree_seq[v] * (1 - mu))
+        # If the community is large enough, add the node to the chosen
+        # community. Otherwise, return it to the list of unaffiliated
+        # nodes.
+        if s < community_sizes[c]:
+            result[c].add(v)
+        else:
+            free.append(v)
+        # If the community is too big, remove a node from it.
+        if len(result[c]) > community_sizes[c]:
+            free.append(result[c].pop())
+        if not free:
+            return result
+    msg = 'Could not assign communities; try increasing min_community'
+    raise nx.ExceededMaxIterations(msg)
+
+
+@py_random_state(11)
+def LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=None,
+                        min_degree=None, max_degree=None, min_community=None,
+                        max_community=None, tol=1.0e-7, max_iters=500,
+                        seed=None):
+    r"""Returns the LFR benchmark graph.
+
+    This algorithm proceeds as follows:
+
+    1) Find a degree sequence with a power law distribution, and minimum
+       value ``min_degree``, which has approximate average degree
+       ``average_degree``. This is accomplished by either
+
+       a) specifying ``min_degree`` and not ``average_degree``,
+       b) specifying ``average_degree`` and not ``min_degree``, in which
+          case a suitable minimum degree will be found.
+
+       ``max_degree`` can also be specified, otherwise it will be set to
+       ``n``. Each node *u* will have `\mu \mathrm{deg}(u)` edges
+       joining it to nodes in communities other than its own and `(1 -
+       \mu) \mathrm{deg}(u)` edges joining it to nodes in its own
+       community.
+    2) Generate community sizes according to a power law distribution
+       with exponent ``tau2``. If ``min_community`` and
+       ``max_community`` are not specified they will be selected to be
+       ``min_degree`` and ``max_degree``, respectively.  Community sizes
+       are generated until the sum of their sizes equals ``n``.
+    3) Each node will be randomly assigned a community with the
+       condition that the community is large enough for the node's
+       intra-community degree, `(1 - \mu) \mathrm{deg}(u)` as
+       described in step 2. If a community grows too large, a random node
+       will be selected for reassignment to a new community, until all
+       nodes have been assigned a community.
+    4) Each node *u* then adds `(1 - \mu) \mathrm{deg}(u)`
+       intra-community edges and `\mu \mathrm{deg}(u)` inter-community
+       edges.
+
+    Parameters
+    ----------
+    n : int
+        Number of nodes in the created graph.
+
+    tau1 : float
+        Power law exponent for the degree distribution of the created
+        graph. This value must be strictly greater than one.
+
+    tau2 : float
+        Power law exponent for the community size distribution in the
+        created graph. This value must be strictly greater than one.
+
+    mu : float
+        Fraction of intra-community edges incident to each node. This
+        value must be in the interval [0, 1].
+
+    average_degree : float
+        Desired average degree of nodes in the created graph. This value
+        must be in the interval [0, *n*]. Exactly one of this and
+        ``min_degree`` must be specified, otherwise a
+        :exc:`NetworkXError` is raised.
+
+    min_degree : int
+        Minimum degree of nodes in the created graph. This value must be
+        in the interval [0, *n*]. Exactly one of this and
+        ``average_degree`` must be specified, otherwise a
+        :exc:`NetworkXError` is raised.
+
+    max_degree : int
+        Maximum degree of nodes in the created graph. If not specified,
+        this is set to ``n``, the total number of nodes in the graph.
+
+    min_community : int
+        Minimum size of communities in the graph. If not specified, this
+        is set to ``min_degree``.
+
+    max_community : int
+        Maximum size of communities in the graph. If not specified, this
+        is set to ``n``, the total number of nodes in the graph.
+
+    tol : float
+        Tolerance when comparing floats, specifically when comparing
+        average degree values.
+
+    max_iters : int
+        Maximum number of iterations to try to create the community sizes,
+        degree distribution, and community affiliations.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    G : NetworkX graph
+        The LFR benchmark graph generated according to the specified
+        parameters.
+
+        Each node in the graph has a node attribute ``'community'`` that
+        stores the community (that is, the set of nodes) that includes
+        it.
+
+    Raises
+    ------
+    NetworkXError
+        If any of the parameters do not meet their upper and lower bounds:
+
+        - ``tau1`` and ``tau2`` must be strictly greater than 1.
+        - ``mu`` must be in [0, 1].
+        - ``max_degree`` must be in {1, ..., *n*}.
+        - ``min_community`` and ``max_community`` must be in {0, ...,
+          *n*}.
+
+        If not exactly one of ``average_degree`` and ``min_degree`` is
+        specified.
+
+        If ``min_degree`` is not specified and a suitable ``min_degree``
+        cannot be found.
+
+    ExceededMaxIterations
+        If a valid degree sequence cannot be created within
+        ``max_iters`` number of iterations.
+
+        If a valid set of community sizes cannot be created within
+        ``max_iters`` number of iterations.
+
+        If a valid community assignment cannot be created within ``10 *
+        n * max_iters`` number of iterations.
+
+    Examples
+    --------
+    Basic usage::
+
+        >>> from networkx.generators.community import LFR_benchmark_graph
+        >>> n = 250
+        >>> tau1 = 3
+        >>> tau2 = 1.5
+        >>> mu = 0.1
+        >>> G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
+        ...                         min_community=20, seed=10)
+
+    Continuing the example above, you can get the communities from the
+    node attributes of the graph::
+
+        >>> communities = {frozenset(G.nodes[v]['community']) for v in G}
+
+    Notes
+    -----
+    This algorithm differs slightly from the original way it was
+    presented in [1].
+
+    1) Rather than connecting the graph via a configuration model then
+       rewiring to match the intra-community and inter-community
+       degrees, we do this wiring explicitly at the end, which should be
+       equivalent.
+    2) The code posted on the author's website [2] calculates the random
+       power law distributed variables and their average using
+       continuous approximations, whereas we use the discrete
+       distributions here as both degree and community size are
+       discrete.
+
+    Though the authors describe the algorithm as quite robust, testing
+    during development indicates that a somewhat narrower parameter set
+    is likely to successfully produce a graph. Some suggestions have
+    been provided in the event of exceptions.
+
+    References
+    ----------
+    .. [1] "Benchmark graphs for testing community detection algorithms",
+           Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
+           Phys. Rev. E 78, 046110 2008
+    .. [2] http://santo.fortunato.googlepages.com/inthepress2
+
+    """
+    # Perform some basic parameter validation.
+    if not tau1 > 1:
+        raise nx.NetworkXError("tau1 must be greater than one")
+    if not tau2 > 1:
+        raise nx.NetworkXError("tau2 must be greater than one")
+    if not 0 <= mu <= 1:
+        raise nx.NetworkXError("mu must be in the interval [0, 1]")
+
+    # Validate parameters for generating the degree sequence.
+    if max_degree is None:
+        max_degree = n
+    elif not 0 < max_degree <= n:
+        raise nx.NetworkXError("max_degree must be in the interval (0, n]")
+    if not ((min_degree is None) ^ (average_degree is None)):
+        raise nx.NetworkXError("Must assign exactly one of min_degree and"
+                               " average_degree")
+    if min_degree is None:
+        min_degree = _generate_min_degree(tau1, average_degree, max_degree,
+                                          tol, max_iters)
+
+    # Generate a degree sequence with a power law distribution.
+    low, high = min_degree, max_degree
+
+    def condition(seq): return sum(seq) % 2 == 0
+
+    def length(seq): return len(seq) >= n
+    deg_seq = _powerlaw_sequence(tau1, low, high, condition,
+                                 length, max_iters, seed)
+
+    # Validate parameters for generating the community size sequence.
+    if min_community is None:
+        min_community = min(deg_seq)
+    if max_community is None:
+        max_community = max(deg_seq)
+
+    # Generate a community size sequence with a power law distribution.
+    #
+    # TODO The original code incremented the number of iterations each
+    # time a new Zipf random value was drawn from the distribution. This
+    # differed from the way the number of iterations was incremented in
+    # `_powerlaw_degree_sequence`, so this code was changed to match
+    # that one. As a result, this code is allowed many more chances to
+    # generate a valid community size sequence.
+    low, high = min_community, max_community
+
+    def condition(seq): return sum(seq) == n
+
+    def length(seq): return sum(seq) >= n
+    comms = _powerlaw_sequence(tau2, low, high, condition,
+                               length, max_iters, seed)
+
+    # Generate the communities based on the given degree sequence and
+    # community sizes.
+    max_iters *= 10 * n
+    communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
+
+    # Finally, generate the benchmark graph based on the given
+    # communities, joining nodes according to the intra- and
+    # inter-community degrees.
+    G = nx.Graph()
+    G.add_nodes_from(range(n))
+    for c in communities:
+        for u in c:
+            while G.degree(u) < round(deg_seq[u] * (1 - mu)):
+                v = seed.choice(list(c))
+                G.add_edge(u, v)
+            while G.degree(u) < deg_seq[u]:
+                v = seed.choice(range(n))
+                if v not in c:
+                    G.add_edge(u, v)
+            G.nodes[u]['community'] = c
+    return G
author	guerler
date	Fri, 31 Jul 2020 00:32:28 -0400
parents
children