comparison planemo/lib/python3.7/site-packages/networkx/algorithms/clique.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # Copyright (C) 2004-2019 by
2 # Aric Hagberg <hagberg@lanl.gov>
3 # Dan Schult <dschult@colgate.edu>
4 # Pieter Swart <swart@lanl.gov>
5 # All rights reserved.
6 # BSD license.
7 """Functions for finding and manipulating cliques.
8
9 Finding the largest clique in a graph is NP-complete problem, so most of
10 these algorithms have an exponential running time; for more information,
11 see the Wikipedia article on the clique problem [1]_.
12
13 .. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem
14
15 """
16 from collections import deque
17 from itertools import chain
18 from itertools import combinations
19 from itertools import islice
20 try:
21 from itertools import ifilter as filter
22 except ImportError:
23 pass
24 import networkx as nx
25 from networkx.utils import not_implemented_for
26 __author__ = """Dan Schult (dschult@colgate.edu)"""
27 __all__ = ['find_cliques', 'find_cliques_recursive', 'make_max_clique_graph',
28 'make_clique_bipartite', 'graph_clique_number',
29 'graph_number_of_cliques', 'node_clique_number',
30 'number_of_cliques', 'cliques_containing_node',
31 'enumerate_all_cliques']
32
33
34 @not_implemented_for('directed')
35 def enumerate_all_cliques(G):
36 """Returns all cliques in an undirected graph.
37
38 This function returns an iterator over cliques, each of which is a
39 list of nodes. The iteration is ordered by cardinality of the
40 cliques: first all cliques of size one, then all cliques of size
41 two, etc.
42
43 Parameters
44 ----------
45 G : NetworkX graph
46 An undirected graph.
47
48 Returns
49 -------
50 iterator
51 An iterator over cliques, each of which is a list of nodes in
52 `G`. The cliques are ordered according to size.
53
54 Notes
55 -----
56 To obtain a list of all cliques, use
57 `list(enumerate_all_cliques(G))`. However, be aware that in the
58 worst-case, the length of this list can be exponential in the number
59 of nodes in the graph (for example, when the graph is the complete
60 graph). This function avoids storing all cliques in memory by only
61 keeping current candidate node lists in memory during its search.
62
63 The implementation is adapted from the algorithm by Zhang, et
64 al. (2005) [1]_ to output all cliques discovered.
65
66 This algorithm ignores self-loops and parallel edges, since cliques
67 are not conventionally defined with such edges.
68
69 References
70 ----------
71 .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J.,
72 Langston, M.A., Samatova, N.F.,
73 "Genome-Scale Computational Approaches to Memory-Intensive
74 Applications in Systems Biology".
75 *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005
76 Conference, pp. 12, 12--18 Nov. 2005.
77 <https://doi.org/10.1109/SC.2005.29>.
78
79 """
80 index = {}
81 nbrs = {}
82 for u in G:
83 index[u] = len(index)
84 # Neighbors of u that appear after u in the iteration order of G.
85 nbrs[u] = {v for v in G[u] if v not in index}
86
87 queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G)
88 # Loop invariants:
89 # 1. len(base) is nondecreasing.
90 # 2. (base + cnbrs) is sorted with respect to the iteration order of G.
91 # 3. cnbrs is a set of common neighbors of nodes in base.
92 while queue:
93 base, cnbrs = map(list, queue.popleft())
94 yield base
95 for i, u in enumerate(cnbrs):
96 # Use generators to reduce memory consumption.
97 queue.append((chain(base, [u]),
98 filter(nbrs[u].__contains__,
99 islice(cnbrs, i + 1, None))))
100
101
102 @not_implemented_for('directed')
103 def find_cliques(G):
104 """Returns all maximal cliques in an undirected graph.
105
106 For each node *v*, a *maximal clique for v* is a largest complete
107 subgraph containing *v*. The largest maximal clique is sometimes
108 called the *maximum clique*.
109
110 This function returns an iterator over cliques, each of which is a
111 list of nodes. It is an iterative implementation, so should not
112 suffer from recursion depth issues.
113
114 Parameters
115 ----------
116 G : NetworkX graph
117 An undirected graph.
118
119 Returns
120 -------
121 iterator
122 An iterator over maximal cliques, each of which is a list of
123 nodes in `G`. The order of cliques is arbitrary.
124
125 See Also
126 --------
127 find_cliques_recursive
128 A recursive version of the same algorithm.
129
130 Notes
131 -----
132 To obtain a list of all maximal cliques, use
133 `list(find_cliques(G))`. However, be aware that in the worst-case,
134 the length of this list can be exponential in the number of nodes in
135 the graph. This function avoids storing all cliques in memory by
136 only keeping current candidate node lists in memory during its search.
137
138 This implementation is based on the algorithm published by Bron and
139 Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
140 (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It
141 essentially unrolls the recursion used in the references to avoid
142 issues of recursion stack depth (for a recursive implementation, see
143 :func:`find_cliques_recursive`).
144
145 This algorithm ignores self-loops and parallel edges, since cliques
146 are not conventionally defined with such edges.
147
148 References
149 ----------
150 .. [1] Bron, C. and Kerbosch, J.
151 "Algorithm 457: finding all cliques of an undirected graph".
152 *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
153 <http://portal.acm.org/citation.cfm?doid=362342.362367>
154
155 .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
156 "The worst-case time complexity for generating all maximal
157 cliques and computational experiments",
158 *Theoretical Computer Science*, Volume 363, Issue 1,
159 Computing and Combinatorics,
160 10th Annual International Conference on
161 Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
162 <https://doi.org/10.1016/j.tcs.2006.06.015>
163
164 .. [3] F. Cazals, C. Karande,
165 "A note on the problem of reporting maximal cliques",
166 *Theoretical Computer Science*,
167 Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
168 <https://doi.org/10.1016/j.tcs.2008.05.010>
169
170 """
171 if len(G) == 0:
172 return
173
174 adj = {u: {v for v in G[u] if v != u} for u in G}
175 Q = [None]
176
177 subg = set(G)
178 cand = set(G)
179 u = max(subg, key=lambda u: len(cand & adj[u]))
180 ext_u = cand - adj[u]
181 stack = []
182
183 try:
184 while True:
185 if ext_u:
186 q = ext_u.pop()
187 cand.remove(q)
188 Q[-1] = q
189 adj_q = adj[q]
190 subg_q = subg & adj_q
191 if not subg_q:
192 yield Q[:]
193 else:
194 cand_q = cand & adj_q
195 if cand_q:
196 stack.append((subg, cand, ext_u))
197 Q.append(None)
198 subg = subg_q
199 cand = cand_q
200 u = max(subg, key=lambda u: len(cand & adj[u]))
201 ext_u = cand - adj[u]
202 else:
203 Q.pop()
204 subg, cand, ext_u = stack.pop()
205 except IndexError:
206 pass
207
208
209 # TODO Should this also be not implemented for directed graphs?
210 def find_cliques_recursive(G):
211 """Returns all maximal cliques in a graph.
212
213 For each node *v*, a *maximal clique for v* is a largest complete
214 subgraph containing *v*. The largest maximal clique is sometimes
215 called the *maximum clique*.
216
217 This function returns an iterator over cliques, each of which is a
218 list of nodes. It is a recursive implementation, so may suffer from
219 recursion depth issues.
220
221 Parameters
222 ----------
223 G : NetworkX graph
224
225 Returns
226 -------
227 iterator
228 An iterator over maximal cliques, each of which is a list of
229 nodes in `G`. The order of cliques is arbitrary.
230
231 See Also
232 --------
233 find_cliques
234 An iterative version of the same algorithm.
235
236 Notes
237 -----
238 To obtain a list of all maximal cliques, use
239 `list(find_cliques_recursive(G))`. However, be aware that in the
240 worst-case, the length of this list can be exponential in the number
241 of nodes in the graph. This function avoids storing all cliques in memory
242 by only keeping current candidate node lists in memory during its search.
243
244 This implementation is based on the algorithm published by Bron and
245 Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
246 (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a
247 non-recursive implementation, see :func:`find_cliques`.
248
249 This algorithm ignores self-loops and parallel edges, since cliques
250 are not conventionally defined with such edges.
251
252 References
253 ----------
254 .. [1] Bron, C. and Kerbosch, J.
255 "Algorithm 457: finding all cliques of an undirected graph".
256 *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
257 <http://portal.acm.org/citation.cfm?doid=362342.362367>
258
259 .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
260 "The worst-case time complexity for generating all maximal
261 cliques and computational experiments",
262 *Theoretical Computer Science*, Volume 363, Issue 1,
263 Computing and Combinatorics,
264 10th Annual International Conference on
265 Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
266 <https://doi.org/10.1016/j.tcs.2006.06.015>
267
268 .. [3] F. Cazals, C. Karande,
269 "A note on the problem of reporting maximal cliques",
270 *Theoretical Computer Science*,
271 Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
272 <https://doi.org/10.1016/j.tcs.2008.05.010>
273
274 """
275 if len(G) == 0:
276 return iter([])
277
278 adj = {u: {v for v in G[u] if v != u} for u in G}
279 Q = []
280
281 def expand(subg, cand):
282 u = max(subg, key=lambda u: len(cand & adj[u]))
283 for q in cand - adj[u]:
284 cand.remove(q)
285 Q.append(q)
286 adj_q = adj[q]
287 subg_q = subg & adj_q
288 if not subg_q:
289 yield Q[:]
290 else:
291 cand_q = cand & adj_q
292 if cand_q:
293 for clique in expand(subg_q, cand_q):
294 yield clique
295 Q.pop()
296
297 return expand(set(G), set(G))
298
299
300 def make_max_clique_graph(G, create_using=None):
301 """Returns the maximal clique graph of the given graph.
302
303 The nodes of the maximal clique graph of `G` are the cliques of
304 `G` and an edge joins two cliques if the cliques are not disjoint.
305
306 Parameters
307 ----------
308 G : NetworkX graph
309
310 create_using : NetworkX graph constructor, optional (default=nx.Graph)
311 Graph type to create. If graph instance, then cleared before populated.
312
313 Returns
314 -------
315 NetworkX graph
316 A graph whose nodes are the cliques of `G` and whose edges
317 join two cliques if they are not disjoint.
318
319 Notes
320 -----
321 This function behaves like the following code::
322
323 import networkx as nx
324 G = nx.make_clique_bipartite(G)
325 cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0]
326 G = nx.bipartite.project(G, cliques)
327 G = nx.relabel_nodes(G, {-v: v - 1 for v in G})
328
329 It should be faster, though, since it skips all the intermediate
330 steps.
331
332 """
333 if create_using is None:
334 B = G.__class__()
335 else:
336 B = nx.empty_graph(0, create_using)
337 cliques = list(enumerate(set(c) for c in find_cliques(G)))
338 # Add a numbered node for each clique.
339 B.add_nodes_from(i for i, c in cliques)
340 # Join cliques by an edge if they share a node.
341 clique_pairs = combinations(cliques, 2)
342 B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2)
343 return B
344
345
346 def make_clique_bipartite(G, fpos=None, create_using=None, name=None):
347 """Returns the bipartite clique graph corresponding to `G`.
348
349 In the returned bipartite graph, the "bottom" nodes are the nodes of
350 `G` and the "top" nodes represent the maximal cliques of `G`.
351 There is an edge from node *v* to clique *C* in the returned graph
352 if and only if *v* is an element of *C*.
353
354 Parameters
355 ----------
356 G : NetworkX graph
357 An undirected graph.
358
359 fpos : bool
360 If True or not None, the returned graph will have an
361 additional attribute, `pos`, a dictionary mapping node to
362 position in the Euclidean plane.
363
364 create_using : NetworkX graph constructor, optional (default=nx.Graph)
365 Graph type to create. If graph instance, then cleared before populated.
366
367 Returns
368 -------
369 NetworkX graph
370 A bipartite graph whose "bottom" set is the nodes of the graph
371 `G`, whose "top" set is the cliques of `G`, and whose edges
372 join nodes of `G` to the cliques that contain them.
373
374 The nodes of the graph `G` have the node attribute
375 'bipartite' set to 1 and the nodes representing cliques
376 have the node attribute 'bipartite' set to 0, as is the
377 convention for bipartite graphs in NetworkX.
378
379 """
380 B = nx.empty_graph(0, create_using)
381 B.clear()
382 # The "bottom" nodes in the bipartite graph are the nodes of the
383 # original graph, G.
384 B.add_nodes_from(G, bipartite=1)
385 for i, cl in enumerate(find_cliques(G)):
386 # The "top" nodes in the bipartite graph are the cliques. These
387 # nodes get negative numbers as labels.
388 name = -i - 1
389 B.add_node(name, bipartite=0)
390 B.add_edges_from((v, name) for v in cl)
391 return B
392
393
394 def graph_clique_number(G, cliques=None):
395 """Returns the clique number of the graph.
396
397 The *clique number* of a graph is the size of the largest clique in
398 the graph.
399
400 Parameters
401 ----------
402 G : NetworkX graph
403 An undirected graph.
404
405 cliques : list
406 A list of cliques, each of which is itself a list of nodes. If
407 not specified, the list of all cliques will be computed, as by
408 :func:`find_cliques`.
409
410 Returns
411 -------
412 int
413 The size of the largest clique in `G`.
414
415 Notes
416 -----
417 You should provide `cliques` if you have already computed the list
418 of maximal cliques, in order to avoid an exponential time search for
419 maximal cliques.
420
421 """
422 if cliques is None:
423 cliques = find_cliques(G)
424 if len(G.nodes) < 1:
425 return 0
426 return max([len(c) for c in cliques] or [1])
427
428
429 def graph_number_of_cliques(G, cliques=None):
430 """Returns the number of maximal cliques in the graph.
431
432 Parameters
433 ----------
434 G : NetworkX graph
435 An undirected graph.
436
437 cliques : list
438 A list of cliques, each of which is itself a list of nodes. If
439 not specified, the list of all cliques will be computed, as by
440 :func:`find_cliques`.
441
442 Returns
443 -------
444 int
445 The number of maximal cliques in `G`.
446
447 Notes
448 -----
449 You should provide `cliques` if you have already computed the list
450 of maximal cliques, in order to avoid an exponential time search for
451 maximal cliques.
452
453 """
454 if cliques is None:
455 cliques = list(find_cliques(G))
456 return len(cliques)
457
458
459 def node_clique_number(G, nodes=None, cliques=None):
460 """ Returns the size of the largest maximal clique containing
461 each given node.
462
463 Returns a single or list depending on input nodes.
464 Optional list of cliques can be input if already computed.
465 """
466 if cliques is None:
467 if nodes is not None:
468 # Use ego_graph to decrease size of graph
469 if isinstance(nodes, list):
470 d = {}
471 for n in nodes:
472 H = nx.ego_graph(G, n)
473 d[n] = max((len(c) for c in find_cliques(H)))
474 else:
475 H = nx.ego_graph(G, nodes)
476 d = max((len(c) for c in find_cliques(H)))
477 return d
478 # nodes is None--find all cliques
479 cliques = list(find_cliques(G))
480
481 if nodes is None:
482 nodes = list(G.nodes()) # none, get entire graph
483
484 if not isinstance(nodes, list): # check for a list
485 v = nodes
486 # assume it is a single value
487 d = max([len(c) for c in cliques if v in c])
488 else:
489 d = {}
490 for v in nodes:
491 d[v] = max([len(c) for c in cliques if v in c])
492 return d
493
494 # if nodes is None: # none, use entire graph
495 # nodes=G.nodes()
496 # elif not isinstance(nodes, list): # check for a list
497 # nodes=[nodes] # assume it is a single value
498
499 # if cliques is None:
500 # cliques=list(find_cliques(G))
501 # d={}
502 # for v in nodes:
503 # d[v]=max([len(c) for c in cliques if v in c])
504
505 # if nodes in G:
506 # return d[v] #return single value
507 # return d
508
509
510 def number_of_cliques(G, nodes=None, cliques=None):
511 """Returns the number of maximal cliques for each node.
512
513 Returns a single or list depending on input nodes.
514 Optional list of cliques can be input if already computed.
515 """
516 if cliques is None:
517 cliques = list(find_cliques(G))
518
519 if nodes is None:
520 nodes = list(G.nodes()) # none, get entire graph
521
522 if not isinstance(nodes, list): # check for a list
523 v = nodes
524 # assume it is a single value
525 numcliq = len([1 for c in cliques if v in c])
526 else:
527 numcliq = {}
528 for v in nodes:
529 numcliq[v] = len([1 for c in cliques if v in c])
530 return numcliq
531
532
533 def cliques_containing_node(G, nodes=None, cliques=None):
534 """Returns a list of cliques containing the given node.
535
536 Returns a single list or list of lists depending on input nodes.
537 Optional list of cliques can be input if already computed.
538 """
539 if cliques is None:
540 cliques = list(find_cliques(G))
541
542 if nodes is None:
543 nodes = list(G.nodes()) # none, get entire graph
544
545 if not isinstance(nodes, list): # check for a list
546 v = nodes
547 # assume it is a single value
548 vcliques = [c for c in cliques if v in c]
549 else:
550 vcliques = {}
551 for v in nodes:
552 vcliques[v] = [c for c in cliques if v in c]
553 return vcliques