Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/networkx/algorithms/clique.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # Copyright (C) 2004-2019 by | |
2 # Aric Hagberg <hagberg@lanl.gov> | |
3 # Dan Schult <dschult@colgate.edu> | |
4 # Pieter Swart <swart@lanl.gov> | |
5 # All rights reserved. | |
6 # BSD license. | |
7 """Functions for finding and manipulating cliques. | |
8 | |
9 Finding the largest clique in a graph is NP-complete problem, so most of | |
10 these algorithms have an exponential running time; for more information, | |
11 see the Wikipedia article on the clique problem [1]_. | |
12 | |
13 .. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem | |
14 | |
15 """ | |
16 from collections import deque | |
17 from itertools import chain | |
18 from itertools import combinations | |
19 from itertools import islice | |
20 try: | |
21 from itertools import ifilter as filter | |
22 except ImportError: | |
23 pass | |
24 import networkx as nx | |
25 from networkx.utils import not_implemented_for | |
26 __author__ = """Dan Schult (dschult@colgate.edu)""" | |
27 __all__ = ['find_cliques', 'find_cliques_recursive', 'make_max_clique_graph', | |
28 'make_clique_bipartite', 'graph_clique_number', | |
29 'graph_number_of_cliques', 'node_clique_number', | |
30 'number_of_cliques', 'cliques_containing_node', | |
31 'enumerate_all_cliques'] | |
32 | |
33 | |
34 @not_implemented_for('directed') | |
35 def enumerate_all_cliques(G): | |
36 """Returns all cliques in an undirected graph. | |
37 | |
38 This function returns an iterator over cliques, each of which is a | |
39 list of nodes. The iteration is ordered by cardinality of the | |
40 cliques: first all cliques of size one, then all cliques of size | |
41 two, etc. | |
42 | |
43 Parameters | |
44 ---------- | |
45 G : NetworkX graph | |
46 An undirected graph. | |
47 | |
48 Returns | |
49 ------- | |
50 iterator | |
51 An iterator over cliques, each of which is a list of nodes in | |
52 `G`. The cliques are ordered according to size. | |
53 | |
54 Notes | |
55 ----- | |
56 To obtain a list of all cliques, use | |
57 `list(enumerate_all_cliques(G))`. However, be aware that in the | |
58 worst-case, the length of this list can be exponential in the number | |
59 of nodes in the graph (for example, when the graph is the complete | |
60 graph). This function avoids storing all cliques in memory by only | |
61 keeping current candidate node lists in memory during its search. | |
62 | |
63 The implementation is adapted from the algorithm by Zhang, et | |
64 al. (2005) [1]_ to output all cliques discovered. | |
65 | |
66 This algorithm ignores self-loops and parallel edges, since cliques | |
67 are not conventionally defined with such edges. | |
68 | |
69 References | |
70 ---------- | |
71 .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J., | |
72 Langston, M.A., Samatova, N.F., | |
73 "Genome-Scale Computational Approaches to Memory-Intensive | |
74 Applications in Systems Biology". | |
75 *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005 | |
76 Conference, pp. 12, 12--18 Nov. 2005. | |
77 <https://doi.org/10.1109/SC.2005.29>. | |
78 | |
79 """ | |
80 index = {} | |
81 nbrs = {} | |
82 for u in G: | |
83 index[u] = len(index) | |
84 # Neighbors of u that appear after u in the iteration order of G. | |
85 nbrs[u] = {v for v in G[u] if v not in index} | |
86 | |
87 queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G) | |
88 # Loop invariants: | |
89 # 1. len(base) is nondecreasing. | |
90 # 2. (base + cnbrs) is sorted with respect to the iteration order of G. | |
91 # 3. cnbrs is a set of common neighbors of nodes in base. | |
92 while queue: | |
93 base, cnbrs = map(list, queue.popleft()) | |
94 yield base | |
95 for i, u in enumerate(cnbrs): | |
96 # Use generators to reduce memory consumption. | |
97 queue.append((chain(base, [u]), | |
98 filter(nbrs[u].__contains__, | |
99 islice(cnbrs, i + 1, None)))) | |
100 | |
101 | |
102 @not_implemented_for('directed') | |
103 def find_cliques(G): | |
104 """Returns all maximal cliques in an undirected graph. | |
105 | |
106 For each node *v*, a *maximal clique for v* is a largest complete | |
107 subgraph containing *v*. The largest maximal clique is sometimes | |
108 called the *maximum clique*. | |
109 | |
110 This function returns an iterator over cliques, each of which is a | |
111 list of nodes. It is an iterative implementation, so should not | |
112 suffer from recursion depth issues. | |
113 | |
114 Parameters | |
115 ---------- | |
116 G : NetworkX graph | |
117 An undirected graph. | |
118 | |
119 Returns | |
120 ------- | |
121 iterator | |
122 An iterator over maximal cliques, each of which is a list of | |
123 nodes in `G`. The order of cliques is arbitrary. | |
124 | |
125 See Also | |
126 -------- | |
127 find_cliques_recursive | |
128 A recursive version of the same algorithm. | |
129 | |
130 Notes | |
131 ----- | |
132 To obtain a list of all maximal cliques, use | |
133 `list(find_cliques(G))`. However, be aware that in the worst-case, | |
134 the length of this list can be exponential in the number of nodes in | |
135 the graph. This function avoids storing all cliques in memory by | |
136 only keeping current candidate node lists in memory during its search. | |
137 | |
138 This implementation is based on the algorithm published by Bron and | |
139 Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi | |
140 (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It | |
141 essentially unrolls the recursion used in the references to avoid | |
142 issues of recursion stack depth (for a recursive implementation, see | |
143 :func:`find_cliques_recursive`). | |
144 | |
145 This algorithm ignores self-loops and parallel edges, since cliques | |
146 are not conventionally defined with such edges. | |
147 | |
148 References | |
149 ---------- | |
150 .. [1] Bron, C. and Kerbosch, J. | |
151 "Algorithm 457: finding all cliques of an undirected graph". | |
152 *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. | |
153 <http://portal.acm.org/citation.cfm?doid=362342.362367> | |
154 | |
155 .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, | |
156 "The worst-case time complexity for generating all maximal | |
157 cliques and computational experiments", | |
158 *Theoretical Computer Science*, Volume 363, Issue 1, | |
159 Computing and Combinatorics, | |
160 10th Annual International Conference on | |
161 Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 | |
162 <https://doi.org/10.1016/j.tcs.2006.06.015> | |
163 | |
164 .. [3] F. Cazals, C. Karande, | |
165 "A note on the problem of reporting maximal cliques", | |
166 *Theoretical Computer Science*, | |
167 Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, | |
168 <https://doi.org/10.1016/j.tcs.2008.05.010> | |
169 | |
170 """ | |
171 if len(G) == 0: | |
172 return | |
173 | |
174 adj = {u: {v for v in G[u] if v != u} for u in G} | |
175 Q = [None] | |
176 | |
177 subg = set(G) | |
178 cand = set(G) | |
179 u = max(subg, key=lambda u: len(cand & adj[u])) | |
180 ext_u = cand - adj[u] | |
181 stack = [] | |
182 | |
183 try: | |
184 while True: | |
185 if ext_u: | |
186 q = ext_u.pop() | |
187 cand.remove(q) | |
188 Q[-1] = q | |
189 adj_q = adj[q] | |
190 subg_q = subg & adj_q | |
191 if not subg_q: | |
192 yield Q[:] | |
193 else: | |
194 cand_q = cand & adj_q | |
195 if cand_q: | |
196 stack.append((subg, cand, ext_u)) | |
197 Q.append(None) | |
198 subg = subg_q | |
199 cand = cand_q | |
200 u = max(subg, key=lambda u: len(cand & adj[u])) | |
201 ext_u = cand - adj[u] | |
202 else: | |
203 Q.pop() | |
204 subg, cand, ext_u = stack.pop() | |
205 except IndexError: | |
206 pass | |
207 | |
208 | |
209 # TODO Should this also be not implemented for directed graphs? | |
210 def find_cliques_recursive(G): | |
211 """Returns all maximal cliques in a graph. | |
212 | |
213 For each node *v*, a *maximal clique for v* is a largest complete | |
214 subgraph containing *v*. The largest maximal clique is sometimes | |
215 called the *maximum clique*. | |
216 | |
217 This function returns an iterator over cliques, each of which is a | |
218 list of nodes. It is a recursive implementation, so may suffer from | |
219 recursion depth issues. | |
220 | |
221 Parameters | |
222 ---------- | |
223 G : NetworkX graph | |
224 | |
225 Returns | |
226 ------- | |
227 iterator | |
228 An iterator over maximal cliques, each of which is a list of | |
229 nodes in `G`. The order of cliques is arbitrary. | |
230 | |
231 See Also | |
232 -------- | |
233 find_cliques | |
234 An iterative version of the same algorithm. | |
235 | |
236 Notes | |
237 ----- | |
238 To obtain a list of all maximal cliques, use | |
239 `list(find_cliques_recursive(G))`. However, be aware that in the | |
240 worst-case, the length of this list can be exponential in the number | |
241 of nodes in the graph. This function avoids storing all cliques in memory | |
242 by only keeping current candidate node lists in memory during its search. | |
243 | |
244 This implementation is based on the algorithm published by Bron and | |
245 Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi | |
246 (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a | |
247 non-recursive implementation, see :func:`find_cliques`. | |
248 | |
249 This algorithm ignores self-loops and parallel edges, since cliques | |
250 are not conventionally defined with such edges. | |
251 | |
252 References | |
253 ---------- | |
254 .. [1] Bron, C. and Kerbosch, J. | |
255 "Algorithm 457: finding all cliques of an undirected graph". | |
256 *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. | |
257 <http://portal.acm.org/citation.cfm?doid=362342.362367> | |
258 | |
259 .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, | |
260 "The worst-case time complexity for generating all maximal | |
261 cliques and computational experiments", | |
262 *Theoretical Computer Science*, Volume 363, Issue 1, | |
263 Computing and Combinatorics, | |
264 10th Annual International Conference on | |
265 Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 | |
266 <https://doi.org/10.1016/j.tcs.2006.06.015> | |
267 | |
268 .. [3] F. Cazals, C. Karande, | |
269 "A note on the problem of reporting maximal cliques", | |
270 *Theoretical Computer Science*, | |
271 Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, | |
272 <https://doi.org/10.1016/j.tcs.2008.05.010> | |
273 | |
274 """ | |
275 if len(G) == 0: | |
276 return iter([]) | |
277 | |
278 adj = {u: {v for v in G[u] if v != u} for u in G} | |
279 Q = [] | |
280 | |
281 def expand(subg, cand): | |
282 u = max(subg, key=lambda u: len(cand & adj[u])) | |
283 for q in cand - adj[u]: | |
284 cand.remove(q) | |
285 Q.append(q) | |
286 adj_q = adj[q] | |
287 subg_q = subg & adj_q | |
288 if not subg_q: | |
289 yield Q[:] | |
290 else: | |
291 cand_q = cand & adj_q | |
292 if cand_q: | |
293 for clique in expand(subg_q, cand_q): | |
294 yield clique | |
295 Q.pop() | |
296 | |
297 return expand(set(G), set(G)) | |
298 | |
299 | |
300 def make_max_clique_graph(G, create_using=None): | |
301 """Returns the maximal clique graph of the given graph. | |
302 | |
303 The nodes of the maximal clique graph of `G` are the cliques of | |
304 `G` and an edge joins two cliques if the cliques are not disjoint. | |
305 | |
306 Parameters | |
307 ---------- | |
308 G : NetworkX graph | |
309 | |
310 create_using : NetworkX graph constructor, optional (default=nx.Graph) | |
311 Graph type to create. If graph instance, then cleared before populated. | |
312 | |
313 Returns | |
314 ------- | |
315 NetworkX graph | |
316 A graph whose nodes are the cliques of `G` and whose edges | |
317 join two cliques if they are not disjoint. | |
318 | |
319 Notes | |
320 ----- | |
321 This function behaves like the following code:: | |
322 | |
323 import networkx as nx | |
324 G = nx.make_clique_bipartite(G) | |
325 cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0] | |
326 G = nx.bipartite.project(G, cliques) | |
327 G = nx.relabel_nodes(G, {-v: v - 1 for v in G}) | |
328 | |
329 It should be faster, though, since it skips all the intermediate | |
330 steps. | |
331 | |
332 """ | |
333 if create_using is None: | |
334 B = G.__class__() | |
335 else: | |
336 B = nx.empty_graph(0, create_using) | |
337 cliques = list(enumerate(set(c) for c in find_cliques(G))) | |
338 # Add a numbered node for each clique. | |
339 B.add_nodes_from(i for i, c in cliques) | |
340 # Join cliques by an edge if they share a node. | |
341 clique_pairs = combinations(cliques, 2) | |
342 B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2) | |
343 return B | |
344 | |
345 | |
346 def make_clique_bipartite(G, fpos=None, create_using=None, name=None): | |
347 """Returns the bipartite clique graph corresponding to `G`. | |
348 | |
349 In the returned bipartite graph, the "bottom" nodes are the nodes of | |
350 `G` and the "top" nodes represent the maximal cliques of `G`. | |
351 There is an edge from node *v* to clique *C* in the returned graph | |
352 if and only if *v* is an element of *C*. | |
353 | |
354 Parameters | |
355 ---------- | |
356 G : NetworkX graph | |
357 An undirected graph. | |
358 | |
359 fpos : bool | |
360 If True or not None, the returned graph will have an | |
361 additional attribute, `pos`, a dictionary mapping node to | |
362 position in the Euclidean plane. | |
363 | |
364 create_using : NetworkX graph constructor, optional (default=nx.Graph) | |
365 Graph type to create. If graph instance, then cleared before populated. | |
366 | |
367 Returns | |
368 ------- | |
369 NetworkX graph | |
370 A bipartite graph whose "bottom" set is the nodes of the graph | |
371 `G`, whose "top" set is the cliques of `G`, and whose edges | |
372 join nodes of `G` to the cliques that contain them. | |
373 | |
374 The nodes of the graph `G` have the node attribute | |
375 'bipartite' set to 1 and the nodes representing cliques | |
376 have the node attribute 'bipartite' set to 0, as is the | |
377 convention for bipartite graphs in NetworkX. | |
378 | |
379 """ | |
380 B = nx.empty_graph(0, create_using) | |
381 B.clear() | |
382 # The "bottom" nodes in the bipartite graph are the nodes of the | |
383 # original graph, G. | |
384 B.add_nodes_from(G, bipartite=1) | |
385 for i, cl in enumerate(find_cliques(G)): | |
386 # The "top" nodes in the bipartite graph are the cliques. These | |
387 # nodes get negative numbers as labels. | |
388 name = -i - 1 | |
389 B.add_node(name, bipartite=0) | |
390 B.add_edges_from((v, name) for v in cl) | |
391 return B | |
392 | |
393 | |
394 def graph_clique_number(G, cliques=None): | |
395 """Returns the clique number of the graph. | |
396 | |
397 The *clique number* of a graph is the size of the largest clique in | |
398 the graph. | |
399 | |
400 Parameters | |
401 ---------- | |
402 G : NetworkX graph | |
403 An undirected graph. | |
404 | |
405 cliques : list | |
406 A list of cliques, each of which is itself a list of nodes. If | |
407 not specified, the list of all cliques will be computed, as by | |
408 :func:`find_cliques`. | |
409 | |
410 Returns | |
411 ------- | |
412 int | |
413 The size of the largest clique in `G`. | |
414 | |
415 Notes | |
416 ----- | |
417 You should provide `cliques` if you have already computed the list | |
418 of maximal cliques, in order to avoid an exponential time search for | |
419 maximal cliques. | |
420 | |
421 """ | |
422 if cliques is None: | |
423 cliques = find_cliques(G) | |
424 if len(G.nodes) < 1: | |
425 return 0 | |
426 return max([len(c) for c in cliques] or [1]) | |
427 | |
428 | |
429 def graph_number_of_cliques(G, cliques=None): | |
430 """Returns the number of maximal cliques in the graph. | |
431 | |
432 Parameters | |
433 ---------- | |
434 G : NetworkX graph | |
435 An undirected graph. | |
436 | |
437 cliques : list | |
438 A list of cliques, each of which is itself a list of nodes. If | |
439 not specified, the list of all cliques will be computed, as by | |
440 :func:`find_cliques`. | |
441 | |
442 Returns | |
443 ------- | |
444 int | |
445 The number of maximal cliques in `G`. | |
446 | |
447 Notes | |
448 ----- | |
449 You should provide `cliques` if you have already computed the list | |
450 of maximal cliques, in order to avoid an exponential time search for | |
451 maximal cliques. | |
452 | |
453 """ | |
454 if cliques is None: | |
455 cliques = list(find_cliques(G)) | |
456 return len(cliques) | |
457 | |
458 | |
459 def node_clique_number(G, nodes=None, cliques=None): | |
460 """ Returns the size of the largest maximal clique containing | |
461 each given node. | |
462 | |
463 Returns a single or list depending on input nodes. | |
464 Optional list of cliques can be input if already computed. | |
465 """ | |
466 if cliques is None: | |
467 if nodes is not None: | |
468 # Use ego_graph to decrease size of graph | |
469 if isinstance(nodes, list): | |
470 d = {} | |
471 for n in nodes: | |
472 H = nx.ego_graph(G, n) | |
473 d[n] = max((len(c) for c in find_cliques(H))) | |
474 else: | |
475 H = nx.ego_graph(G, nodes) | |
476 d = max((len(c) for c in find_cliques(H))) | |
477 return d | |
478 # nodes is None--find all cliques | |
479 cliques = list(find_cliques(G)) | |
480 | |
481 if nodes is None: | |
482 nodes = list(G.nodes()) # none, get entire graph | |
483 | |
484 if not isinstance(nodes, list): # check for a list | |
485 v = nodes | |
486 # assume it is a single value | |
487 d = max([len(c) for c in cliques if v in c]) | |
488 else: | |
489 d = {} | |
490 for v in nodes: | |
491 d[v] = max([len(c) for c in cliques if v in c]) | |
492 return d | |
493 | |
494 # if nodes is None: # none, use entire graph | |
495 # nodes=G.nodes() | |
496 # elif not isinstance(nodes, list): # check for a list | |
497 # nodes=[nodes] # assume it is a single value | |
498 | |
499 # if cliques is None: | |
500 # cliques=list(find_cliques(G)) | |
501 # d={} | |
502 # for v in nodes: | |
503 # d[v]=max([len(c) for c in cliques if v in c]) | |
504 | |
505 # if nodes in G: | |
506 # return d[v] #return single value | |
507 # return d | |
508 | |
509 | |
510 def number_of_cliques(G, nodes=None, cliques=None): | |
511 """Returns the number of maximal cliques for each node. | |
512 | |
513 Returns a single or list depending on input nodes. | |
514 Optional list of cliques can be input if already computed. | |
515 """ | |
516 if cliques is None: | |
517 cliques = list(find_cliques(G)) | |
518 | |
519 if nodes is None: | |
520 nodes = list(G.nodes()) # none, get entire graph | |
521 | |
522 if not isinstance(nodes, list): # check for a list | |
523 v = nodes | |
524 # assume it is a single value | |
525 numcliq = len([1 for c in cliques if v in c]) | |
526 else: | |
527 numcliq = {} | |
528 for v in nodes: | |
529 numcliq[v] = len([1 for c in cliques if v in c]) | |
530 return numcliq | |
531 | |
532 | |
533 def cliques_containing_node(G, nodes=None, cliques=None): | |
534 """Returns a list of cliques containing the given node. | |
535 | |
536 Returns a single list or list of lists depending on input nodes. | |
537 Optional list of cliques can be input if already computed. | |
538 """ | |
539 if cliques is None: | |
540 cliques = list(find_cliques(G)) | |
541 | |
542 if nodes is None: | |
543 nodes = list(G.nodes()) # none, get entire graph | |
544 | |
545 if not isinstance(nodes, list): # check for a list | |
546 v = nodes | |
547 # assume it is a single value | |
548 vcliques = [c for c in cliques if v in c] | |
549 else: | |
550 vcliques = {} | |
551 for v in nodes: | |
552 vcliques[v] = [c for c in cliques if v in c] | |
553 return vcliques |