comparison get_clusters.py @ 0:4f3222cb5cf6 draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 79589d149a8ff2791d4f71d28b155011672db827"
author chemteam
date Fri, 11 Sep 2020 21:54:45 +0000
parents
children afcb925def69
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3222cb5cf6
1 import argparse
2 import collections
3 import json
4
5 import numpy as np
6
7 from scipy.cluster.hierarchy import fcluster
8
9
def separate_clusters(Z_fpath, threshold, min_members, output):
    """Cut a hierarchical clustering into flat clusters and save them as JSON.

    The linkage array is read from a text file, flat cluster labels are
    obtained with ``scipy.cluster.hierarchy.fcluster`` using the
    ``'distance'`` criterion, and the 0-based positions of the observations
    are grouped by label. Clusters with fewer than ``min_members`` members
    are dropped before writing.

    Args:
        Z_fpath: Path to a text file readable by ``numpy.loadtxt`` holding
            the linkage array.
        threshold: Cutoff passed to ``fcluster`` as ``t``.
        min_members: Minimum number of members a cluster needs to be kept.
        output: Path of the JSON file to write. Keys are the cluster labels,
            values are lists of member positions.
    """
    # ndmin=2 keeps a one-row linkage (two observations) two-dimensional;
    # without it loadtxt returns a 1-D array, which fcluster rejects.
    Z = np.loadtxt(Z_fpath, ndmin=2)
    branch_assignments = fcluster(Z, threshold, criterion='distance')

    cluster_dict = collections.defaultdict(list)
    for n, label in enumerate(branch_assignments):
        # Labels are numpy integers; convert so they serialise as JSON keys.
        cluster_dict[int(label)].append(n)

    cluster_dict = {k: v for k, v in cluster_dict.items()
                    if len(v) >= min_members}
    with open(output, 'w') as f:
        json.dump(cluster_dict, f, indent=4, sort_keys=True)
20
21
def main(argv=None):
    """Parse the command-line options and run ``separate_clusters``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--Z', required=True,
                        help='File for cluster linkage array.')
    # Parsed as float so fractional distance cutoffs are accepted;
    # integer values given on the command line still parse.
    parser.add_argument('--threshold', type=float, required=True,
                        help='Distance cutoff.')
    parser.add_argument('--min-members', type=int, required=True,
                        help='Minimum number of members of the cluster.')
    parser.add_argument('--output', required=True,
                        help='Output file.')
    args = parser.parse_args(argv)

    separate_clusters(args.Z, args.threshold,
                      args.min_members, args.output)
36
37
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()