Mercurial > repos > chemteam > fastpca
annotate get_clusters.py @ 1:d9f8cc3258f9 draft
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
author | chemteam |
---|---|
date | Mon, 24 Aug 2020 06:09:11 -0400 |
parents | |
children | 9ca30ad95444 |
rev | line source |
---|---|
1
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
1 import argparse |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
2 import collections |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
3 import json |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
4 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
5 import numpy as np |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
6 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
7 from scipy.cluster.hierarchy import fcluster |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
8 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
9 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
def separate_clusters(Z_fpath, threshold, min_members, output):
    """Cut a hierarchical-clustering tree and write the clusters as JSON.

    Args:
        Z_fpath: Path to a text file readable by ``numpy.loadtxt`` that
            holds the linkage array, one merge per row.
        threshold: Distance cutoff handed to ``fcluster`` with
            ``criterion='distance'``.
        min_members: Clusters with fewer members than this are left out
            of the output.
        output: Path of the JSON file to write. Each key is a cluster id
            as assigned by ``fcluster``; each value is the list of
            zero-based positions in the assignment array that belong to
            that cluster.
    """
    # ndmin=2 keeps a single-row linkage (two observations) as a 1x4 matrix;
    # without it loadtxt returns a 1-D array, which fcluster rejects.
    Z = np.loadtxt(Z_fpath, ndmin=2)
    branch_assignments = fcluster(Z, threshold, criterion='distance')

    cluster_dict = collections.defaultdict(list)
    for index, cluster_id in enumerate(branch_assignments):
        # int() converts the NumPy integer to a plain int so that json can
        # sort and serialise it as a key.
        cluster_dict[int(cluster_id)].append(index)

    kept = {cluster_id: members
            for cluster_id, members in cluster_dict.items()
            if len(members) >= min_members}

    with open(output, 'w') as f:
        json.dump(kept, f, indent=4, sort_keys=True)
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
20 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
21 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
def main(argv=None):
    """Parse command-line options and run ``separate_clusters``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--Z', required=True,
                        help='File for cluster linkage array.')
    # The cutoff is a distance, so accept fractional values; whole numbers
    # given on the command line still parse as before.
    parser.add_argument('--threshold', type=float, required=True,
                        help='Distance cutoff.')
    parser.add_argument('--min-members', type=int, required=True,
                        help='Minimum number of members of the cluster.')
    parser.add_argument('--output', required=True,
                        help='Output file.')
    args = parser.parse_args(argv)

    separate_clusters(args.Z, args.threshold,
                      args.min_members, args.output)
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
36 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
37 |
d9f8cc3258f9
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
chemteam
parents:
diff
changeset
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()