annotate tn93_cluster.py @ 1:9d793e88e15f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
author iuc
date Fri, 23 Apr 2021 03:05:33 +0000
parents
children b38f620a3628
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
1 import argparse
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
2 import json
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
3 import os
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
4 import shlex
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
5 import shutil
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
6 import subprocess
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
7 import sys
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
8
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
9
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
def cluster_to_fasta(json_file, fasta_file, reference_name=None):
    """Write the centroid of every cluster in a cluster JSON file to a FASTA file.

    The JSON document is iterated as a sequence of clusters; each cluster is
    indexed by the keys ``'members'`` and ``'centroid'``.  The centroid text
    is written verbatim, one cluster after another.  When ``reference_name``
    is given and appears in a cluster's ``'members'``, the first line of that
    cluster's centroid (presumably the FASTA header -- confirm against the
    tn93-cluster output format) is replaced with ``'>' + reference_name``.

    Args:
        json_file: Path of the cluster JSON file to read.
        fasta_file: Path of the FASTA file to (over)write.
        reference_name: Optional sequence name; the cluster containing it is
            emitted under this name.

    Returns:
        A ``(mtime, cluster_count)`` tuple: the modification time of
        ``fasta_file`` after writing and the number of clusters in the JSON.
    """
    with open(json_file, "r") as fh:
        cluster_json = json.load(fh)

    with open(fasta_file, "w") as fh2:
        for c in cluster_json:
            if reference_name is not None and reference_name in c['members']:
                # Relabel the centroid so the reference keeps its own name
                # in the output.
                cc = c['centroid'].split('\n')
                cc[0] = ">" + reference_name
                print("\n".join(cc), file=fh2)
                continue
            print(c['centroid'], file=fh2)

    # Stat the file only after it has been closed so the timestamp reflects
    # the fully flushed output.
    return (os.path.getmtime(fasta_file), len(cluster_json))
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
24
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
25
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
def run_command(command):
    """Run an external command and return its exit code.

    ``command`` is a single string that is tokenised with ``shlex.split`` and
    executed without a shell.  Output is captured; it is only echoed when the
    command fails, with the captured stdout going to this process's stdout
    and the failure message plus captured stderr going to stderr.

    Args:
        command: The command line to run, as one string.

    Returns:
        The command's exit code as an ``int``; ``127`` if the program could
        not be started at all (e.g. executable not found).
    """
    try:
        proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as err:
        # Keep the "return an exit code" contract instead of letting a
        # missing/non-executable binary surface as a traceback.
        print('Command `%s` could not be started: %s\n' % (command, err), file=sys.stderr)
        return 127

    stdout, stderr = proc.communicate()
    result = proc.returncode
    if result != 0:
        print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr)
        print('--------------------- STDOUT ---------------------')
        # errors='replace': undecodable bytes in the tool's output must not
        # mask the real failure with a UnicodeDecodeError.
        print(stdout.decode(errors='replace').replace('\\n', '\n'))
        print('------------------- END STDOUT -------------------')
        print('--------------------- STDERR ---------------------', file=sys.stderr)
        print(stderr.decode(errors='replace').replace('\\n', '\n'), file=sys.stderr)
        print('------------------- END STDERR -------------------', file=sys.stderr)
    return int(result)
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
39
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
40
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
def main(arguments):
    """Cluster the input MSA, relaxing the threshold until few enough clusters remain.

    The input alignment is copied into the working directory as
    ``reference_msa.fa``; ``tn93-cluster`` is run on it repeatedly, starting
    at ``arguments.threshold`` and adding a quarter of that initial value
    after each round.  The loop stops once the number of clusters is at most
    ``arguments.cluster_count``, the threshold equals 1, the threshold would
    exceed 1, or the threshold cannot be increased.  The centroids of the
    last round are copied to ``arguments.compressed`` and the cluster JSON to
    ``arguments.output``.

    Args:
        arguments: Parsed command-line namespace providing ``input``,
            ``reference``, ``output``, ``threshold``, ``ambigs``,
            ``cluster_type``, ``overlap``, ``fraction``, ``cluster_count``
            and ``compressed``.

    Returns:
        ``0`` on success, otherwise the non-zero exit code of the failed
        ``tn93-cluster`` invocation.
    """
    threshold = arguments.threshold
    step = threshold * 0.25
    work_dir = os.getcwd()
    shutil.copy(arguments.input, os.path.join(work_dir, 'reference_msa.fa'))
    shutil.copy(arguments.input, os.path.join(work_dir, 'reference_msa.fa.bak'))

    # Name of the first record in the reference file; stays None (no
    # renaming in cluster_to_fasta) when the file contains no header line.
    _ref_seq_name = None
    with open(arguments.reference) as fh:
        for line in fh:
            if line[0] == '>':
                _ref_seq_name = line[1:].split(' ')[0].strip()
                break

    while threshold <= 1:
        # String-valued options are quoted because run_command() re-splits
        # the command line with shlex.
        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f reference_msa.fa' % (
            threshold,
            shlex.quote(arguments.ambigs),
            shlex.quote(arguments.cluster_type),
            arguments.overlap,
            arguments.fraction,
        )
        return_code = run_command(command)
        if return_code != 0:
            return return_code
        _, cluster_count = cluster_to_fasta('clusters.json', 'reference_msa.fa.bak', _ref_seq_name)
        # Report before the threshold is bumped so the message names the
        # threshold that actually produced this cluster count.
        print('Found %d clusters at threshold %f' % (cluster_count, threshold))
        if cluster_count <= arguments.cluster_count or threshold == 1:
            break
        if step <= 0:
            # A zero or negative starting threshold yields a non-positive
            # step; the threshold could never grow, so stop instead of
            # looping forever.
            break
        threshold += step

    shutil.copy('reference_msa.fa.bak', arguments.compressed)
    shutil.copy('clusters.json', arguments.output)
    os.remove('reference_msa.fa.bak')
    return 0
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
66
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
67
9d793e88e15f "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: every option is required and is forwarded to
    # main() through the parsed namespace.
    parser = argparse.ArgumentParser(description='Cluster an MSA with tn93-cluster, relaxing the threshold until at most --cluster-count clusters remain')
    parser.add_argument('--input', help='Input MSA', required=True, type=str)
    parser.add_argument('--reference', help='Reference sequence', required=True, type=str)
    parser.add_argument('--output', help='File to write the cluster JSON to', required=True, type=str)
    parser.add_argument('--threshold', help='Threshold', required=True, type=float)
    parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str)
    parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str)
    parser.add_argument('--overlap', help='Overlap', required=True, type=int)
    parser.add_argument('--fraction', help='Fraction', required=True, type=float)
    parser.add_argument('--cluster-count', help='Maximum number of clusters to accept', required=True, type=int)
    parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str)
    arguments = parser.parse_args()
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under ``python -S``).
    sys.exit(main(arguments))