Mercurial > repos > iuc > tn93_filter
comparison tn93_cluster.py @ 2:1aa73004ba9b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit e9f254ea1c6712a96512cae4df91bfec8207a492
author | iuc |
---|---|
date | Sat, 28 Sep 2024 16:34:09 +0000 |
parents | cf50aeb956f2 |
children |
comparison
legend: equal, deleted, inserted, replaced
1:cf50aeb956f2 | 2:1aa73004ba9b |
---|---|
10 with open(json_file, "r") as fh: | 10 with open(json_file, "r") as fh: |
11 cluster_json = json.load(fh) | 11 cluster_json = json.load(fh) |
12 with open(fasta_file, "w") as fh2: | 12 with open(fasta_file, "w") as fh2: |
13 for c in cluster_json: | 13 for c in cluster_json: |
14 if reference_name is not None: | 14 if reference_name is not None: |
15 if reference_name in c['members']: | 15 if reference_name in c["members"]: |
16 cc = c['centroid'].split('\n') | 16 cc = c["centroid"].split("\n") |
17 cc[0] = ">" + reference_name | 17 cc[0] = ">" + reference_name |
18 print("\n".join(cc), file=fh2) | 18 print("\n".join(cc), file=fh2) |
19 continue | 19 continue |
20 print(c['centroid'], file=fh2) | 20 print(c["centroid"], file=fh2) |
21 | 21 |
22 return(os.path.getmtime(fasta_file), len(cluster_json)) | 22 return (os.path.getmtime(fasta_file), len(cluster_json)) |
23 | 23 |
24 | 24 |
25 def run_command(command): | 25 def run_command(command): |
26 proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 26 proc = subprocess.Popen( |
27 shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE | |
28 ) | |
27 stdout, stderr = proc.communicate() | 29 stdout, stderr = proc.communicate() |
28 result = proc.returncode | 30 result = proc.returncode |
29 if result != 0: | 31 if result != 0: |
30 print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr) | 32 print( |
31 print('--------------------- STDOUT ---------------------') | 33 "Command `%s` failed with exit code %s\n" % (command, result), |
32 print(stdout.decode().replace('\\n', '\n')) | 34 file=sys.stderr, |
33 print('------------------- END STDOUT -------------------') | 35 ) |
34 print('--------------------- STDERR ---------------------', file=sys.stderr) | 36 print("--------------------- STDOUT ---------------------") |
35 print(stderr.decode().replace('\\n', '\n'), file=sys.stderr) | 37 print(stdout.decode().replace("\\n", "\n")) |
36 print('------------------- END STDERR -------------------', file=sys.stderr) | 38 print("------------------- END STDOUT -------------------") |
37 return(int(result)) | 39 print("--------------------- STDERR ---------------------", file=sys.stderr) |
40 print(stderr.decode().replace("\\n", "\n"), file=sys.stderr) | |
41 print("------------------- END STDERR -------------------", file=sys.stderr) | |
42 return int(result) | |
38 | 43 |
39 | 44 |
40 def main(arguments): | 45 def main(arguments): |
41 threshold = arguments.threshold | 46 threshold = arguments.threshold |
42 step = threshold * 0.25 | 47 step = threshold * 0.25 |
43 with open(arguments.reference) as fh: | 48 with open(arguments.reference) as fh: |
44 for line in fh: | 49 for line in fh: |
45 if line[0] == '>': | 50 if line[0] == ">": |
46 _ref_seq_name = line[1:].split(' ')[0].strip() | 51 _ref_seq_name = line[1:].split(" ")[0].strip() |
47 break | 52 break |
48 while threshold <= 1: | 53 while threshold <= 1: |
49 command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input) | 54 command = ( |
55 "tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s" | |
56 % ( | |
57 threshold, | |
58 arguments.ambigs, | |
59 arguments.cluster_type, | |
60 arguments.overlap, | |
61 arguments.fraction, | |
62 arguments.input, | |
63 ) | |
64 ) | |
50 return_code = run_command(command) | 65 return_code = run_command(command) |
51 if return_code != 0: | 66 if return_code != 0: |
52 return return_code | 67 return return_code |
53 input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name) | 68 input_stamp, cluster_count = cluster_to_fasta( |
69 "clusters.json", "clusters.fa", _ref_seq_name | |
70 ) | |
54 if cluster_count <= arguments.cluster_count: | 71 if cluster_count <= arguments.cluster_count: |
55 break | 72 break |
56 else: | 73 else: |
57 threshold += step | 74 threshold += step |
58 print('Found %d clusters at threshold %f' % (cluster_count, threshold)) | 75 print("Found %d clusters at threshold %f" % (cluster_count, threshold)) |
59 return 0 | 76 return 0 |
60 | 77 |
61 | 78 |
62 if __name__ == '__main__': | 79 if __name__ == "__main__": |
63 parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') | 80 parser = argparse.ArgumentParser( |
64 parser.add_argument('--input', help='Input MSA', required=True, type=str) | 81 description="Combine alignments into a single file, adding a reference sequence as well" |
65 parser.add_argument('--reference', help='Reference sequence', required=True, type=str) | 82 ) |
66 parser.add_argument('--output', help='Input MSA', required=True, type=str) | 83 parser.add_argument("--input", help="Input MSA", required=True, type=str) |
67 parser.add_argument('--threshold', help='Threshold', required=True, type=float) | 84 parser.add_argument( |
68 parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str) | 85 "--reference", help="Reference sequence", required=True, type=str |
69 parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str) | 86 ) |
70 parser.add_argument('--overlap', help='Overlap', required=True, type=int) | 87 parser.add_argument("--output", help="Input MSA", required=True, type=str) |
71 parser.add_argument('--fraction', help='Fraction', required=True, type=float) | 88 parser.add_argument("--threshold", help="Threshold", required=True, type=float) |
72 parser.add_argument('--cluster-count', help='Max query', required=True, type=int) | 89 parser.add_argument("--ambigs", help="Handle ambigs", required=True, type=str) |
73 parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str) | 90 parser.add_argument("--cluster-type", help="Cluster type", required=True, type=str) |
91 parser.add_argument("--overlap", help="Overlap", required=True, type=int) | |
92 parser.add_argument("--fraction", help="Fraction", required=True, type=float) | |
93 parser.add_argument("--cluster-count", help="Max query", required=True, type=int) | |
94 parser.add_argument( | |
95 "--compressed", | |
96 help="File to write compressed clusters to", | |
97 required=True, | |
98 type=str, | |
99 ) | |
74 arguments = parser.parse_args() | 100 arguments = parser.parse_args() |
75 exit(main(arguments)) | 101 exit(main(arguments)) |