comparison tn93_cluster.py @ 3:2fd21f5b16bc draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit e9f254ea1c6712a96512cae4df91bfec8207a492
author iuc
date Sat, 28 Sep 2024 16:34:29 +0000
parents b38f620a3628
children
comparison
equal deleted inserted replaced
2:b38f620a3628 3:2fd21f5b16bc
10 with open(json_file, "r") as fh: 10 with open(json_file, "r") as fh:
11 cluster_json = json.load(fh) 11 cluster_json = json.load(fh)
12 with open(fasta_file, "w") as fh2: 12 with open(fasta_file, "w") as fh2:
13 for c in cluster_json: 13 for c in cluster_json:
14 if reference_name is not None: 14 if reference_name is not None:
15 if reference_name in c['members']: 15 if reference_name in c["members"]:
16 cc = c['centroid'].split('\n') 16 cc = c["centroid"].split("\n")
17 cc[0] = ">" + reference_name 17 cc[0] = ">" + reference_name
18 print("\n".join(cc), file=fh2) 18 print("\n".join(cc), file=fh2)
19 continue 19 continue
20 print(c['centroid'], file=fh2) 20 print(c["centroid"], file=fh2)
21 21
22 return(os.path.getmtime(fasta_file), len(cluster_json)) 22 return (os.path.getmtime(fasta_file), len(cluster_json))
23 23
24 24
def run_command(command):
    """Run an external command and return its exit code.

    The command string is tokenised with ``shlex.split`` and executed without
    a shell. When the process exits with a non-zero status, a failure notice
    and the captured stderr are written to ``sys.stderr`` and the captured
    stdout is written to stdout, each framed by banner lines.

    Args:
        command: The full command line as a single string.

    Returns:
        The exit code of the process as an ``int``.
    """
    completed = subprocess.run(
        shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    result = completed.returncode
    if result != 0:
        # Decode leniently: a tool that emits non-UTF-8 bytes must not turn the
        # diagnostic dump into a UnicodeDecodeError that hides the exit code.
        stdout = completed.stdout.decode(errors="replace")
        stderr = completed.stderr.decode(errors="replace")
        print(
            "Command `%s` failed with exit code %s\n" % (command, result),
            file=sys.stderr,
        )
        print("--------------------- STDOUT ---------------------")
        # Literal backslash-n sequences in the output are expanded to newlines.
        print(stdout.replace("\\n", "\n"))
        print("------------------- END STDOUT -------------------")
        print("--------------------- STDERR ---------------------", file=sys.stderr)
        print(stderr.replace("\\n", "\n"), file=sys.stderr)
        print("------------------- END STDERR -------------------", file=sys.stderr)
    return int(result)
38 43
39 44
def main(arguments):
    """Run ``tn93-cluster`` repeatedly, relaxing the threshold until few enough clusters remain.

    The first FASTA header of ``arguments.reference`` supplies the reference
    sequence name. Starting at ``arguments.threshold``, ``tn93-cluster`` is run
    and its ``clusters.json`` output is converted to ``clusters.fa``. While the
    cluster count exceeds ``arguments.cluster_count`` the threshold is raised
    by a quarter of its starting value, and the loop stops once it exceeds 1.

    Args:
        arguments: Parsed command-line namespace providing ``threshold``,
            ``reference``, ``ambigs``, ``cluster_type``, ``overlap``,
            ``fraction``, ``input`` and ``cluster_count``.

    Returns:
        0 on success, the exit code of ``tn93-cluster`` if it fails, or 1 when
        the starting threshold is above 1 so that no clustering was run.
    """
    threshold = arguments.threshold
    step = threshold * 0.25

    # Stays None when the reference file contains no ">" header line; the
    # reference name is then passed on as None instead of raising NameError.
    _ref_seq_name = None
    with open(arguments.reference) as fh:
        for line in fh:
            if line.startswith(">"):
                _ref_seq_name = line[1:].split(" ")[0].strip()
                break

    cluster_count = None
    used_threshold = threshold
    while threshold <= 1:
        # run_command() tokenises the string with shlex.split, so quote the
        # free-form string arguments to keep values with spaces intact.
        command = (
            "tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s"
            % (
                threshold,
                shlex.quote(arguments.ambigs),
                shlex.quote(arguments.cluster_type),
                arguments.overlap,
                arguments.fraction,
                shlex.quote(arguments.input),
            )
        )
        return_code = run_command(command)
        if return_code != 0:
            return return_code
        _, cluster_count = cluster_to_fasta(
            "clusters.json", "clusters.fa", _ref_seq_name
        )
        # Remember the threshold that produced the current clusters.fa so the
        # summary below does not report a value that was never run.
        used_threshold = threshold
        if cluster_count <= arguments.cluster_count:
            break
        if step <= 0:
            # A zero or negative starting threshold gives a non-positive step;
            # the threshold could never pass 1 and the loop would not end.
            break
        threshold += step

    if cluster_count is None:
        print(
            "Threshold %g is greater than 1; tn93-cluster was not run" % threshold,
            file=sys.stderr,
        )
        return 1
    print("Found %d clusters at threshold %f" % (cluster_count, used_threshold))
    return 0
60 77
61 78
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with every option marked as required.
    """
    # NOTE(review): the description reads as if copied from another script;
    # the visible code clusters sequences — confirm and reword.
    parser = argparse.ArgumentParser(
        description="Combine alignments into a single file, adding a reference sequence as well"
    )
    parser.add_argument("--input", help="Input MSA", required=True, type=str)
    parser.add_argument(
        "--reference", help="Reference sequence", required=True, type=str
    )
    # The help text used to repeat "Input MSA" from --input.
    parser.add_argument("--output", help="Output file", required=True, type=str)
    parser.add_argument("--threshold", help="Threshold", required=True, type=float)
    parser.add_argument("--ambigs", help="Handle ambigs", required=True, type=str)
    parser.add_argument("--cluster-type", help="Cluster type", required=True, type=str)
    parser.add_argument("--overlap", help="Overlap", required=True, type=int)
    parser.add_argument("--fraction", help="Fraction", required=True, type=float)
    parser.add_argument("--cluster-count", help="Max query", required=True, type=int)
    parser.add_argument(
        "--compressed",
        help="File to write compressed clusters to",
        required=True,
        type=str,
    )
    return parser


if __name__ == "__main__":
    arguments = build_parser().parse_args()
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(main(arguments))