Mercurial > repos > iuc > tn93_readreduce
comparison tn93_cluster.py @ 3:c176164dc8a5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit e9f254ea1c6712a96512cae4df91bfec8207a492
| author | iuc |
|---|---|
| date | Sat, 28 Sep 2024 16:34:19 +0000 |
| parents | 1d2ec0b0a0a7 |
| children | |
comparison
Legend: equal, deleted, inserted, replaced
| 2:1d2ec0b0a0a7 | 3:c176164dc8a5 |
|---|---|
| 10 with open(json_file, "r") as fh: | 10 with open(json_file, "r") as fh: |
| 11 cluster_json = json.load(fh) | 11 cluster_json = json.load(fh) |
| 12 with open(fasta_file, "w") as fh2: | 12 with open(fasta_file, "w") as fh2: |
| 13 for c in cluster_json: | 13 for c in cluster_json: |
| 14 if reference_name is not None: | 14 if reference_name is not None: |
| 15 if reference_name in c['members']: | 15 if reference_name in c["members"]: |
| 16 cc = c['centroid'].split('\n') | 16 cc = c["centroid"].split("\n") |
| 17 cc[0] = ">" + reference_name | 17 cc[0] = ">" + reference_name |
| 18 print("\n".join(cc), file=fh2) | 18 print("\n".join(cc), file=fh2) |
| 19 continue | 19 continue |
| 20 print(c['centroid'], file=fh2) | 20 print(c["centroid"], file=fh2) |
| 21 | 21 |
| 22 return(os.path.getmtime(fasta_file), len(cluster_json)) | 22 return (os.path.getmtime(fasta_file), len(cluster_json)) |
| 23 | 23 |
| 24 | 24 |
| 25 def run_command(command): | 25 def run_command(command): |
| 26 proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 26 proc = subprocess.Popen( |
| | 27 shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE |
| | 28 ) |
| 27 stdout, stderr = proc.communicate() | 29 stdout, stderr = proc.communicate() |
| 28 result = proc.returncode | 30 result = proc.returncode |
| 29 if result != 0: | 31 if result != 0: |
| 30 print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr) | 32 print( |
| 31 print('--------------------- STDOUT ---------------------') | 33 "Command `%s` failed with exit code %s\n" % (command, result), |
| 32 print(stdout.decode().replace('\\n', '\n')) | 34 file=sys.stderr, |
| 33 print('------------------- END STDOUT -------------------') | 35 ) |
| 34 print('--------------------- STDERR ---------------------', file=sys.stderr) | 36 print("--------------------- STDOUT ---------------------") |
| 35 print(stderr.decode().replace('\\n', '\n'), file=sys.stderr) | 37 print(stdout.decode().replace("\\n", "\n")) |
| 36 print('------------------- END STDERR -------------------', file=sys.stderr) | 38 print("------------------- END STDOUT -------------------") |
| 37 return(int(result)) | 39 print("--------------------- STDERR ---------------------", file=sys.stderr) |
| | 40 print(stderr.decode().replace("\\n", "\n"), file=sys.stderr) |
| | 41 print("------------------- END STDERR -------------------", file=sys.stderr) |
| | 42 return int(result) |
| 38 | 43 |
| 39 | 44 |
| 40 def main(arguments): | 45 def main(arguments): |
| 41 threshold = arguments.threshold | 46 threshold = arguments.threshold |
| 42 step = threshold * 0.25 | 47 step = threshold * 0.25 |
| 43 with open(arguments.reference) as fh: | 48 with open(arguments.reference) as fh: |
| 44 for line in fh: | 49 for line in fh: |
| 45 if line[0] == '>': | 50 if line[0] == ">": |
| 46 _ref_seq_name = line[1:].split(' ')[0].strip() | 51 _ref_seq_name = line[1:].split(" ")[0].strip() |
| 47 break | 52 break |
| 48 while threshold <= 1: | 53 while threshold <= 1: |
| 49 command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input) | 54 command = ( |
| | 55 "tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s" |
| | 56 % ( |
| | 57 threshold, |
| | 58 arguments.ambigs, |
| | 59 arguments.cluster_type, |
| | 60 arguments.overlap, |
| | 61 arguments.fraction, |
| | 62 arguments.input, |
| | 63 ) |
| | 64 ) |
| 50 return_code = run_command(command) | 65 return_code = run_command(command) |
| 51 if return_code != 0: | 66 if return_code != 0: |
| 52 return return_code | 67 return return_code |
| 53 input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name) | 68 input_stamp, cluster_count = cluster_to_fasta( |
| | 69 "clusters.json", "clusters.fa", _ref_seq_name |
| | 70 ) |
| 54 if cluster_count <= arguments.cluster_count: | 71 if cluster_count <= arguments.cluster_count: |
| 55 break | 72 break |
| 56 else: | 73 else: |
| 57 threshold += step | 74 threshold += step |
| 58 print('Found %d clusters at threshold %f' % (cluster_count, threshold)) | 75 print("Found %d clusters at threshold %f" % (cluster_count, threshold)) |
| 59 return 0 | 76 return 0 |
| 60 | 77 |
| 61 | 78 |
| 62 if __name__ == '__main__': | 79 if __name__ == "__main__": |
| 63 parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') | 80 parser = argparse.ArgumentParser( |
| 64 parser.add_argument('--input', help='Input MSA', required=True, type=str) | 81 description="Combine alignments into a single file, adding a reference sequence as well" |
| 65 parser.add_argument('--reference', help='Reference sequence', required=True, type=str) | 82 ) |
| 66 parser.add_argument('--output', help='Input MSA', required=True, type=str) | 83 parser.add_argument("--input", help="Input MSA", required=True, type=str) |
| 67 parser.add_argument('--threshold', help='Threshold', required=True, type=float) | 84 parser.add_argument( |
| 68 parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str) | 85 "--reference", help="Reference sequence", required=True, type=str |
| 69 parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str) | 86 ) |
| 70 parser.add_argument('--overlap', help='Overlap', required=True, type=int) | 87 parser.add_argument("--output", help="Input MSA", required=True, type=str) |
| 71 parser.add_argument('--fraction', help='Fraction', required=True, type=float) | 88 parser.add_argument("--threshold", help="Threshold", required=True, type=float) |
| 72 parser.add_argument('--cluster-count', help='Max query', required=True, type=int) | 89 parser.add_argument("--ambigs", help="Handle ambigs", required=True, type=str) |
| 73 parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str) | 90 parser.add_argument("--cluster-type", help="Cluster type", required=True, type=str) |
| | 91 parser.add_argument("--overlap", help="Overlap", required=True, type=int) |
| | 92 parser.add_argument("--fraction", help="Fraction", required=True, type=float) |
| | 93 parser.add_argument("--cluster-count", help="Max query", required=True, type=int) |
| | 94 parser.add_argument( |
| | 95 "--compressed", |
| | 96 help="File to write compressed clusters to", |
| | 97 required=True, |
| | 98 type=str, |
| | 99 ) |
| 74 arguments = parser.parse_args() | 100 arguments = parser.parse_args() |
| 75 exit(main(arguments)) | 101 exit(main(arguments)) |
