comparison tn93_cluster.py @ 1:112d80c9ccca draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit 98c0d716cbd1237ae735ce83e0153ee246abd5d8"
author iuc
date Wed, 20 Apr 2022 17:00:11 +0000
parents af03f3398f03
children
comparison
equal deleted inserted replaced
0:af03f3398f03 1:112d80c9ccca
1 import argparse 1 import argparse
2 import json 2 import json
3 import os 3 import os
4 import shlex 4 import shlex
5 import shutil
6 import subprocess 5 import subprocess
7 import sys 6 import sys
8 7
9 8
10 def cluster_to_fasta(json_file, fasta_file, reference_name=None): 9 def cluster_to_fasta(json_file, fasta_file, reference_name=None):
39 38
40 39
41 def main(arguments): 40 def main(arguments):
42 threshold = arguments.threshold 41 threshold = arguments.threshold
43 step = threshold * 0.25 42 step = threshold * 0.25
44 shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa'))
45 shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa.bak'))
46 with open(arguments.reference) as fh: 43 with open(arguments.reference) as fh:
47 for line in fh: 44 for line in fh:
48 if line[0] == '>': 45 if line[0] == '>':
49 _ref_seq_name = line[1:].split(' ')[0].strip() 46 _ref_seq_name = line[1:].split(' ')[0].strip()
50 break 47 break
51 while True and threshold <= 1: 48 while threshold <= 1:
52 command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f reference_msa.fa' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction) 49 command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input)
53 return_code = run_command(command) 50 return_code = run_command(command)
54 if return_code != 0: 51 if return_code != 0:
55 return return_code 52 return return_code
56 input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'reference_msa.fa.bak', _ref_seq_name) 53 input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name)
57 if cluster_count <= arguments.cluster_count or threshold == 1: 54 if cluster_count <= arguments.cluster_count:
58 break 55 break
59 else: 56 else:
60 threshold += step 57 threshold += step
61 print('Found %d clusters at threshold %f' % (cluster_count, threshold)) 58 print('Found %d clusters at threshold %f' % (cluster_count, threshold))
62 shutil.copy('reference_msa.fa.bak', arguments.compressed)
63 shutil.copy('clusters.json', arguments.output)
64 os.remove('reference_msa.fa.bak')
65 return 0 59 return 0
66 60
67 61
68 if __name__ == '__main__': 62 if __name__ == '__main__':
69 parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') 63 parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well')