Mercurial > repos > iuc > tn93_filter
view tn93_cluster.py @ 1:cf50aeb956f2 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit 98c0d716cbd1237ae735ce83e0153ee246abd5d8"
author | iuc |
---|---|
date | Wed, 20 Apr 2022 16:59:05 +0000 |
parents | ba95715078c9 |
children | 1aa73004ba9b |
line wrap: on
line source
import argparse import json import os import shlex import subprocess import sys def cluster_to_fasta(json_file, fasta_file, reference_name=None): with open(json_file, "r") as fh: cluster_json = json.load(fh) with open(fasta_file, "w") as fh2: for c in cluster_json: if reference_name is not None: if reference_name in c['members']: cc = c['centroid'].split('\n') cc[0] = ">" + reference_name print("\n".join(cc), file=fh2) continue print(c['centroid'], file=fh2) return(os.path.getmtime(fasta_file), len(cluster_json)) def run_command(command): proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() result = proc.returncode if result != 0: print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr) print('--------------------- STDOUT ---------------------') print(stdout.decode().replace('\\n', '\n')) print('------------------- END STDOUT -------------------') print('--------------------- STDERR ---------------------', file=sys.stderr) print(stderr.decode().replace('\\n', '\n'), file=sys.stderr) print('------------------- END STDERR -------------------', file=sys.stderr) return(int(result)) def main(arguments): threshold = arguments.threshold step = threshold * 0.25 with open(arguments.reference) as fh: for line in fh: if line[0] == '>': _ref_seq_name = line[1:].split(' ')[0].strip() break while threshold <= 1: command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input) return_code = run_command(command) if return_code != 0: return return_code input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name) if cluster_count <= arguments.cluster_count: break else: threshold += step print('Found %d clusters at threshold %f' % (cluster_count, threshold)) return 0 if __name__ == '__main__': parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') parser.add_argument('--input', help='Input MSA', required=True, type=str) parser.add_argument('--reference', help='Reference sequence', required=True, type=str) parser.add_argument('--output', help='Input MSA', required=True, type=str) parser.add_argument('--threshold', help='Threshold', required=True, type=float) parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str) parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str) parser.add_argument('--overlap', help='Overlap', required=True, type=int) parser.add_argument('--fraction', help='Fraction', required=True, type=float) parser.add_argument('--cluster-count', help='Max query', required=True, type=int) parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str) arguments = parser.parse_args() exit(main(arguments))