Mercurial > repos > iuc > tn93_filter
diff tn93_cluster.py @ 2:1aa73004ba9b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit e9f254ea1c6712a96512cae4df91bfec8207a492
author | iuc |
---|---|
date | Sat, 28 Sep 2024 16:34:09 +0000 |
parents | cf50aeb956f2 |
children |
line wrap: on
line diff
--- a/tn93_cluster.py Wed Apr 20 16:59:05 2022 +0000 +++ b/tn93_cluster.py Sat Sep 28 16:34:09 2024 +0000 @@ -12,29 +12,34 @@ with open(fasta_file, "w") as fh2: for c in cluster_json: if reference_name is not None: - if reference_name in c['members']: - cc = c['centroid'].split('\n') + if reference_name in c["members"]: + cc = c["centroid"].split("\n") cc[0] = ">" + reference_name print("\n".join(cc), file=fh2) continue - print(c['centroid'], file=fh2) + print(c["centroid"], file=fh2) - return(os.path.getmtime(fasta_file), len(cluster_json)) + return (os.path.getmtime(fasta_file), len(cluster_json)) def run_command(command): - proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc = subprocess.Popen( + shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) stdout, stderr = proc.communicate() result = proc.returncode if result != 0: - print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr) - print('--------------------- STDOUT ---------------------') - print(stdout.decode().replace('\\n', '\n')) - print('------------------- END STDOUT -------------------') - print('--------------------- STDERR ---------------------', file=sys.stderr) - print(stderr.decode().replace('\\n', '\n'), file=sys.stderr) - print('------------------- END STDERR -------------------', file=sys.stderr) - return(int(result)) + print( + "Command `%s` failed with exit code %s\n" % (command, result), + file=sys.stderr, + ) + print("--------------------- STDOUT ---------------------") + print(stdout.decode().replace("\\n", "\n")) + print("------------------- END STDOUT -------------------") + print("--------------------- STDERR ---------------------", file=sys.stderr) + print(stderr.decode().replace("\\n", "\n"), file=sys.stderr) + print("------------------- END STDERR -------------------", file=sys.stderr) + return int(result) def main(arguments): @@ -42,34 +47,55 @@ step = threshold * 0.25 with open(arguments.reference) as fh: for line in fh: - if line[0] == '>': - _ref_seq_name = line[1:].split(' ')[0].strip() + if line[0] == ">": + _ref_seq_name = line[1:].split(" ")[0].strip() break while threshold <= 1: - command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input) + command = ( + "tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s" + % ( + threshold, + arguments.ambigs, + arguments.cluster_type, + arguments.overlap, + arguments.fraction, + arguments.input, + ) + ) return_code = run_command(command) if return_code != 0: return return_code - input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name) + input_stamp, cluster_count = cluster_to_fasta( + "clusters.json", "clusters.fa", _ref_seq_name + ) if cluster_count <= arguments.cluster_count: break else: threshold += step - print('Found %d clusters at threshold %f' % (cluster_count, threshold)) + print("Found %d clusters at threshold %f" % (cluster_count, threshold)) return 0 -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') - parser.add_argument('--input', help='Input MSA', required=True, type=str) - parser.add_argument('--reference', help='Reference sequence', required=True, type=str) - parser.add_argument('--output', help='Input MSA', required=True, type=str) - parser.add_argument('--threshold', help='Threshold', required=True, type=float) - parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str) - parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str) - parser.add_argument('--overlap', help='Overlap', required=True, type=int) - parser.add_argument('--fraction', help='Fraction', required=True, type=float) - parser.add_argument('--cluster-count', help='Max query', required=True, type=int) - parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str) +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Combine alignments into a single file, adding a reference sequence as well" + ) + parser.add_argument("--input", help="Input MSA", required=True, type=str) + parser.add_argument( + "--reference", help="Reference sequence", required=True, type=str + ) + parser.add_argument("--output", help="Input MSA", required=True, type=str) + parser.add_argument("--threshold", help="Threshold", required=True, type=float) + parser.add_argument("--ambigs", help="Handle ambigs", required=True, type=str) + parser.add_argument("--cluster-type", help="Cluster type", required=True, type=str) + parser.add_argument("--overlap", help="Overlap", required=True, type=int) + parser.add_argument("--fraction", help="Fraction", required=True, type=float) + parser.add_argument("--cluster-count", help="Max query", required=True, type=int) + parser.add_argument( + "--compressed", + help="File to write compressed clusters to", + required=True, + type=str, + ) arguments = parser.parse_args() exit(main(arguments))