Mercurial > repos > iuc > virannot_otu
diff rps2tsv.py @ 4:adcf06db3030 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
author | iuc |
---|---|
date | Tue, 13 May 2025 11:52:27 +0000 |
parents | 40fb54cc6628 |
children |
line wrap: on
line diff
--- a/rps2tsv.py Sun Sep 08 14:09:31 2024 +0000 +++ b/rps2tsv.py Tue May 13 11:52:27 2025 +0000 @@ -5,6 +5,7 @@ # Author: Marie Lefebvre - INRAE # Aims: Convert rpsblast xml output to csv and add taxonomy +"""Module which converts rpsblast xml output to tsv and add taxonomy""" import argparse import json @@ -19,6 +20,9 @@ def main(): + """ + Main function + """ options = _set_options() _set_log_level(options.verbosity) hits = _read_xml(options) @@ -44,6 +48,12 @@ hit_evalue = hit.expect # evalue hit_startQ = hit.query_start hit_endQ = hit.query_end + hit_identity = hit.identities + hit_aln_length = hit.align_length + pident = "%0.3f" % (100 * float(hit_identity) / float(hit_aln_length)) + if float(pident) < 0.1: + continue + hsp["pident"] = pident hsp["frame"] = hit_frame hsp["evalue"] = hit_evalue hsp["startQ"] = hit_startQ @@ -83,7 +93,8 @@ taxonomy = names if len(taxonomy) != 0: kingdoms.append(taxonomy[0]) - frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} # {'Pseudomonadota': 9, 'cellular organisms': 4} + # {'Pseudomonadota': 9, 'cellular organisms': 4} + frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True)) concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items()) hsp["taxonomy"] = concat_freq @@ -96,29 +107,40 @@ Write output """ log.info("Write output file " + options.output) - headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\n" + headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\tpident\n" f = open(options.output, "w+") f.write(headers) for h in hits: f.write(h + "\t" + str(hits[h]["query_length"]) + "\t") f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t") - f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" + str(hits[h]["frame"]) + "\t") - f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"]) + f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" + + str(hits[h]["frame"]) + "\t") + f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"] + "\t" + hits[h]["pident"]) f.write("\n") f.close() def _set_options(): + """ + Script parameters + """ parser = argparse.ArgumentParser() - parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', required=True, dest='xml_file') - parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', type=float, default=0.0001, dest='max_evalue') - parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', type=str, default='./rps2tsv_output.tab', dest='output') - parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', type=int, choices=[1, 2, 3, 4], default=1) + parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', + required=True, dest='xml_file') + parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', + type=float, default=0.0001, dest='max_evalue') + parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', + type=str, default='./rps2tsv_output.tab', dest='output') + parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', + type=int, choices=[1, 2, 3, 4], default=1) args = parser.parse_args() return args def _set_log_level(verbosity): + """ + Debbug + """ if verbosity == 1: log_format = '%(asctime)s %(levelname)-8s %(message)s' log.basicConfig(level=log.INFO, format=log_format)