Mercurial > repos > iuc > virannot_otu
comparison rps2tsv.py @ 4:adcf06db3030 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
author | iuc |
---|---|
date | Tue, 13 May 2025 11:52:27 +0000 |
parents | 40fb54cc6628 |
children |
comparison
equal
deleted
inserted
replaced
3:40fb54cc6628 | 4:adcf06db3030 |
---|---|
3 | 3 |
4 # Name: rps2ecsv | 4 # Name: rps2ecsv |
5 # Author: Marie Lefebvre - INRAE | 5 # Author: Marie Lefebvre - INRAE |
6 # Aims: Convert rpsblast xml output to csv and add taxonomy | 6 # Aims: Convert rpsblast xml output to csv and add taxonomy |
7 | 7 |
8 """Module which converts rpsblast xml output to tsv and add taxonomy""" | |
8 | 9 |
9 import argparse | 10 import argparse |
10 import json | 11 import json |
11 import logging as log | 12 import logging as log |
12 from urllib import request | 13 from urllib import request |
17 | 18 |
18 ncbi = NCBITaxa() | 19 ncbi = NCBITaxa() |
19 | 20 |
20 | 21 |
21 def main(): | 22 def main(): |
23 """ | |
24 Main function | |
25 """ | |
22 options = _set_options() | 26 options = _set_options() |
23 _set_log_level(options.verbosity) | 27 _set_log_level(options.verbosity) |
24 hits = _read_xml(options) | 28 hits = _read_xml(options) |
25 _write_tsv(options, hits) | 29 _write_tsv(options, hits) |
26 | 30 |
42 continue | 46 continue |
43 hit_frame = hit.frame[0] # frame | 47 hit_frame = hit.frame[0] # frame |
44 hit_evalue = hit.expect # evalue | 48 hit_evalue = hit.expect # evalue |
45 hit_startQ = hit.query_start | 49 hit_startQ = hit.query_start |
46 hit_endQ = hit.query_end | 50 hit_endQ = hit.query_end |
51 hit_identity = hit.identities | |
52 hit_aln_length = hit.align_length | |
53 pident = "%0.3f" % (100 * float(hit_identity) / float(hit_aln_length)) | |
54 if float(pident) < 0.1: | |
55 continue | |
56 hsp["pident"] = pident | |
47 hsp["frame"] = hit_frame | 57 hsp["frame"] = hit_frame |
48 hsp["evalue"] = hit_evalue | 58 hsp["evalue"] = hit_evalue |
49 hsp["startQ"] = hit_startQ | 59 hsp["startQ"] = hit_startQ |
50 hsp["endQ"] = hit_endQ | 60 hsp["endQ"] = hit_endQ |
51 hsp["query_id"] = blast_record.query | 61 hsp["query_id"] = blast_record.query |
81 taxonomy = names | 91 taxonomy = names |
82 else: | 92 else: |
83 taxonomy = names | 93 taxonomy = names |
84 if len(taxonomy) != 0: | 94 if len(taxonomy) != 0: |
85 kingdoms.append(taxonomy[0]) | 95 kingdoms.append(taxonomy[0]) |
86 frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} # {'Pseudomonadota': 9, 'cellular organisms': 4} | 96 # {'Pseudomonadota': 9, 'cellular organisms': 4} |
97 frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} | |
87 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True)) | 98 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True)) |
88 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items()) | 99 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items()) |
89 hsp["taxonomy"] = concat_freq | 100 hsp["taxonomy"] = concat_freq |
90 xml_results[hsp["query_id"]] = hsp | 101 xml_results[hsp["query_id"]] = hsp |
91 return xml_results | 102 return xml_results |
94 def _write_tsv(options, hits): | 105 def _write_tsv(options, hits): |
95 """ | 106 """ |
96 Write output | 107 Write output |
97 """ | 108 """ |
98 log.info("Write output file " + options.output) | 109 log.info("Write output file " + options.output) |
99 headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\n" | 110 headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\tpident\n" |
100 f = open(options.output, "w+") | 111 f = open(options.output, "w+") |
101 f.write(headers) | 112 f.write(headers) |
102 for h in hits: | 113 for h in hits: |
103 f.write(h + "\t" + str(hits[h]["query_length"]) + "\t") | 114 f.write(h + "\t" + str(hits[h]["query_length"]) + "\t") |
104 f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t") | 115 f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t") |
105 f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" + str(hits[h]["frame"]) + "\t") | 116 f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" |
106 f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"]) | 117 + str(hits[h]["frame"]) + "\t") |
118 f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"] + "\t" + hits[h]["pident"]) | |
107 f.write("\n") | 119 f.write("\n") |
108 f.close() | 120 f.close() |
109 | 121 |
110 | 122 |
111 def _set_options(): | 123 def _set_options(): |
124 """ | |
125 Script parameters | |
126 """ | |
112 parser = argparse.ArgumentParser() | 127 parser = argparse.ArgumentParser() |
113 parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', required=True, dest='xml_file') | 128 parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', |
114 parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', type=float, default=0.0001, dest='max_evalue') | 129 required=True, dest='xml_file') |
115 parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', type=str, default='./rps2tsv_output.tab', dest='output') | 130 parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', |
116 parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', type=int, choices=[1, 2, 3, 4], default=1) | 131 type=float, default=0.0001, dest='max_evalue') |
132 parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', | |
133 type=str, default='./rps2tsv_output.tab', dest='output') | |
134 parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', | |
135 type=int, choices=[1, 2, 3, 4], default=1) | |
117 args = parser.parse_args() | 136 args = parser.parse_args() |
118 return args | 137 return args |
119 | 138 |
120 | 139 |
121 def _set_log_level(verbosity): | 140 def _set_log_level(verbosity): |
141 """ | |
142 Debbug | |
143 """ | |
122 if verbosity == 1: | 144 if verbosity == 1: |
123 log_format = '%(asctime)s %(levelname)-8s %(message)s' | 145 log_format = '%(asctime)s %(levelname)-8s %(message)s' |
124 log.basicConfig(level=log.INFO, format=log_format) | 146 log.basicConfig(level=log.INFO, format=log_format) |
125 elif verbosity == 3: | 147 elif verbosity == 3: |
126 log_format = '%(filename)s:%(lineno)s - %(asctime)s %(levelname)-8s %(message)s' | 148 log_format = '%(filename)s:%(lineno)s - %(asctime)s %(levelname)-8s %(message)s' |