Mercurial > repos > iuc > virannot_rps2tsv
comparison rps2tsv.py @ 4:998724a43694 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
| author | iuc |
|---|---|
| date | Tue, 13 May 2025 11:52:07 +0000 |
| parents | d1fd5579469d |
| children |
comparison
equal
deleted
inserted
replaced
| 3:d1fd5579469d | 4:998724a43694 |
|---|---|
| 3 | 3 |
| 4 # Name: rps2ecsv | 4 # Name: rps2ecsv |
| 5 # Author: Marie Lefebvre - INRAE | 5 # Author: Marie Lefebvre - INRAE |
| 6 # Aims: Convert rpsblast xml output to csv and add taxonomy | 6 # Aims: Convert rpsblast xml output to csv and add taxonomy |
| 7 | 7 |
| 8 """Module which converts rpsblast xml output to tsv and add taxonomy""" | |
| 8 | 9 |
| 9 import argparse | 10 import argparse |
| 10 import json | 11 import json |
| 11 import logging as log | 12 import logging as log |
| 12 from urllib import request | 13 from urllib import request |
| 17 | 18 |
| 18 ncbi = NCBITaxa() | 19 ncbi = NCBITaxa() |
| 19 | 20 |
| 20 | 21 |
| 21 def main(): | 22 def main(): |
| 23 """ | |
| 24 Main function | |
| 25 """ | |
| 22 options = _set_options() | 26 options = _set_options() |
| 23 _set_log_level(options.verbosity) | 27 _set_log_level(options.verbosity) |
| 24 hits = _read_xml(options) | 28 hits = _read_xml(options) |
| 25 _write_tsv(options, hits) | 29 _write_tsv(options, hits) |
| 26 | 30 |
| 42 continue | 46 continue |
| 43 hit_frame = hit.frame[0] # frame | 47 hit_frame = hit.frame[0] # frame |
| 44 hit_evalue = hit.expect # evalue | 48 hit_evalue = hit.expect # evalue |
| 45 hit_startQ = hit.query_start | 49 hit_startQ = hit.query_start |
| 46 hit_endQ = hit.query_end | 50 hit_endQ = hit.query_end |
| 51 hit_identity = hit.identities | |
| 52 hit_aln_length = hit.align_length | |
| 53 pident = "%0.3f" % (100 * float(hit_identity) / float(hit_aln_length)) | |
| 54 if float(pident) < 0.1: | |
| 55 continue | |
| 56 hsp["pident"] = pident | |
| 47 hsp["frame"] = hit_frame | 57 hsp["frame"] = hit_frame |
| 48 hsp["evalue"] = hit_evalue | 58 hsp["evalue"] = hit_evalue |
| 49 hsp["startQ"] = hit_startQ | 59 hsp["startQ"] = hit_startQ |
| 50 hsp["endQ"] = hit_endQ | 60 hsp["endQ"] = hit_endQ |
| 51 hsp["query_id"] = blast_record.query | 61 hsp["query_id"] = blast_record.query |
| 81 taxonomy = names | 91 taxonomy = names |
| 82 else: | 92 else: |
| 83 taxonomy = names | 93 taxonomy = names |
| 84 if len(taxonomy) != 0: | 94 if len(taxonomy) != 0: |
| 85 kingdoms.append(taxonomy[0]) | 95 kingdoms.append(taxonomy[0]) |
| 86 frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} # {'Pseudomonadota': 9, 'cellular organisms': 4} | 96 # {'Pseudomonadota': 9, 'cellular organisms': 4} |
| 97 frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} | |
| 87 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True)) | 98 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True)) |
| 88 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items()) | 99 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items()) |
| 89 hsp["taxonomy"] = concat_freq | 100 hsp["taxonomy"] = concat_freq |
| 90 xml_results[hsp["query_id"]] = hsp | 101 xml_results[hsp["query_id"]] = hsp |
| 91 return xml_results | 102 return xml_results |
| 94 def _write_tsv(options, hits): | 105 def _write_tsv(options, hits): |
| 95 """ | 106 """ |
| 96 Write output | 107 Write output |
| 97 """ | 108 """ |
| 98 log.info("Write output file " + options.output) | 109 log.info("Write output file " + options.output) |
| 99 headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\n" | 110 headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\tpident\n" |
| 100 f = open(options.output, "w+") | 111 f = open(options.output, "w+") |
| 101 f.write(headers) | 112 f.write(headers) |
| 102 for h in hits: | 113 for h in hits: |
| 103 f.write(h + "\t" + str(hits[h]["query_length"]) + "\t") | 114 f.write(h + "\t" + str(hits[h]["query_length"]) + "\t") |
| 104 f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t") | 115 f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t") |
| 105 f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" + str(hits[h]["frame"]) + "\t") | 116 f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" |
| 106 f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"]) | 117 + str(hits[h]["frame"]) + "\t") |
| 118 f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"] + "\t" + hits[h]["pident"]) | |
| 107 f.write("\n") | 119 f.write("\n") |
| 108 f.close() | 120 f.close() |
| 109 | 121 |
| 110 | 122 |
| 111 def _set_options(): | 123 def _set_options(): |
| 124 """ | |
| 125 Script parameters | |
| 126 """ | |
| 112 parser = argparse.ArgumentParser() | 127 parser = argparse.ArgumentParser() |
| 113 parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', required=True, dest='xml_file') | 128 parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', |
| 114 parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', type=float, default=0.0001, dest='max_evalue') | 129 required=True, dest='xml_file') |
| 115 parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', type=str, default='./rps2tsv_output.tab', dest='output') | 130 parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', |
| 116 parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', type=int, choices=[1, 2, 3, 4], default=1) | 131 type=float, default=0.0001, dest='max_evalue') |
| 132 parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', | |
| 133 type=str, default='./rps2tsv_output.tab', dest='output') | |
| 134 parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', | |
| 135 type=int, choices=[1, 2, 3, 4], default=1) | |
| 117 args = parser.parse_args() | 136 args = parser.parse_args() |
| 118 return args | 137 return args |
| 119 | 138 |
| 120 | 139 |
| 121 def _set_log_level(verbosity): | 140 def _set_log_level(verbosity): |
| 141 """ | |
| 142 Debbug | |
| 143 """ | |
| 122 if verbosity == 1: | 144 if verbosity == 1: |
| 123 log_format = '%(asctime)s %(levelname)-8s %(message)s' | 145 log_format = '%(asctime)s %(levelname)-8s %(message)s' |
| 124 log.basicConfig(level=log.INFO, format=log_format) | 146 log.basicConfig(level=log.INFO, format=log_format) |
| 125 elif verbosity == 3: | 147 elif verbosity == 3: |
| 126 log_format = '%(filename)s:%(lineno)s - %(asctime)s %(levelname)-8s %(message)s' | 148 log_format = '%(filename)s:%(lineno)s - %(asctime)s %(levelname)-8s %(message)s' |
