Mercurial > repos > iuc > virannot_otu
annotate rps2tsv.py @ 4:adcf06db3030 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
author | iuc |
---|---|
date | Tue, 13 May 2025 11:52:27 +0000 |
parents | 40fb54cc6628 |
children |
rev | line source |
---|---|
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
2 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
3 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
4 # Name: rps2ecsv |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
5 # Author: Marie Lefebvre - INRAE |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
6 # Aims: Convert rpsblast xml output to csv and add taxonomy |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
7 |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
8 """Module which converts rpsblast xml output to tsv and add taxonomy""" |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
9 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
10 import argparse |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
11 import json |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
12 import logging as log |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
13 from urllib import request |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
14 from urllib.error import HTTPError, URLError |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
15 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
16 from Bio.Blast import NCBIXML |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
17 from ete3 import NCBITaxa |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
18 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
19 ncbi = NCBITaxa() |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
20 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
21 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
22 def main(): |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
23 """ |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
24 Main function |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
25 """ |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
26 options = _set_options() |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
27 _set_log_level(options.verbosity) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
28 hits = _read_xml(options) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
29 _write_tsv(options, hits) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
30 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
31 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
32 def _read_xml(options): |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
33 """ |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
34 Parse XML RPSblast results file |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
35 """ |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
36 log.info("Read XML file " + options.xml_file) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
37 xml = open(options.xml_file, 'r') |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
38 records = NCBIXML.parse(xml) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
39 xml_results = {} |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
40 for blast_record in records: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
41 for aln in blast_record.alignments: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
42 for hit in aln.hsps: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
43 hsp = {} |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
44 hit_evalue = hit.expect |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
45 if hit_evalue > options.max_evalue: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
46 continue |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
47 hit_frame = hit.frame[0] # frame |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
48 hit_evalue = hit.expect # evalue |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
49 hit_startQ = hit.query_start |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
50 hit_endQ = hit.query_end |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
51 hit_identity = hit.identities |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
52 hit_aln_length = hit.align_length |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
53 pident = "%0.3f" % (100 * float(hit_identity) / float(hit_aln_length)) |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
54 if float(pident) < 0.1: |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
55 continue |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
56 hsp["pident"] = pident |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
57 hsp["frame"] = hit_frame |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
58 hsp["evalue"] = hit_evalue |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
59 hsp["startQ"] = hit_startQ |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
60 hsp["endQ"] = hit_endQ |
3
40fb54cc6628
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
2
diff
changeset
|
61 hsp["query_id"] = blast_record.query |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
62 hsp["cdd_id"] = aln.hit_def.split(",")[0] |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
63 hsp["hit_id"] = aln.hit_id |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
64 hsp["query_length"] = blast_record.query_length # length of the query |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
65 hsp["description"] = aln.hit_def |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
66 hsp["accession"] = aln.accession |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
67 hsp["pfam_id"] = hsp["description"].split(",")[0].replace("pfam", "PF") |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
68 log.info("Requeting Interpro for " + hsp["pfam_id"]) |
2
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
69 url = "https://www.ebi.ac.uk/interpro/api/taxonomy/uniprot/entry/pfam/" + hsp["pfam_id"] |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
70 req = request.Request(url) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
71 try: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
72 response = request.urlopen(req) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
73 except HTTPError as e: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
74 log.debug('Http error for interpro: ', e.code) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
75 except URLError as e: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
76 log.debug('Url error for interpro: ', e.reason) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
77 else: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
78 encoded_response = response.read() |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
79 decoded_response = encoded_response.decode() |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
80 payload = json.loads(decoded_response) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
81 kingdoms = [] |
2
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
82 for item in payload["results"][:6]: |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
83 if item["metadata"]["parent"] is not None: |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
84 lineage_parent = item["metadata"]["parent"] |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
85 translation = ncbi.get_taxid_translator([int(lineage_parent)]) |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
86 names = list(translation.values()) |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
87 if len(names) > 0: |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
88 if names[0] == "root": |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
89 taxonomy = names[1:] # remove 'root' at the begining |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
90 else: |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
91 taxonomy = names |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
92 else: |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
93 taxonomy = names |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
94 if len(taxonomy) != 0: |
735a21808348
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
iuc
parents:
0
diff
changeset
|
95 kingdoms.append(taxonomy[0]) |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
96 # {'Pseudomonadota': 9, 'cellular organisms': 4} |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
97 frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
98 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True)) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
99 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items()) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
100 hsp["taxonomy"] = concat_freq |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
101 xml_results[hsp["query_id"]] = hsp |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
102 return xml_results |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
103 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
104 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
105 def _write_tsv(options, hits): |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
106 """ |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
107 Write output |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
108 """ |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
109 log.info("Write output file " + options.output) |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
110 headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\tpident\n" |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
111 f = open(options.output, "w+") |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
112 f.write(headers) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
113 for h in hits: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
114 f.write(h + "\t" + str(hits[h]["query_length"]) + "\t") |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
115 f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t") |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
116 f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
117 + str(hits[h]["frame"]) + "\t") |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
118 f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"] + "\t" + hits[h]["pident"]) |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
119 f.write("\n") |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
120 f.close() |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
121 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
122 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
123 def _set_options(): |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
124 """ |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
125 Script parameters |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
126 """ |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
127 parser = argparse.ArgumentParser() |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
128 parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
129 required=True, dest='xml_file') |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
130 parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
131 type=float, default=0.0001, dest='max_evalue') |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
132 parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
133 type=str, default='./rps2tsv_output.tab', dest='output') |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
134 parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
135 type=int, choices=[1, 2, 3, 4], default=1) |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
136 args = parser.parse_args() |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
137 return args |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
138 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
139 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
140 def _set_log_level(verbosity): |
4
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
141 """ |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
142 Debbug |
adcf06db3030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
143 """ |
0
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
144 if verbosity == 1: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
145 log_format = '%(asctime)s %(levelname)-8s %(message)s' |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
146 log.basicConfig(level=log.INFO, format=log_format) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
147 elif verbosity == 3: |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
148 log_format = '%(filename)s:%(lineno)s - %(asctime)s %(levelname)-8s %(message)s' |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
149 log.basicConfig(level=log.DEBUG, format=log_format) |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
150 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
151 |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
152 if __name__ == "__main__": |
c9dac9b2e01c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
153 main() |