comparison blast2tsv.py @ 2:77c3ef9b0ed7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author iuc
date Wed, 21 Aug 2024 13:13:39 +0000
parents e889010415a1
children
comparison
equal deleted inserted replaced
1:88ebde55bef8 2:77c3ef9b0ed7
26 _write_tsv(options, hits) 26 _write_tsv(options, hits)
27 27
28 28
29 def _guess_database(accession): 29 def _guess_database(accession):
30 """Guess the correct database for querying based off the format of the accession""" 30 """Guess the correct database for querying based off the format of the accession"""
31 database_mappings_refseq = {'AC_': 'nuccore', 'NC_': 'nuccore', 'NG_': 'nuccore', 31 if accession.isdigit():
32 'NT_': 'nuccore', 'NW_': 'nuccore', 'NZ_': 'nuccore', 32 db = 'taxonomy'
33 'AP_': 'protein', 'NP_': 'protein', 'YP_': 'protein', 33 else:
34 'XP_': 'protein', 'WP_': 'protein'} 34 database_mappings_refseq = {'AC': 'nuccore', 'NC': 'nuccore', 'NG': 'nuccore',
35 return database_mappings_refseq[accession[0:3]] 35 'NT': 'nuccore', 'NW': 'nuccore', 'NZ': 'nuccore',
36 'AP': 'protein', 'NP': 'protein', 'YP': 'protein',
37 'XP': 'protein', 'WP': 'protein', 'OX': 'nuccore'}
38 try:
39 db = database_mappings_refseq[accession[0:2]]
40 except KeyError:
41 db = 'nuccore'
42 log.warning("DB not found for " + accession + ". Set to nuccore.")
43 return db
36 44
37 45
38 def _read_xml(options): 46 def _read_xml(options):
39 """ 47 """
40 Parse XML blast results file 48 Parse XML blast results file
67 if hit_count == 1: 75 if hit_count == 1:
68 final_hit_count = hit_count 76 final_hit_count = hit_count
69 elif hit_count > 1: 77 elif hit_count > 1:
70 final_hit_count = hit_count - 1 78 final_hit_count = hit_count - 1
71 hsp["evalue"] = cumul_hit_evalue / final_hit_count # The smaller the E-value, the better the match 79 hsp["evalue"] = cumul_hit_evalue / final_hit_count # The smaller the E-value, the better the match
72 hsp["query_id"] = blast_record.query_id 80 hsp["query_id"] = blast_record.query # or query_id
73 hsp["query_length"] = blast_record.query_length # length of the query 81 hsp["query_length"] = blast_record.query_length # length of the query
74 hsp["accession"] = aln.accession.replace("ref|", "") 82 hsp["accession"] = aln.accession.replace("ref|", "")
75 hsp["description"] = aln.hit_def 83 hsp["description"] = aln.hit_def
76 hsp["hit_length"] = aln.length # length of the hit 84 hsp["hit_length"] = aln.length # length of the hit
77 hsp["hsp_length"] = hit.align_length # length of the hsp alignment 85 hsp["hsp_length"] = hit.align_length # length of the hsp alignment
99 hsp["organism"] = taxonomy[-1] 107 hsp["organism"] = taxonomy[-1]
100 except RuntimeError: 108 except RuntimeError:
101 hsp["tax_id"] = "" 109 hsp["tax_id"] = ""
102 hsp["taxonomy"] = "" 110 hsp["taxonomy"] = ""
103 hsp["organism"] = "" 111 hsp["organism"] = ""
104 log.warning("RuntimeError - Taxid not found for " + hsp["accession"]) 112 log.warning(f"RuntimeError - Taxid not found for {hsp['accession']}")
113 except Exception as err:
114 hsp["tax_id"] = ""
115 hsp["taxonomy"] = ""
116 hsp["organism"] = ""
117 log.warning(f"Taxid not found for {hsp['accession']}. The error is {err}")
105 if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \ 118 if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \
106 hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score: 119 hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score:
107 xml_results[hsp["query_id"]] = hsp 120 xml_results[hsp["query_id"]] = hsp
108 else: 121 else:
109 xml_results[hsp["query_id"]] = [hsp["query_length"]] 122 xml_results[hsp["query_id"]] = [hsp["query_length"]]