Mercurial > repos > iuc > virannot_blast2tsv
comparison blast2tsv.py @ 2:77c3ef9b0ed7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author | iuc |
---|---|
date | Wed, 21 Aug 2024 13:13:39 +0000 |
parents | e889010415a1 |
children |
comparison
equal
deleted
inserted
replaced
1:88ebde55bef8 | 2:77c3ef9b0ed7 |
---|---|
26 _write_tsv(options, hits) | 26 _write_tsv(options, hits) |
27 | 27 |
28 | 28 |
29 def _guess_database(accession): | 29 def _guess_database(accession): |
30 """Guess the correct database for querying based off the format of the accession""" | 30 """Guess the correct database for querying based off the format of the accession""" |
31 database_mappings_refseq = {'AC_': 'nuccore', 'NC_': 'nuccore', 'NG_': 'nuccore', | 31 if accession.isdigit(): |
32 'NT_': 'nuccore', 'NW_': 'nuccore', 'NZ_': 'nuccore', | 32 db = 'taxonomy' |
33 'AP_': 'protein', 'NP_': 'protein', 'YP_': 'protein', | 33 else: |
34 'XP_': 'protein', 'WP_': 'protein'} | 34 database_mappings_refseq = {'AC': 'nuccore', 'NC': 'nuccore', 'NG': 'nuccore', |
35 return database_mappings_refseq[accession[0:3]] | 35 'NT': 'nuccore', 'NW': 'nuccore', 'NZ': 'nuccore', |
36 'AP': 'protein', 'NP': 'protein', 'YP': 'protein', | |
37 'XP': 'protein', 'WP': 'protein', 'OX': 'nuccore'} | |
38 try: | |
39 db = database_mappings_refseq[accession[0:2]] | |
40 except KeyError: | |
41 db = 'nuccore' | |
42 log.warning("DB not found for " + accession + ". Set to nuccore.") | |
43 return db | |
36 | 44 |
37 | 45 |
38 def _read_xml(options): | 46 def _read_xml(options): |
39 """ | 47 """ |
40 Parse XML blast results file | 48 Parse XML blast results file |
67 if hit_count == 1: | 75 if hit_count == 1: |
68 final_hit_count = hit_count | 76 final_hit_count = hit_count |
69 elif hit_count > 1: | 77 elif hit_count > 1: |
70 final_hit_count = hit_count - 1 | 78 final_hit_count = hit_count - 1 |
71 hsp["evalue"] = cumul_hit_evalue / final_hit_count # The smaller the E-value, the better the match | 79 hsp["evalue"] = cumul_hit_evalue / final_hit_count # The smaller the E-value, the better the match |
72 hsp["query_id"] = blast_record.query_id | 80 hsp["query_id"] = blast_record.query # or query_id |
73 hsp["query_length"] = blast_record.query_length # length of the query | 81 hsp["query_length"] = blast_record.query_length # length of the query |
74 hsp["accession"] = aln.accession.replace("ref|", "") | 82 hsp["accession"] = aln.accession.replace("ref|", "") |
75 hsp["description"] = aln.hit_def | 83 hsp["description"] = aln.hit_def |
76 hsp["hit_length"] = aln.length # length of the hit | 84 hsp["hit_length"] = aln.length # length of the hit |
77 hsp["hsp_length"] = hit.align_length # length of the hsp alignment | 85 hsp["hsp_length"] = hit.align_length # length of the hsp alignment |
99 hsp["organism"] = taxonomy[-1] | 107 hsp["organism"] = taxonomy[-1] |
100 except RuntimeError: | 108 except RuntimeError: |
101 hsp["tax_id"] = "" | 109 hsp["tax_id"] = "" |
102 hsp["taxonomy"] = "" | 110 hsp["taxonomy"] = "" |
103 hsp["organism"] = "" | 111 hsp["organism"] = "" |
104 log.warning("RuntimeError - Taxid not found for " + hsp["accession"]) | 112 log.warning(f"RuntimeError - Taxid not found for {hsp['accession']}") |
113 except Exception as err: | |
114 hsp["tax_id"] = "" | |
115 hsp["taxonomy"] = "" | |
116 hsp["organism"] = "" | |
117 log.warning(f"Taxid not found for {hsp['accession']}. The error is {err}") | |
105 if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \ | 118 if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \ |
106 hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score: | 119 hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score: |
107 xml_results[hsp["query_id"]] = hsp | 120 xml_results[hsp["query_id"]] = hsp |
108 else: | 121 else: |
109 xml_results[hsp["query_id"]] = [hsp["query_length"]] | 122 xml_results[hsp["query_id"]] = [hsp["query_length"]] |