Mercurial > repos > iuc > virannot_blast2tsv
comparison blast2tsv.py @ 2:77c3ef9b0ed7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
| author | iuc |
|---|---|
| date | Wed, 21 Aug 2024 13:13:39 +0000 |
| parents | e889010415a1 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:88ebde55bef8 | 2:77c3ef9b0ed7 |
|---|---|
| 26 _write_tsv(options, hits) | 26 _write_tsv(options, hits) |
| 27 | 27 |
| 28 | 28 |
| 29 def _guess_database(accession): | 29 def _guess_database(accession): |
| 30 """Guess the correct database for querying based off the format of the accession""" | 30 """Guess the correct database for querying based off the format of the accession""" |
| 31 database_mappings_refseq = {'AC_': 'nuccore', 'NC_': 'nuccore', 'NG_': 'nuccore', | 31 if accession.isdigit(): |
| 32 'NT_': 'nuccore', 'NW_': 'nuccore', 'NZ_': 'nuccore', | 32 db = 'taxonomy' |
| 33 'AP_': 'protein', 'NP_': 'protein', 'YP_': 'protein', | 33 else: |
| 34 'XP_': 'protein', 'WP_': 'protein'} | 34 database_mappings_refseq = {'AC': 'nuccore', 'NC': 'nuccore', 'NG': 'nuccore', |
| 35 return database_mappings_refseq[accession[0:3]] | 35 'NT': 'nuccore', 'NW': 'nuccore', 'NZ': 'nuccore', |
| 36 'AP': 'protein', 'NP': 'protein', 'YP': 'protein', | |
| 37 'XP': 'protein', 'WP': 'protein', 'OX': 'nuccore'} | |
| 38 try: | |
| 39 db = database_mappings_refseq[accession[0:2]] | |
| 40 except KeyError: | |
| 41 db = 'nuccore' | |
| 42 log.warning("DB not found for " + accession + ". Set to nuccore.") | |
| 43 return db | |
| 36 | 44 |
| 37 | 45 |
| 38 def _read_xml(options): | 46 def _read_xml(options): |
| 39 """ | 47 """ |
| 40 Parse XML blast results file | 48 Parse XML blast results file |
| 67 if hit_count == 1: | 75 if hit_count == 1: |
| 68 final_hit_count = hit_count | 76 final_hit_count = hit_count |
| 69 elif hit_count > 1: | 77 elif hit_count > 1: |
| 70 final_hit_count = hit_count - 1 | 78 final_hit_count = hit_count - 1 |
| 71 hsp["evalue"] = cumul_hit_evalue / final_hit_count # The smaller the E-value, the better the match | 79 hsp["evalue"] = cumul_hit_evalue / final_hit_count # The smaller the E-value, the better the match |
| 72 hsp["query_id"] = blast_record.query_id | 80 hsp["query_id"] = blast_record.query # or query_id |
| 73 hsp["query_length"] = blast_record.query_length # length of the query | 81 hsp["query_length"] = blast_record.query_length # length of the query |
| 74 hsp["accession"] = aln.accession.replace("ref|", "") | 82 hsp["accession"] = aln.accession.replace("ref|", "") |
| 75 hsp["description"] = aln.hit_def | 83 hsp["description"] = aln.hit_def |
| 76 hsp["hit_length"] = aln.length # length of the hit | 84 hsp["hit_length"] = aln.length # length of the hit |
| 77 hsp["hsp_length"] = hit.align_length # length of the hsp alignment | 85 hsp["hsp_length"] = hit.align_length # length of the hsp alignment |
| 99 hsp["organism"] = taxonomy[-1] | 107 hsp["organism"] = taxonomy[-1] |
| 100 except RuntimeError: | 108 except RuntimeError: |
| 101 hsp["tax_id"] = "" | 109 hsp["tax_id"] = "" |
| 102 hsp["taxonomy"] = "" | 110 hsp["taxonomy"] = "" |
| 103 hsp["organism"] = "" | 111 hsp["organism"] = "" |
| 104 log.warning("RuntimeError - Taxid not found for " + hsp["accession"]) | 112 log.warning(f"RuntimeError - Taxid not found for {hsp['accession']}") |
| 113 except Exception as err: | |
| 114 hsp["tax_id"] = "" | |
| 115 hsp["taxonomy"] = "" | |
| 116 hsp["organism"] = "" | |
| 117 log.warning(f"Taxid not found for {hsp['accession']}. The error is {err}") | |
| 105 if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \ | 118 if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \ |
| 106 hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score: | 119 hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score: |
| 107 xml_results[hsp["query_id"]] = hsp | 120 xml_results[hsp["query_id"]] = hsp |
| 108 else: | 121 else: |
| 109 xml_results[hsp["query_id"]] = [hsp["query_length"]] | 122 xml_results[hsp["query_id"]] = [hsp["query_length"]] |
