Mercurial > repos > iuc > virannot_blast2tsv
diff blast2tsv.py @ 2:77c3ef9b0ed7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author | iuc |
---|---|
date | Wed, 21 Aug 2024 13:13:39 +0000 |
parents | e889010415a1 |
children |
line wrap: on
line diff
```diff
--- a/blast2tsv.py Sat May 18 18:14:54 2024 +0000
+++ b/blast2tsv.py Wed Aug 21 13:13:39 2024 +0000
@@ -28,11 +28,19 @@
 
 def _guess_database(accession):
     """Guess the correct database for querying based off the format of the accession"""
-    database_mappings_refseq = {'AC_': 'nuccore', 'NC_': 'nuccore', 'NG_': 'nuccore',
-                                'NT_': 'nuccore', 'NW_': 'nuccore', 'NZ_': 'nuccore',
-                                'AP_': 'protein', 'NP_': 'protein', 'YP_': 'protein',
-                                'XP_': 'protein', 'WP_': 'protein'}
-    return database_mappings_refseq[accession[0:3]]
+    if accession.isdigit():
+        db = 'taxonomy'
+    else:
+        database_mappings_refseq = {'AC': 'nuccore', 'NC': 'nuccore', 'NG': 'nuccore',
+                                    'NT': 'nuccore', 'NW': 'nuccore', 'NZ': 'nuccore',
+                                    'AP': 'protein', 'NP': 'protein', 'YP': 'protein',
+                                    'XP': 'protein', 'WP': 'protein', 'OX': 'nuccore'}
+        try:
+            db = database_mappings_refseq[accession[0:2]]
+        except KeyError:
+            db = 'nuccore'
+            log.warning("DB not found for " + accession + ". Set to nuccore.")
+    return db
 
 
 def _read_xml(options):
@@ -69,7 +77,7 @@
             elif hit_count > 1:
                 final_hit_count = hit_count - 1
             hsp["evalue"] = cumul_hit_evalue / final_hit_count  # The smaller the E-value, the better the match
-            hsp["query_id"] = blast_record.query_id
+            hsp["query_id"] = blast_record.query  # or query_id
             hsp["query_length"] = blast_record.query_length  # length of the query
             hsp["accession"] = aln.accession.replace("ref|", "")
             hsp["description"] = aln.hit_def
@@ -101,7 +109,12 @@
                 hsp["tax_id"] = ""
                 hsp["taxonomy"] = ""
                 hsp["organism"] = ""
-                log.warning("RuntimeError - Taxid not found for " + hsp["accession"])
+                log.warning(f"RuntimeError - Taxid not found for {hsp['accession']}")
+            except Exception as err:
+                hsp["tax_id"] = ""
+                hsp["taxonomy"] = ""
+                hsp["organism"] = ""
+                log.warning(f"Taxid not found for {hsp['accession']}. The error is {err}")
             if hsp["evalue"] <= options.max_evalue and hsp["queryOverlap"] >= options.min_qov and \
                     hsp["hitOverlap"] >= options.min_hov and hsp["score"] >= options.min_score:
                 xml_results[hsp["query_id"]] = hsp
```