# HG changeset patch # User artbio # Date 1697322896 0 # Node ID 4af77e1af12a01858f2d5ad607a2be7b7c171877 # Parent 706fe8139955c7f4c14e5f79ff46c0bce8814d2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/fetch_fasta_from_ncbi commit 1a90ac7cdeb35399011207ab43c78043fd5c7287 diff -r 706fe8139955 -r 4af77e1af12a fetch_fasta_from_NCBI.py --- a/fetch_fasta_from_NCBI.py Tue Mar 16 23:26:58 2021 +0000 +++ b/fetch_fasta_from_NCBI.py Sat Oct 14 22:34:56 2023 +0000 @@ -205,7 +205,7 @@ fasta = response.read() self.logger.debug("Did all that") response.close() - if((response_code != 200) or + if ((response_code != 200) or (b"Resource temporarily unavailable" in fasta) or (b"Error" in fasta) or (not fasta.startswith(b">"))): serverTransaction = False @@ -260,7 +260,7 @@ return querylog def sanitiser(self, db, fastaseq): - if(db not in "nuccore protein"): + if (db not in "nuccore protein"): return fastaseq regex = re.compile(r"[ACDEFGHIKLMNPQRSTVWYBZ]{49,}") sane_seqlist = [] diff -r 706fe8139955 -r 4af77e1af12a fetch_fasta_from_NCBI.xml --- a/fetch_fasta_from_NCBI.xml Tue Mar 16 23:26:58 2021 +0000 +++ b/fetch_fasta_from_NCBI.xml Sat Oct 14 22:34:56 2023 +0000 @@ -1,4 +1,4 @@ - + urllib3 @@ -64,34 +64,34 @@ - + - + - + - + - + - + @@ -105,15 +105,19 @@ **What it does** -This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database (nuccore or protein) for a given entrez query. +This tool retrieves nucleotide/peptide sequences from the corresponding +NCBI database (nuccore or protein) for a given entrez query. -The tool can be set with the query "txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]" for metaVisitor use purpose +The tool can be set with the query +"txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]" +for metaVisitor use purpose See `Entrez help`_ for explanation of query formats Be sure to use the appropriate NCBI query syntax. Always use [] to specify the search fields. -By checking the checkbox you can also run your query without sequence retrieval and get the number of sequences your query will fetch. +By checking the checkbox you can also run your query without sequence +retrieval and get the number of sequences your query will fetch. Note that the tool may fail in case of interrupted connexion with the NCBI database (see the log dataset) @@ -146,16 +150,8 @@ logging level (default: INFO) ]]> -**Acknowledgments** - -This Galaxy tool has been adapted from the galaxy tool `get_fasta_from_taxon`_. - -It is Copyright © 2014-2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_. .. _Entrez help: https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options -.. _get_fasta_from_taxon: https://toolshed.g2.bx.psu.edu/view/crs4/get_fasta_from_taxon -.. _CNRS and University Pierre et Marie Curie: http://www.ibps.upmc.fr/en -.. _MIT license: http://opensource.org/licenses/MIT