Mercurial > repos > galaxyp > uniprotxml_downloader
changeset 5:265c35540faa draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
author | galaxyp |
---|---|
date | Fri, 04 Nov 2022 15:08:37 +0000 |
parents | 12692567c7f9 |
children | a371252a2cf6 |
files | macros.xml uniprotxml_downloader.py uniprotxml_downloader.xml |
diffstat | 3 files changed, 44 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Nov 04 15:08:37 2022 +0000
@@ -0,0 +1,8 @@
+<macros>
+    <xml name="query_field">
+        <param name="field" type="select" label="Field">
+            <option value="taxonomy_name">Taxonomy Name</option>
+            <option value="taxonomy_id">Taxonomy ID</option>
+        </param>
+    </xml>
+</macros>
--- a/uniprotxml_downloader.py Tue Jun 01 11:54:47 2021 +0000 +++ b/uniprotxml_downloader.py Fri Nov 04 15:08:37 2022 +0000 @@ -52,6 +52,7 @@ parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download') parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries') parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format') + parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field') parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml') parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr') (options, args) = parser.parse_args() @@ -66,7 +67,7 @@ taxid = fields[options.column].strip() if taxid: taxids.add(taxid) - taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids] + taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids] taxon_query = ' OR '.join(taxon_queries) if options.output: dest_path = options.output @@ -74,20 +75,26 @@ dest_path = "uniprot_%s.xml" % '_'.join(taxids) reviewed = " reviewed:%s" % options.reviewed if options.reviewed else '' try: - url = 'https://www.uniprot.org/uniprot/' + url = 'https://rest.uniprot.org/uniprotkb/stream' query = "%s%s" % (taxon_query, reviewed) - params = {'query': query, 'force': 'yes', 'format': options.format} + params = {'query': query, 'format': options.format} if options.debug: print("%s ? 
%s" % (url, params), file=sys.stderr) data = parse.urlencode(params) - print(f"Retrieving: {url+data}") + print(f"Retrieving: {url}?{data}") adapter = TimeoutHTTPAdapter(max_retries=retry_strategy) + http = requests.Session() http.mount("https://", adapter) - response = http.post(url, data=params) + response = http.get(url, params=params) http.close() + + if response.status_code != 200: + exit(f"Request failed with status code {response.status_code}:\n{response.text}") + with open(dest_path, 'w') as fh: fh.write(response.text) + if options.format == 'xml': with open(dest_path, 'r') as contents: while True:
--- a/uniprotxml_downloader.xml Tue Jun 01 11:54:47 2021 +0000 +++ b/uniprotxml_downloader.xml Fri Nov 04 15:08:37 2022 +0000 @@ -1,5 +1,8 @@ -<tool id="uniprotxml_downloader" name="UniProt" version="2.2.0" profile="21.01"> +<tool id="uniprotxml_downloader" name="UniProt" version="2.3.0" profile="21.01"> <description>download proteome as XML or fasta</description> + <macros> + <import>macros.xml</import> + </macros> <requirements> <requirement type="package" version="2.25.1">requests</requirement> </requirements> @@ -11,14 +14,17 @@ python '$__tool_directory__/uniprotxml_downloader.py' #if $taxid.input_choice == 'common': --taxon $taxid.organism + --field taxonomy_id #if $taxid.reviewed: --reviewed=$taxid.reviewed #end if #elif $taxid.input_choice == 'taxids': + --field $taxid.field #for $id in $taxid.taxons.split(','): -t '$id' #end for #elif $taxid.input_choice == 'history': + --field $taxid.field --input='${taxid.taxon_file}' --column=#echo int(str($taxid.column)) - 1# #end if @@ -58,10 +64,12 @@ help="Enter one or more Organsim IDs (separated by commas) from http://www.uniprot.org/proteomes/"> <validator type="regex" message="OrganismID[,OrganismID]">^\w+( \w+)*(,\w+( \w+)*)*$</validator> </param> + <expand macro="query_field"/> </when> <when value="history"> <param name="taxon_file" type="data" format="tabular,txt" label="Dataset (tab separated) with Taxon ID/Name column"/> <param name="column" type="data_column" data_ref="taxon_file" label="Column with Taxon ID/name"/> + <expand macro="query_field"/> </when> </conditional> <param name="format" type="select" label="uniprot output format"> @@ -89,7 +97,20 @@ </test> <test> <param name="input_choice" value="taxids"/> + <param name="taxons" value="765963,512562"/> + <param name="field" value="taxonomy_id"/> + <param name="format" value="fasta"/> + <output name="proteome"> + <assert_contents> + <has_text text="Shi470" /> + <has_text text="PeCan4" /> + </assert_contents> + </output> + </test> + <test> + <param 
name="input_choice" value="taxids"/> <param name="taxons" value="Shi470,PeCan4"/> + <param name="field" value="taxonomy_name"/> <param name="format" value="fasta"/> <output name="proteome"> <assert_contents> @@ -102,6 +123,7 @@ <param name="input_choice" value="history"/> <param name="taxon_file" value="Helicobacter_strains.tsv" ftype="tabular"/> <param name="column" value="1"/> + <param name="field" value="taxonomy_name"/> <param name="format" value="fasta"/> <output name="proteome"> <assert_contents> @@ -114,6 +136,7 @@ <param name="input_choice" value="history"/> <param name="taxon_file" value="Helicobacter_strains_ids.tsv" ftype="tabular"/> <param name="column" value="2"/> + <param name="field" value="taxonomy_id"/> <param name="format" value="fasta"/> <output name="proteome"> <assert_contents>