changeset 5:265c35540faa draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
author galaxyp
date Fri, 04 Nov 2022 15:08:37 +0000
parents 12692567c7f9
children a371252a2cf6
files macros.xml uniprotxml_downloader.py uniprotxml_downloader.xml
diffstat 3 files changed, 44 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Nov 04 15:08:37 2022 +0000
@@ -0,0 +1,8 @@
+<macros>
+    <xml name="query_field">
+        <param name="field" type="select" label="Field">
+            <option value="taxonomy_name">Taxonomy Name</option>
+            <option value="taxonomy_id">Taxonomy ID</option>
+        </param>
+    </xml>
+</macros>
--- a/uniprotxml_downloader.py	Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.py	Fri Nov 04 15:08:37 2022 +0000
@@ -52,6 +52,7 @@
     parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
     parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
     parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
+    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field')
     parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
     parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
     (options, args) = parser.parse_args()
@@ -66,7 +67,7 @@
                     taxid = fields[options.column].strip()
                     if taxid:
                         taxids.add(taxid)
-    taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids]
+    taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids]
     taxon_query = ' OR '.join(taxon_queries)
     if options.output:
         dest_path = options.output
@@ -74,20 +75,26 @@
         dest_path = "uniprot_%s.xml" % '_'.join(taxids)
     reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
     try:
-        url = 'https://www.uniprot.org/uniprot/'
+        url = 'https://rest.uniprot.org/uniprotkb/stream'
         query = "%s%s" % (taxon_query, reviewed)
-        params = {'query': query, 'force': 'yes', 'format': options.format}
+        params = {'query': query, 'format': options.format}
         if options.debug:
             print("%s ? %s" % (url, params), file=sys.stderr)
         data = parse.urlencode(params)
-        print(f"Retrieving: {url+data}")
+        print(f"Retrieving: {url}?{data}")
         adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
+
         http = requests.Session()
         http.mount("https://", adapter)
-        response = http.post(url, data=params)
+        response = http.get(url, params=params)
         http.close()
+
+        if response.status_code != 200:
+            exit(f"Request failed with status code {response.status_code}:\n{response.text}")
+
         with open(dest_path, 'w') as fh:
             fh.write(response.text)
+
         if options.format == 'xml':
             with open(dest_path, 'r') as contents:
                 while True:
--- a/uniprotxml_downloader.xml	Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.xml	Fri Nov 04 15:08:37 2022 +0000
@@ -1,5 +1,8 @@
-<tool id="uniprotxml_downloader" name="UniProt" version="2.2.0" profile="21.01">
+<tool id="uniprotxml_downloader" name="UniProt" version="2.3.0" profile="21.01">
     <description>download proteome as XML or fasta</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <requirements>
         <requirement type="package" version="2.25.1">requests</requirement>
     </requirements>
@@ -11,14 +14,17 @@
 python '$__tool_directory__/uniprotxml_downloader.py'
 #if $taxid.input_choice == 'common':
     --taxon $taxid.organism
+    --field taxonomy_id
     #if $taxid.reviewed:
         --reviewed=$taxid.reviewed
     #end if
 #elif $taxid.input_choice == 'taxids':
+    --field $taxid.field
     #for $id in $taxid.taxons.split(','):
         -t '$id'
     #end for
 #elif $taxid.input_choice == 'history':
+    --field $taxid.field
     --input='${taxid.taxon_file}'
     --column=#echo int(str($taxid.column)) - 1#
 #end if
@@ -58,10 +64,12 @@
                        help="Enter one or more Organsim IDs (separated by commas) from http://www.uniprot.org/proteomes/">
                     <validator type="regex" message="OrganismID[,OrganismID]">^\w+( \w+)*(,\w+( \w+)*)*$</validator>
                 </param>
+                <expand macro="query_field"/>
             </when>
             <when value="history">
                 <param name="taxon_file" type="data" format="tabular,txt" label="Dataset (tab separated) with Taxon ID/Name column"/>
                 <param name="column" type="data_column" data_ref="taxon_file" label="Column with Taxon ID/name"/>
+                <expand macro="query_field"/>
             </when>
         </conditional>
         <param name="format" type="select" label="uniprot output format">
@@ -89,7 +97,20 @@
         </test>
         <test>
             <param name="input_choice" value="taxids"/>
+            <param name="taxons" value="765963,512562"/>
+            <param name="field" value="taxonomy_id"/>
+            <param name="format" value="fasta"/>
+            <output name="proteome">
+                <assert_contents>
+                    <has_text text="Shi470" />
+                    <has_text text="PeCan4" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_choice" value="taxids"/>
             <param name="taxons" value="Shi470,PeCan4"/>
+            <param name="field" value="taxonomy_name"/>
             <param name="format" value="fasta"/>
             <output name="proteome">
                 <assert_contents>
@@ -102,6 +123,7 @@
             <param name="input_choice" value="history"/>
             <param name="taxon_file" value="Helicobacter_strains.tsv" ftype="tabular"/>
             <param name="column" value="1"/>
+            <param name="field" value="taxonomy_name"/>
             <param name="format" value="fasta"/>
             <output name="proteome">
                 <assert_contents>
@@ -114,6 +136,7 @@
             <param name="input_choice" value="history"/>
             <param name="taxon_file" value="Helicobacter_strains_ids.tsv" ftype="tabular"/>
             <param name="column" value="2"/>
+            <param name="field" value="taxonomy_id"/>
             <param name="format" value="fasta"/>
             <output name="proteome">
                 <assert_contents>