comparison dbbuilder.xml @ 11:8e637098a8ab draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/dbbuilder commit 16ba4570b04301b774ee0420694f379cc640744b
author galaxyp
date Tue, 27 Sep 2022 13:22:04 +0000
parents e9df53a75f3c
children 983bf725dfc2
comparison
equal deleted inserted replaced
10:e9df53a75f3c 11:8e637098a8ab
1 <tool id="dbbuilder" name="Protein Database Downloader" version="0.3.2"> 1 <tool id="dbbuilder" name="Protein Database Downloader" version="0.3.3">
2 <description></description> 2 <description></description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.20.1">wget</requirement> 4 <requirement type="package" version="1.20.1">wget</requirement>
5 <requirement type="package" version="3.8">python</requirement>
6 <requirement type="package" version="2.20.1">requests</requirement>
5 </requirements> 7 </requirements>
6 <stdio> 8 <stdio>
7 <exit_code range="1:" level="fatal" description="Error downloading database." /> 9 <exit_code range="1:" level="fatal" description="Error downloading database." />
8 <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." /> 10 <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." />
9 </stdio> 11 </stdio>
12 <!-- http://maxquant.org/contaminants.zip --> 14 <!-- http://maxquant.org/contaminants.zip -->
13 <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz--> 15 <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz-->
14 <command> 16 <command>
15 <![CDATA[ 17 <![CDATA[
16 #if $source.from == "uniprot" 18 #if $source.from == "uniprot"
17 #set $url = 'http://www.uniprot.org/uniprot/?query=taxonomy:"' + str($source.taxon) + '"' + str($source.set) + str($source.reviewed) + '&force=yes&format=fasta' + str($source.include_isoform) 19 #if $source.set:
18 #set $type = "direct" 20 #set $modified_set = '&' + str($source.set)
21 #else
22 #set $modified_set = ''
23 #end if
24 #if $source.taxon_id
25 #set $taxon_id = $source.taxon_id
26 #else
27 #set $taxon_id = $source.taxon
28 #end if
29 #set $url = 'https://rest.uniprot.org/uniprotkb/stream?compressed=true&format=fasta&query=taxonomy_id:"' + str($taxon_id) + '"' + str($modified_set) + str($source.reviewed) + str($source.include_isoform)
30 #set $type = "uniprotkb_stream"
19 #elif $source.from == "cRAP" 31 #elif $source.from == "cRAP"
20 ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta" 32 ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta"
21 #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" 33 #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta"
22 #set $type = "direct" 34 #set $type = "direct"
23 #elif $source.from == "HMP" 35 #elif $source.from == "HMP"
32 #end if 44 #end if
33 #elif $source.from == "url" 45 #elif $source.from == "url"
34 #set $url = $source.url 46 #set $url = $source.url
35 #set $type = $source.archive_type 47 #set $type = $source.archive_type
36 #end if 48 #end if
37 #if $type =="direct" 49 #if $type =="uniprotkb_stream"
50 python '$__tool_directory__/uniprotkb.py' --url '$url' -o 'tmp.gz' && gzip -dc 'tmp.gz' > '${output_database}'
51 #elif $type =="direct"
38 wget -nv '$url' -O '${output_database}' --no-check-certificate 52 wget -nv '$url' -O '${output_database}' --no-check-certificate
39 #elif $type =="zip" 53 #elif $type =="zip"
40 wget -nv '$url' -O tmp.zip --no-check-certificate && zcat -c tmp.zip > '${output_database}' 54 wget -nv '$url' -O tmp.zip --no-check-certificate && zcat -c tmp.zip > '${output_database}'
41 #elif $type =="gzip" 55 #elif $type =="gzip"
42 wget -nv '$url' -O tmp.gz --no-check-certificate && (if `command -v gzcat > /dev/null`; then gzcat tmp.gz; else zcat tmp.gz ; fi) > '${output_database}' 56 wget -nv '$url' -O tmp.gz --no-check-certificate && (if `command -v gzcat > /dev/null`; then gzcat tmp.gz; else zcat tmp.gz ; fi) > '${output_database}'
49 #end if 63 #end if
50 ]]> 64 ]]>
51 </command> 65 </command>
52 <inputs> 66 <inputs>
53 <conditional name="source"> 67 <conditional name="source">
54 <param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases"> 68 <param name="from" type="select" label="Download from"
69 help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases">
55 <option value="uniprot">UniProtKB</option> 70 <option value="uniprot">UniProtKB</option>
56 <option value="cRAP">cRAP (contaminants)</option> 71 <option value="cRAP">cRAP (contaminants)</option>
57 <option value="HMP">Human Microbiome Project body sites</option> 72 <option value="HMP">Human Microbiome Project body sites</option>
58 <option value="HOMD">Human Oral Microbiome Database (HOMD)</option> 73 <option value="HOMD">Human Oral Microbiome Database (HOMD)</option>
59 <option value="url">Custom URL</option> 74 <option value="url">Custom URL</option>
62 <param name="taxon" type="select" format="text" help="select species for protein database"> 77 <param name="taxon" type="select" format="text" help="select species for protein database">
63 <label>Taxonomy</label> 78 <label>Taxonomy</label>
64 <options from_file="uniprot_taxons.loc"> 79 <options from_file="uniprot_taxons.loc">
65 <column name="name" index="0" /> 80 <column name="name" index="0" />
66 <column name="value" index="1" /> 81 <column name="value" index="1" />
82 <filter type="add_value" name="Escherichia coli (strain K12)" value="83333" />
67 </options> 83 </options>
68 </param> 84 </param>
85 <param name="taxon_id" type="integer" value="" min="1" optional="true" help="Specify a NCBI taxon id to override species selection"/>
69 <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy"> 86 <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy">
70 <option value="+">UniProtKB</option> 87 <option value="">UniProtKB</option>
71 <option value="+reviewed%3Ayes">UniProtKB/Swiss-Prot (reviewed only)</option> 88 <option value="+reviewed%3Atrue">UniProtKB/Swiss-Prot (reviewed only)</option>
72 <option value="+reviewed%3Ano">UniProtKB/TrEMBL (unreviewed only)</option> 89 <option value="+reviewed%3Afalse">UniProtKB/TrEMBL (unreviewed only)</option>
73 <sanitizer> 90 <sanitizer>
74 <valid> 91 <valid>
75 <add value="%"/> 92 <add value="%"/>
76 </valid> 93 </valid>
77 </sanitizer> 94 </sanitizer>
78 </param> 95 </param>
79 <param name="set" type="select" label="Proteome Set"> 96 <param name="set" type="select" label="Proteome Set">
80 <option value="+">Any</option> 97 <option value="">Any</option>
81 <option value="+keyword%3a1185" selected="true">Reference Proteome Set</option> 98 <option value="keyword%3aKW-1185" selected="true">Reference Proteome Set</option>
82 <sanitizer> 99 <sanitizer>
83 <valid> 100 <valid>
84 <add value="%"/> 101 <add value="%"/>
85 </valid> 102 </valid>
86 </sanitizer> 103 </sanitizer>
87 </param> 104 </param>
88 <param name="include_isoform" type="boolean" truevalue="&amp;include=yes" falsevalue="" label="Include isoform data" help="several different forms of a given protein are incorporated into database" /> 105 <param name="include_isoform" type="boolean" truevalue="&amp;includeIsoform=true" falsevalue=""
106 label="Include isoform data" help="several different forms of a given protein are incorporated into database" />
89 </when> 107 </when>
90 <when value="cRAP" /> 108 <when value="cRAP" />
91 <when value="HMP"> 109 <when value="HMP">
92 <param name="site" type="select" label="Proteome for body site"> 110 <param name="site" type="select" label="Proteome for body site">
93 <option value="Airways">HMP airways</option> 111 <option value="Airways">HMP airways</option>
127 <outputs> 145 <outputs>
128 <data format="fasta" name="output_database" label="Protein Database ${source.from}" /> 146 <data format="fasta" name="output_database" label="Protein Database ${source.from}" />
129 </outputs> 147 </outputs>
130 <tests> 148 <tests>
131 <test> 149 <test>
132 <param name="from" value="cRAP" /> 150 <conditional name="source">
151 <param name="from" value="cRAP" />
152 </conditional>
133 <output name="output_database"> 153 <output name="output_database">
134 <assert_contents> 154 <assert_contents>
135 <has_text text="KKA1_ECOLX" /> 155 <has_text text="KKA1_ECOLX" />
136 </assert_contents> 156 </assert_contents>
137 </output> 157 </output>
138 </test> 158 </test>
159 <test>
160 <conditional name="source">
161 <param name="from" value="uniprot" />
162 <param name="taxon" value="83333"/>
163 <param name="taxon_id" value="2697049"/>
164 </conditional>
165 <output name="output_database">
166 <assert_contents>
167 <has_text text="SPIKE_SARS2" />
168 </assert_contents>
169 </output>
170 </test>
171 <test>
172 <conditional name="source">
173 <param name="from" value="uniprot" />
174 <param name="taxon_id" value="2697049"/>
175 <param name="reviewed" value="+reviewed%3Atrue"/>
176 <param name="set" value=""/>
177 </conditional>
178 <output name="output_database">
179 <assert_contents>
180 <has_text text=">sp|P0DTC1|R1A_SARS2" />
181 <not_has_text text=">tr|A0A679G4D8|A0A679G4D8_SARS2" />
182 </assert_contents>
183 </output>
184 </test>
185 <test>
186 <conditional name="source">
187 <param name="from" value="uniprot" />
188 <param name="taxon_id" value="2697049"/>
189 <param name="reviewed" value="+reviewed%3Afalse"/>
190 <param name="set" value=""/>
191 </conditional>
192 <output name="output_database">
193 <assert_contents>
194 <has_text text=">tr|A0A679G4D8|A0A679G4D8_SARS2" />
195 <not_has_text text=">sp|P0DTC1|R1A_SARS2" />
196 </assert_contents>
197 </output>
198 </test>
199
139 <test> 200 <test>
140 <param name="from" value="url" /> 201 <param name="from" value="url" />
141 <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" /> 202 <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" />
142 <param name="archive_type" value="direct" /> 203 <param name="archive_type" value="direct" />
143 <output name="output_database"> 204 <output name="output_database">