3
|
1 <tool id="unipept" name="Unipept" version="2.0.1">
|
0
|
2 <description>retrieve taxonomy for peptides</description>
|
|
3 <macros>
|
|
4 <xml name="equate_il">
|
1
|
5 <param name="equate_il" type="boolean" truevalue="-e" falsevalue="" checked="true" label="Equate isoleucine and leucine">
|
0
|
6 <help>isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records</help>
|
3
|
7 </param>
|
0
|
8 </xml>
|
|
9 <xml name="extra">
|
|
10 <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="false" label="retrieve extra information">
|
|
11 <yield/>
|
3
|
12 </param>
|
|
13 </xml>
|
|
14 <xml name="extra_true">
|
|
15 <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="true" label="retrieve extra information">
|
|
16 <yield/>
|
|
17 </param>
|
0
|
18 </xml>
|
|
19 <xml name="names">
|
|
20 <param name="names" type="boolean" truevalue="-n" falsevalue="" checked="true" label="names" >
|
1
|
21 <help>return the names in complete taxonomic lineage</help>
|
3
|
22 </param>
|
1
|
23 <param name="allfields" type="boolean" truevalue="-A" falsevalue="" checked="false" label="allfields" >
|
|
24 <help>include fields for most specific taxonomic classification: taxon_rank,taxon_id,taxon_name before lineage</help>
|
3
|
25 </param>
|
0
|
26 </xml>
|
|
27 </macros>
|
|
28 <requirements>
|
3
|
29 <requirement type="package" version="2.7">python</requirement>
|
0
|
30 </requirements>
|
|
31 <stdio>
|
|
32 <exit_code range="1:" />
|
|
33 </stdio>
|
3
|
34 <command><![CDATA[
|
|
35 python '$__tool_directory__/unipept.py'
|
0
|
36 --api=$unipept.api
|
|
37 $unipept.equate_il $unipept.extra
|
|
38 #if $unipept.api != 'pept2prot':
|
1
|
39 $unipept.names $unipept.allfields
|
0
|
40 #end if
|
|
41 $strict
|
|
42 #if str($peptide_src.fmt) == 'proteomic':
|
|
43 #if $peptide_src.input.datatype.file_ext == 'fasta':
|
|
44 --fasta="$peptide_src.input"
|
|
45 #elif $peptide_src.input.datatype.file_ext == 'mzid':
|
|
46 --mzid="$peptide_src.input"
|
|
47 #elif $peptide_src.input.datatype.file_ext == 'pepxml':
|
|
48 --pepxml="$peptide_src.input"
|
|
49 #end if
|
|
50 #elif str($peptide_src.fmt) == 'tabular':
|
|
51 --tabular="$peptide_src.input_tsv"
|
|
52 #set $col = int(str($peptide_src.column)) - 1
|
|
53 --column=$col
|
|
54 #elif str($peptide_src.fmt) == 'fasta':
|
|
55 --fasta="$peptide_src.input_fasta"
|
|
56 #elif str($peptide_src.fmt) == 'mzid':
|
|
57 --mzid="$peptide_src.input_mzid"
|
|
58 #elif str($peptide_src.fmt) == 'pepxml':
|
|
59 --pepxml="$peptide_src.input_pepxml"
|
|
60 #end if
|
3
|
61 #if 'json' in str($outputs).split(',') and str($unipept.api) != 'pept2prot':
|
0
|
62 --json $output_json
|
|
63 #end if
|
|
64 #if 'tsv' in str($outputs).split(','):
|
|
65 --tsv $output_tsv
|
|
66 #end if
|
|
67 #if 'csv' in str($outputs).split(','):
|
|
68 --csv $output_csv
|
|
69 #end if
|
1
|
70 #if 'unmatched' in str($outputs).split(','):
|
|
71 --unmatched $output_unmatched
|
0
|
72 #end if
|
|
73 ]]></command>
|
|
74 <inputs>
|
|
75 <conditional name="unipept">
|
|
76 <param name="api" type="select" label="Unipept application" >
|
1
|
77 <option value="pept2lca" selected="true">pept2lca: lowest common ancestor</option>
|
|
78 <option value="pept2taxa">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option>
|
0
|
79 <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option>
|
|
80 </param>
|
1
|
81 <when value="pept2lca">
|
|
82 <expand macro="equate_il" />
|
|
83 <expand macro="extra">
|
|
84 <help>Return the complete lineage of the taxonomic lowest common ancestor, and include ID fields.</help>
|
|
85 </expand>
|
|
86 <expand macro="names" />
|
|
87 </when>
|
0
|
88 <when value="pept2taxa">
|
|
89 <expand macro="equate_il" />
|
3
|
90 <expand macro="extra_true">
|
1
|
91 <help>Return the complete lineage of each organism, and include ID fields.</help>
|
0
|
92 </expand>
|
|
93 <expand macro="names" />
|
|
94 </when>
|
|
95 <when value="pept2prot">
|
|
96 <expand macro="equate_il" />
|
|
97 <expand macro="extra">
|
|
98 <help>Return additional information fields: taxon_name, ec_references, go_references, refseq_ids, refseq_protein_ids, insdc_ids, insdc_protein_ids
|
|
99 WARNING: Huge performance penalty! Only use for a small number of peptides when the extra information is required.
|
|
100 </help>
|
|
101 </expand>
|
|
102 </when>
|
|
103 </conditional>
|
|
104 <conditional name="peptide_src">
|
|
105 <param name="fmt" type="select" label="Peptides input format" >
|
|
106 <option value="proteomic">proteomics formats: mzid, pepxml, fasta</option>
|
|
107 <option value="tabular">tabular</option>
|
|
108 <option value="fasta">fasta</option>
|
|
109 <option value="mzid">mzid</option>
|
|
110 <option value="pepxml">pepxml</option>
|
|
111 </param>
|
|
112 <when value="proteomic">
|
|
113 <param name="input" type="data" format="mzid,pepxml,fasta" label="Peptide Input" />
|
|
114 </when>
|
|
115 <when value="tabular">
|
|
116 <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column" />
|
|
117 <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv" />
|
|
118 </when>
|
|
119 <when value="fasta">
|
|
120 <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input" />
|
|
121 </when>
|
|
122 <when value="mzid">
|
|
123 <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input" />
|
|
124 </when>
|
|
125 <when value="pepxml">
|
|
126 <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input" />
|
|
127 </when>
|
|
128 </conditional>
|
|
129 <param name="outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
|
|
130 <option value="tsv" selected="true">tabular</option>
|
|
131 <option value="csv">Comma Separated Values (.csv)</option>
|
3
|
132 <option value="json">JSON Taxonomy Tree (ignored for pept2prot)</option>
|
1
|
133 <option value="unmatched">Unmatched peptides</option>
|
0
|
134 </param>
|
|
135 <param name="strict" type="boolean" truevalue="--strict" falsevalue="" checked="false" label="Exit with error on invalid peptides, otherwise ignore them"/>
|
|
136 </inputs>
|
|
137 <outputs>
|
3
|
138 <data name="output_json" format="d3_hierarchy" label="${tool.name} ${unipept.api} on ${on_string} json">
|
|
139 <filter>'json' in outputs and unipept['api'] != 'pept2prot'</filter>
|
|
140 <change_format>
|
|
141 <when input="api" value="pept2prot" format="json" />
|
|
142 </change_format>
|
0
|
143 </data>
|
|
144 <data name="output_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} tsv">
|
|
145 <filter>'tsv' in outputs</filter>
|
|
146 </data>
|
|
147 <data name="output_csv" format="csv" label="${tool.name} ${unipept.api} on ${on_string} csv">
|
|
148 <filter>'csv' in outputs</filter>
|
|
149 </data>
|
1
|
150 <data name="output_unmatched" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} unmatched">
|
|
151 <filter>'unmatched' in outputs</filter>
|
0
|
152 </data>
|
|
153 </outputs>
|
|
154 <tests>
|
|
155 <test>
|
1
|
156 <param name="api" value="pept2lca"/>
|
0
|
157 <param name="fmt" value="tabular"/>
|
1
|
158 <param name="input_tsv" value="tryptic.tsv"/>
|
0
|
159 <param name="column" value="2"/>
|
|
160 <param name="extra" value="True"/>
|
|
161 <param name="names" value="True"/>
|
1
|
162 <param name="outputs" value="tsv,unmatched"/>
|
0
|
163 <output name="output_tsv">
|
|
164 <assert_contents>
|
1
|
165 <has_text text="Homininae" />
|
0
|
166 </assert_contents>
|
|
167 </output>
|
1
|
168 <output name="output_unmatched">
|
0
|
169 <assert_contents>
|
1
|
170 <has_text text="QTAMAV" />
|
0
|
171 </assert_contents>
|
|
172 </output>
|
|
173 </test>
|
|
174 <test>
|
1
|
175 <param name="api" value="pept2lca"/>
|
0
|
176 <param name="fmt" value="fasta"/>
|
1
|
177 <param name="input_fasta" value="peptide.fa"/>
|
0
|
178 <param name="equate_il" value="True"/>
|
|
179 <param name="extra" value="True"/>
|
|
180 <param name="names" value="True"/>
|
1
|
181 <param name="outputs" value="json,tsv"/>
|
0
|
182 <output name="output_json">
|
|
183 <assert_contents>
|
1
|
184 <has_text text="VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV" />
|
|
185 </assert_contents>
|
|
186 </output>
|
|
187 <output name="output_tsv">
|
|
188 <assert_contents>
|
|
189 <has_text text="9606" />
|
|
190 <has_text text="9598" />
|
0
|
191 </assert_contents>
|
|
192 </output>
|
1
|
193 </test>
|
|
194 <test>
|
|
195 <param name="api" value="pept2taxa"/>
|
|
196 <param name="fmt" value="fasta"/>
|
|
197 <param name="input_fasta" value="peptide.fa"/>
|
|
198 <param name="equate_il" value="True"/>
|
|
199 <param name="extra" value="False"/>
|
|
200 <param name="names" value="False"/>
|
|
201 <param name="outputs" value="tsv"/>
|
|
202 <output name="output_tsv">
|
0
|
203 <assert_contents>
|
1
|
204 <has_text text="sapiens" />
|
|
205 <has_text text="troglodytes" />
|
|
206 <has_text text="Gorilla" />
|
|
207 <has_text text="Macaca" />
|
0
|
208 </assert_contents>
|
|
209 </output>
|
|
210 </test>
|
|
211 </tests>
|
|
212 <help><![CDATA[
|
|
213 **Unipept**
|
|
214
|
|
215 Retrieve UniProt and taxonomic information for tryptic peptides.
|
1
|
216
|
|
217 Unipept API documentation - http://unipept.ugent.be/apidocs
|
0
|
218
|
1
|
219 **Input**
|
|
220
|
|
221 Input peptides can be retrieved from tabular, fasta, mzid, or pepxml datasets.
|
|
222
|
|
223 Processing details::
|
|
224
|
|
225 The input peptides are split into tryptic peptide fragments in order to match the Unipept records.
|
|
226 Only fragments that are complete tryptic peptides between 5 and 50 amino acids in length will be matched by Unipept.
|
|
227 The match to the most specific tryptic fragment is reported.
|
|
228
|
|
229
|
|
230 **Unipept APIs**
|
|
231
|
|
232 **pept2prot** - http://unipept.ugent.be/apidocs/pept2prot
|
|
233
|
0
|
234 Returns the list of UniProt entries containing a given tryptic peptide. This is the same information as provided on the Protein matches tab when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
235
|
|
236 By default, each object contains the following information fields extracted from the UniProt record::
|
|
237
|
|
238 peptide: the peptide that matched this record
|
|
239 uniprot_id: the UniProt accession number of the matching record
|
|
240 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
241
|
|
242 When the extra parameter is set to true, objects contain the following additional fields extracted from the UniProt record::
|
|
243
|
|
244 taxon_name: the name of the organism associated with the matching UniProt record
|
|
245 ec_references: a space separated list of associated EC numbers
|
|
246 go_references: a space separated list of associated GO terms
|
|
247 refseq_ids: a space separated list of associated RefSeq accession numbers
|
|
248 refseq_protein_ids: a space separated list of associated RefSeq protein accession numbers
|
|
249 insdc_ids: a space separated list of associated insdc accession numbers
|
|
250 insdc_protein_ids: a space separated list of associated insdc protein accession numbers
|
|
251
|
|
252
|
1
|
253 **pept2taxa** - http://unipept.ugent.be/apidocs/pept2taxa
|
|
254
|
0
|
255 Returns the set of organisms associated with the UniProt entries containing a given tryptic peptide. This is the same information as provided on the Lineage table tab when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
256
|
|
257 By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::
|
|
258
|
|
259 peptide: the peptide that matched this record
|
|
260 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
261 taxon_name: the name of the organism associated with the matching record
|
|
262 taxon_rank: the taxonomic rank of the organism associated with the matching record
|
|
263
|
|
264 When the extra parameter is set to true, objects contain additional information about the lineages of the organism extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::
|
|
265
|
|
266 superkingdom_id
|
|
267 kingdom_id
|
|
268 subkingdom_id
|
|
269 superphylum_id
|
|
270 phylum_id
|
|
271 subphylum_id
|
|
272 superclass_id
|
|
273 class_id
|
|
274 subclass_id
|
|
275 infraclass_id
|
|
276 superorder_id
|
|
277 order_id
|
|
278 suborder_id
|
|
279 infraorder_id
|
|
280 parvorder_id
|
|
281 superfamily_id
|
|
282 family_id
|
|
283 subfamily_id
|
|
284 tribe_id
|
|
285 subtribe_id
|
|
286 genus_id
|
|
287 subgenus_id
|
|
288 species_group_id
|
|
289 species_subgroup_id
|
|
290 species_id
|
|
291 subspecies_id
|
|
292 varietas_id
|
|
293 forma_id
|
|
294
|
|
295
|
1
|
296 **pept2lca** - http://unipept.ugent.be/apidocs/pept2lca
|
|
297
|
0
|
298 Returns the taxonomic lowest common ancestor for a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
299
|
|
300 By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::
|
|
301
|
|
302 peptide: the peptide that matched this record
|
|
303 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
304 taxon_name: the name of the organism associated with the matching record
|
|
305 taxon_rank: the taxonomic rank of the organism associated with the matching record
|
|
306
|
|
307 When the extra parameter is set to true, objects contain additional information about the lineage of the taxonomic lowest common ancestor extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::
|
|
308
|
|
309 superkingdom_id
|
|
310 kingdom_id
|
|
311 subkingdom_id
|
|
312 superphylum_id
|
|
313 phylum_id
|
|
314 subphylum_id
|
|
315 superclass_id
|
|
316 class_id
|
|
317 subclass_id
|
|
318 infraclass_id
|
|
319 superorder_id
|
|
320 order_id
|
|
321 suborder_id
|
|
322 infraorder_id
|
|
323 parvorder_id
|
|
324 superfamily_id
|
|
325 family_id
|
|
326 subfamily_id
|
|
327 tribe_id
|
|
328 subtribe_id
|
|
329 genus_id
|
|
330 subgenus_id
|
|
331 species_group_id
|
|
332 species_subgroup_id
|
|
333 species_id
|
|
334 subspecies_id
|
|
335 varietas_id
|
|
336 forma_id
|
|
337
|
|
338
|
|
339 **Attributions**
|
|
340
|
|
341 The Unipept metaproteomics analysis pipeline
|
|
342 Bart Mesuere1,*, Griet Debyser2, Maarten Aerts3, Bart Devreese2, Peter Vandamme3 and Peter Dawyndt1
|
|
343 Article first published online: 11 FEB 2015
|
|
344 DOI: 10.1002/pmic.201400361
|
|
345 http://onlinelibrary.wiley.com/doi/10.1002/pmic.201400361/abstract
|
|
346
|
|
347 ]]></help>
|
|
348 <citations>
|
|
349 <citation type="doi">doi:10.1002/pmic.201400361</citation>
|
|
350 </citations>
|
|
351
|
|
352 </tool>
|