comparison pepquery2.xml @ 0:a07976bbc4d9 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pepquery2 commit 00e1b1770d0b1f9fe31384b37b55f7ae9d97b597
author galaxyp
date Sun, 06 Nov 2022 16:25:25 +0000
parents
children b5489f81c2fa
comparison
equal deleted inserted replaced
-1:000000000000 0:a07976bbc4d9
1 <tool id="pepquery2" name="PepQuery2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>Peptide-centric search engine for novel peptide identification and validation.</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <requirements>
7 <requirement type="package" version="@TOOL_VERSION@">pepquery</requirement>
8 </requirements>
9 <stdio>
10 <exit_code range="1:" level="fatal" description="Failed" />
11 <regex match="Exception"
12 source="stderr"
13 level="fatal"
14 description="java Exception" />
15 </stdio>
16 <command><![CDATA[
17 @CMD_IMPORTS@
18 #if $req_inputs.db_type.db_type_selector == 'history'
19 #set $db_file = $re.sub('\s','_',$re.sub('[.][^.]*$','',$req_inputs.db_type.db_file.display_name.split('/')[-1])) + ".fa"
20 ln -s '$req_inputs.db_type.db_file' '$db_file' &&
21 #end if
22 #if $req_inputs.ms_dataset.ms_dataset_type == 'history'
23 @INDEX_SPECTRUM_FILES@
24 $index_spectrum_files($ms_index.files_path, $req_inputs.ms_dataset.spectrum_files) &&
25 #elif $req_inputs.ms_dataset.ms_dataset_type == 'indexed'
26 #if $os.path.exists($os.path.join($req_inputs.ms_dataset.index.extra_files_path, 'summary.txt'))
27 #set $index_dir = 'index_dir'
28 ln -s '$req_inputs.ms_dataset.index.extra_files_path' index_dir &&
29 #else
30 #raise ValueError("The selected PepQuery index dataset has no summary.txt in its extra files")
31 #end if
32 #end if
33 ## PepQuery command (JVM heap in GB is derived from GALAXY_MEMORY_MB, falling back to 8192 MB)
34 pepquery
35 -Xmx\$(( \${GALAXY_MEMORY_MB:-8192} / 1024 ))g
36 #if $validation.task_type == "known"
37 -s 2 $validation.decoy
38 #else
39 -s 1
40 #end if
41 #if $req_inputs.ms_dataset.ms_dataset_type in ['history', 'indexed']
42 -ms '$index_dir'
43 #elif $req_inputs.ms_dataset.ms_dataset_type in ['PepQueryDB', 'public']
44 -b '$req_inputs.ms_dataset.dataset'
45 #end if
46 #if $parameter_set: -p '$parameter_set'
47 #if $req_inputs.db_type.db_type_selector == 'history'
48 -db '$db_file'
49 #else
50 -db '$req_inputs.db_type.db_id'
51 #end if
52 #if $req_inputs.input_type.input_type_selector == 'peptide'
53 -t $req_inputs.input_type.input_type_selector
54 -i '$req_inputs.input_type.multiple.input'
55 #else
56 -t $req_inputs.input_type.input_type_selector
57 #if $req_inputs.input_type.input_type_selector == 'protein'
58 #if $req_inputs.input_type.multiple.protein_input_selector == 'identifier'
59 #set $prot_id = str($req_inputs.input_type.multiple.input).replace('|','\|')
60 -i '"$prot_id"'
61 #else
62 -i '$req_inputs.input_type.multiple.input'
63 #end if
64 #else
65 -i '$req_inputs.input_type.input'
66 #if $req_inputs.input_type.input_type_selector == 'DNA'
67 #if $req_inputs.input_type.frame == 'None'
68 -frame '0'
69 #else
70 -frame '$req_inputs.input_type.frame'
71 #end if
72 #else
73 -anno '$req_inputs.input_type.anno'
74 #end if
75 #end if
76 #end if
77 #if $req_inputs.indexType
78 -indexType $req_inputs.indexType
79 #end if
80
81 #if $modifications.fixed_mod
82 -fixMod '$modifications.fixed_mod'
83 #end if
84 #if $modifications.var_mod
85 -varMod '$modifications.var_mod'
86 #end if
87 #if $digestion.enzyme
88 -e '$digestion.enzyme'
89 #end if
90 #if $digestion.max_missed_cleavages
91 -c '$digestion.max_missed_cleavages'
92 #end if
93
94 #if $modifications.max_mods
95 -maxVar '$modifications.max_mods'
96 #end if
97 $modifications.unmodified
98 $modifications.aa
99 #if $ms_params.tolerance_params.precursor_tolerance
100 -tol '$ms_params.tolerance_params.precursor_tolerance'
101 #end if
102
103 #if $ms_params.tolerance_params.precursor_unit
104 -tolu '$ms_params.tolerance_params.precursor_unit'
105 #end if
106 #if $ms_params.tolerance_params.tolerance
107 -itol '$ms_params.tolerance_params.tolerance'
108 #end if
109 #if $ms_params.search.frag_method
110 -fragmentMethod '$ms_params.search.frag_method'
111 #end if
112 #if $ms_params.search.scoring_method
113 -m '$ms_params.search.scoring_method'
114 #end if
115 $ms_params.search.extra_score_validation
116 #if $ms_params.search.max_charge
117 -maxCharge '$ms_params.search.max_charge'
118 #end if
119 #if $ms_params.search.min_charge
120 -minCharge '$ms_params.search.min_charge'
121 #end if
122 #if $ms_params.search.min_peaks
123 -minPeaks '$ms_params.search.min_peaks'
124 #end if
125 #if $ms_params.search.isotope_error
126 -ti '$ms_params.search.isotope_error'
127 #end if
128 #if $ms_params.search.min_score
129 -minScore '$ms_params.search.min_score'
130 #end if
131 #if $ms_params.search.min_length
132 -minLength '$ms_params.search.min_length'
133 #end if
134 #if $ms_params.search.max_length
135 -maxLength '$ms_params.search.max_length'
136 #end if
137 #if $ms_params.search.num_random_peptides
138 -n '$ms_params.search.num_random_peptides'
139 #end if
140 #if 'psm_annotation.txt' in $outputs_selected
141 -plot
142 #end if
143 $fast
144 -o pepquery_output
145 | tee >(sed "s/\x1b[^m]*m//g" > log.txt)
146 #set $flist = str($outputs_selected).replace(',',' ').replace('ms_index','')
147 && for i in $flist; do for f in `find pepquery_output/*/* -name \$i`; do cat \$f >> pepquery_output/\${i}; done; done
148 && for f in `find pepquery_output/*/ -name parameter.txt`; do cp \$f pepquery_output/parameter.txt; done
149 ]]>
150 </command>
151 <inputs>
152 <conditional name="validation">
153 <param name="task_type" argument="-s" type="select" label="Validation Task Type">
154 <option value="novel" selected="true">novel peptide/protein validation</option>
155 <option value="known">known peptide/protein validation</option>
156 </param>
157 <when value="novel"/>
158 <when value="known">
159 <param name="decoy" argument="-decoy" type="boolean" truevalue="-decoy" falsevalue="" checked="false" label="Identify the decoy version of the selected target protein"/>
160 </when>
161 </conditional>
162 <section name="req_inputs" title="Input Data" expanded="true">
163 <conditional name="input_type">
164 <param name="input_type_selector" argument="-t" type="select" label="Input Type" help="" >
165 <option value="peptide">peptide</option>
166 <option value="protein">protein</option>
167 <option value="DNA">DNA (translate to protein sequences)</option>
168 <!-- VCF,BED,GTF input options have not been implemented in this tool -->
169 </param>
170 <when value="peptide">
171 <conditional name="multiple">
172 <param name="peptide_input_selector" type="select" label="Peptides?">
173 <option value="multiple">Peptide list from your history</option>
174 <option value="single">Single peptide entered as text</option>
175 </param>
176 <when value="multiple">
177 <param name="input" argument="-i" type="data" format="tabular" label="Peptide Sequences (.txt)">
178 <help>Peptide sequence file containing peptides which you want to search (no column headers).
179 First column is a peptide sequence. Optional second column is spectrum title.
180 </help>
181 </param>
182 </when>
183 <when value="single">
184 <param name="input" argument="-i" type="text" label="Peptide Sequence" help="Peptide sequence(s) which you want to search">
185 <validator type="regex" message="Must be AA letters, multiple peptides separated by commas">^[AC-IK-NP-TV-Yac-ik-np-tv-y]+(,[AC-IK-NP-TV-Yac-ik-np-tv-y]+)*$</validator>
186 </param>
187 </when>
188 </conditional>
189 </when>
190 <when value="protein">
191 <conditional name="multiple">
192 <param name="protein_input_selector" type="select" label="Proteins?">
193 <option value="multiple">Protein fasta from your history</option>
194 <option value="single">Single protein entered as text</option>
195 <option value="identifier">Protein Identifier from selected Protein Reference Database</option>
196 </param>
197 <when value="multiple">
198 <param name="input" argument="-i" type="data" format="fasta" label="Protein Sequences (.txt)" help="Protein fasta file containing proteins which you want to search." />
199 </when>
200 <when value="single">
201 <param name="input" argument="-i" type="text" label="Protein Sequence" help="Protein sequence which you want to search">
202 <validator type="regex" message="Must be AA letters">^[AC-IK-NP-TV-Yac-ik-np-tv-y]+$</validator>
203 </param>
204 </when>
205 <when value="identifier">
206 <param name="input" argument="-i" type="text" label="Protein Identifier" help="Protein ID from the selected Protein Reference Database. E.g. sp|P07205|PGK2_HUMAN from swissprot:human">
207 <sanitizer invalid_char="">
208 <valid initial="string.ascii_letters,string.digits">
209 <add value="|" />
210 </valid>
211 </sanitizer>
212 <validator type="regex" message="Spaces not allowed in ID">^[^ ]+$</validator>
213 </param>
214 </when>
215 </conditional>
216 </when>
217 <when value="DNA">
218 <param name="input" argument="-i" type="text" label="DNA Sequence (at least 60 bp)" help="DNA sequence which you want to search">
219 <validator type="regex" message="Must be at least 60bp">^[acgtuAGCTU]{60}[acgtuAGCTU]*$</validator>
220 </param>
221 <param name="frame" argument="-frame" type="select" label="Frame(s) for DNA translation" multiple="true" help="The frame(s) to translate DNA sequence to protein. Selecting nothing (default) keeps the longest frame">
222 <option value="1">1</option>
223 <option value="2">2</option>
224 <option value="3">3</option>
225 <option value="4">4</option>
226 <option value="5">5</option>
227 <option value="6">6</option>
228 </param>
229 </when>
230 </conditional>
231 <conditional name="db_type">
232 <param name="db_type_selector" type="select" label="Protein Reference Database from" help="" >
233 <option value="history">history</option>
234 <option value="download">download</option>
235 </param>
236 <when value="history">
237 <param name="db_file" argument="-db" type="data" format="fasta" label="Protein Reference Database File" help="an input sequence that matches a reference will be ignored." />
238 </when>
239 <when value="download">
240 <param name="db_id" type="text" value="" label="Public protein sequence database">
241 <help>Currently supported downloads: gencode:human, swissprot:human, refseq:human</help>
242 <option value="gencode:human">gencode:human</option>
243 <option value="swissprot:human">swissprot:human</option>
244 <option value="refseq:human">refseq:human</option>
245 <validator type="regex" message="Must be one of gencode:human, swissprot:human or refseq:human">^(swissprot|refseq|gencode):(human)$</validator>
246 </param>
247 </when>
248 </conditional>
249 <conditional name="ms_dataset">
250 <param name="ms_dataset_type" type="select" label="MS/MS dataset to search" help="" >
251 <option value="history"> Spectrum Datasets from history</option>
252 <option value="indexed">Indexed MS/MS spectrums</option>
253 <option value="PepQueryDB">PepQueryDB</option>
254 <option value="public">public proteomics data repositories</option>
255 </param>
256 <when value="history">
257 <param name="spectrum_files" argument="-ms" type="data" format="mgf,mzml,mzxml,thermo.raw" label="Spectrum File" help="Spectrum file used for identification, formats: MGF,mzML,mzXML,Thermo RAW" />
258 </when>
259 <when value="indexed">
260 <param name="index" argument="-ms" type="data" format="txt" label="PepQuery Index" help="" />
261 </when>
262 <when value="PepQueryDB">
263 <param name="dataset" argument="-b" type="text" value="" label="PepQueryDB dataset">
264 <help>PepQueryDB dataset IDs (separated by commas).</help>
265 <expand macro="pepquerydb_options" />
266 <validator type="regex" message="PepQueryDB dataset_name(,dataset_name)">^[a-zA-Z][^,]*(,[a-zA-Z][^,]*)*$</validator>
267 </param>
268 </when>
269 <when value="public">
270 <param name="dataset" argument="-b" type="text" value="" label="Public dataset">
271 <validator type="regex" message="An identifier starting with PXD or MSV or JPST">^(PXD|MSV|JPST).*$</validator>
272 </param>
273 </when>
274 </conditional>
275
276 <param name="indexType" argument="-indexType" type="select" optional="true" label="Report Spectrum Scan as" help="Default: index" >
277 <option value="1">index (1-based) in MGF</option>
278 <option value="2">spectrum title in MGF</option>
279 </param>
280 </section>
281
282 <param name="parameter_set" argument="-p" type="text" value="" optional="true" label="MS/MS searching parameter set name">
283 <help>Currently supported set names start with: MS1 or TMT</help>
284 <option value="MS1_H_MS2_H_LF">MS1_H_MS2_H_LF</option>
285 <option value="MS1_H_MS2_L_LF">MS1_H_MS2_L_LF</option>
286 <option value="TMT10_11">TMT10_11</option>
287 <option value="TMT10_11_MS2_L">TMT10_11_MS2_L</option>
288 <option value="TMT10_11_MS2_L_phosphorylation">TMT10_11_MS2_L_phosphorylation</option>
289 <option value="TMT10_11_acetylation">TMT10_11_acetylation</option>
290 <option value="TMT10_11_glycosylation">TMT10_11_glycosylation</option>
291 <option value="TMT10_11_phosphorylation">TMT10_11_phosphorylation</option>
292 <option value="TMT10_11_ubiquitination">TMT10_11_ubiquitination</option>
293 </param>
294
295 <section name="modifications" title="Modifications" expanded="false">
296 <param name="fixed_mod" argument="-fixMod" type="select" label="Fixed modification(s)" multiple="true" optional="true">
297 <help>Default: 1: Carbamidomethylation of C [57.02146372057]</help>
298 <expand macro="modifications" />
299 </param>
300 <param name="var_mod" argument="-varMod" type="select" label="Variable modification(s)" multiple="true" optional="true">
301 <help>Default: 2: Oxidation of M [15.99491461956]</help>
302 <expand macro="modifications" />
303 </param>
304
305 <param name="max_mods" argument="-maxVar" type="integer" label="Max Modifications" value="" min="0" max="10" optional="true" help="Max number of variable modifications Default: 3" />
306 <param name="unmodified" argument="-hc" type="boolean" truevalue="-hc" falsevalue="" checked="false" label="Use more stringent criterion for unrestricted modification searching" help="TRUE: score(UMS)>=score(targetPSM); FALSE: score(UMS)>score(targetPSM)" />
307 <param name="aa" argument="-aa" type="boolean" truevalue="-aa" falsevalue="" checked="false" label="Consider amino acid substitution modifications?" help="Whether or not to consider aa substitution modifications when perform modification filtering." />
308 </section>
309
310 <section name="digestion" title="Digestion" expanded="false">
311 <param name="enzyme" argument="-e" type="select" optional="true" label="Enzyme" help="Enzyme used for protein digestion. Default: Trypsin" >
312 <option value="0">Non enzyme</option>
313 <option value="1">Trypsin</option>
314 <option value="2">Trypsin (no P rule)</option>
315 <option value="3">Arg-C</option>
316 <option value="4">Arg-C (no P rule)</option>
317 <option value="5">Arg-N</option>
318 <option value="6">Glu-C</option>
319 <option value="7">Lys-C</option>
320 </param>
321 <param name="max_missed_cleavages" argument="-c" type="integer" value="" optional="true" label="Max Missed Cleavages" help="The max missed cleavages" />
322 </section>
323
324 <section name="ms_params" title="Mass spectrometer" expanded="false">
325 <section name="tolerance_params" title="Tolerance" expanded="true">
326 <param name="precursor_tolerance" argument="-tol" type="integer" value="" optional="true" label="Precursor Tolerance" help="The error window on experimental peptide mass values. This parameter is usually set according to the mass spectrometer which was used to generate the MS/MS data. Default: 10" />
327 <param name="precursor_unit" argument="-tolu" type="select" optional="true" label="Precursor Unit" help="The unit of precursor ion m/z tolerance. Default: ppm">
328 <option value="ppm">ppm</option>
329 <option value="Da">Da</option>
330 </param>
331 <param name="tolerance" argument="-itol" type="float" value="" optional="true" label="Tolerance" help="Error window for MS/MS fragment ion mass values in Da unit. Default: 0.6 Da" />
332 </section>
333
334 <section name="search" title="PSM" expanded="false">
335 <param name="frag_method" argument="-fragmentMethod" type="select" optional="true" label="Fragmentation Method" help="Default: CID/HCD">
336 <option value="1">CID/HCD</option>
337 <option value="2">ETD</option>
338 </param>
339 <param name="scoring_method" argument="-m" type="select" optional="true" label="Scoring Method" help="Default: HyperScore">
340 <option value="1">HyperScore</option>
341 <option value="2">MVH</option>
342 </param>
343 <param name="extra_score_validation" argument="-x" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Add extra score validation" help="use two scoring algorithms for peptide identification" />
344 <param name="min_charge" argument="-minCharge" type="integer" value="" optional="true" label="Minimum Charge" help="The minimum charge to consider if the charge state is not available. Default: 2"/>
345 <param name="max_charge" argument="-maxCharge" type="integer" value="" optional="true" label="Maximum Charge" help="The maximum charge to consider if the charge state is not available. Default: 3" />
346 <param name="min_peaks" argument="-minPeaks" type="integer" value="" optional="true" label="Minimum Peaks" help="Min peaks in spectrum. Default: 10" />
347 <param name="isotope_error" argument="-ti" type="text" value="" optional="true" label="Isotope peak error range">
348 <help>A comma-separated range of integers from -2 to 2, e.g. '-1,0,1,2' Default: 0</help>
349 <validator type="regex" message="Must be a contiguous comma-separated range of integers between -2 and 2 that includes 0, e.g. -1,0,1">^((-2,)?-1,)?0(,1(,2)?)?$</validator>
350 </param>
351 <param name="min_score" argument="-minScore" type="integer" value="" min="0" optional="true" label="Minimum Score" help="Minimum score to consider for peptide searching. Default: 12" />
352 <param name="min_length" argument="-minLength" type="integer" value="" min="0" optional="true" label="Minimum length of peptide" help="The minimum length of peptide to consider. Default: 7" />
353 <param name="max_length" argument="-maxLength" type="integer" value="" min="0" optional="true" label="Maximum length of peptide" help="The maximum length of peptide to consider. Default: 45" />
354 <param name="num_random_peptides" argument="-n" type="integer" value="" min="0" optional="true" label="Number of Random Peptides" help="The number of random peptides. Default: 1000" />
355 </section>
356 </section>
357
358 <param name="outputs_selected" type="select" multiple="true" optional="false" label="Select outputs">
359 <option value="psm.txt">psm.txt</option>
360 <option value="psm_rank.txt" selected="true">psm_rank.txt</option>
361 <option value="psm_rank.mgf">psm_rank.mgf</option>
362 <option value="psm_annotation.txt">psm_annotation.txt</option>
363 <option value="psm_type.txt">psm_type.txt</option>
364 <option value="detail.txt">detail.txt</option>
365 <option value="ptm.txt">ptm.txt</option>
366 <option value="ptm_detail.txt">ptm_detail.txt</option>
367 <option value="ms_index">MS/MS Index</option>
368 </param>
369 <param name="fast" argument="-fast" type="boolean" truevalue="-fast" falsevalue="" checked="false" label="Use fast mode for searching" help="In fast mode, only one better match from reference peptide-based competitive filtering steps will be returned. A peptide identified or not is not affected by this setting. For most applications, fast mode will speed up the analysis." />
370 </inputs>
371 <outputs>
372 <data name="log_txt" format="txt" from_work_dir="log.txt" label="${tool.name} on ${on_string}: log.txt">
373 </data>
374 <data name="parameter_txt" format="txt" from_work_dir="pepquery_output/parameter.txt" label="${tool.name} on ${on_string}: parameter.txt">
375 <filter>'parameter.txt' in outputs_selected and req_inputs['ms_dataset']['ms_dataset_type'] == 'history'</filter> <!-- NOTE(review): 'parameter.txt' is not among the outputs_selected options declared in this tool, so this filter looks like it can never be true; confirm whether an option is missing or the filter should change -->
376 </data>
377 <data name="ms_index" format="txt" label="${tool.name} on ${on_string}: index summary.txt" from_work_dir="index_dir/summary.txt">
378 <filter>'ms_index' in outputs_selected and req_inputs['ms_dataset']['ms_dataset_type'] == 'history'</filter>
379 </data>
380 <data name="psm_txt" format="tabular" from_work_dir="pepquery_output/psm.txt" label="${tool.name} on ${on_string}: psm.txt">
381 <filter>'psm.txt' in outputs_selected</filter>
382 <actions>
383 <action name="comment_lines" type="metadata" default="1" />
384 <action name="column_names" type="metadata" default="peptide,modification,n,spectrum_title,charge,exp_mass,tol_ppm,tol_da,isotope_error,pep_mass,mz,score,n_db,total_db,n_random,total_random,pvalue" />
385 </actions>
386 </data>
387
388 <data name="psm_rank_txt" format="tabular" from_work_dir="pepquery_output/psm_rank.txt" label="${tool.name} on ${on_string}: psm_rank.txt">
389 <filter>'psm_rank.txt' in outputs_selected</filter>
390 <actions>
391 <action name="comment_lines" type="metadata" default="1" />
392 <action name="column_names" type="metadata" default="peptide,modification,n,spectrum_title,charge,exp_mass,tol_ppm,tol_da,isotope_error,pep_mass,mz,score,n_db,total_db,n_random,total_random,pvalue,rank,n_ptm,confident,ref_delta_score,mod_delta_score" />
393 </actions>
394 </data>
395
396 <data name="psm_rank_mgf" format="mgf" from_work_dir="pepquery_output/psm_rank.mgf" label="${tool.name} on ${on_string}: psm_rank.mgf">
397 <filter>'psm_rank.mgf' in outputs_selected</filter>
398 </data>
399 <data name="psm_type_txt" format="tabular" from_work_dir="pepquery_output/psm_type.txt" label="${tool.name} on ${on_string}: psm_type.txt">
400 <filter>'psm_type.txt' in outputs_selected</filter>
401 <actions>
402 <action name="comment_lines" type="metadata" default="1" />
403 <action name="column_names" type="metadata" default="peptide,spectrum_title,type" />
404 </actions>
405 </data>
406 <data name="psm_annotation_txt" format="tabular" from_work_dir="pepquery_output/psm_annotation.txt" label="${tool.name} on ${on_string}: psm_annotation.txt">
407 <filter>'psm_annotation.txt' in outputs_selected</filter>
408 <actions>
409 <action name="comment_lines" type="metadata" default="1" />
410 <action name="column_names" type="metadata" default="peptide,modification,Query,calc_mr,observed_mz,charge,pepSeq,m_label,m_mz,m_intensity,mz,intensity" />
411 </actions>
412 </data>
413 <data name="detail_txt" format="tabular" from_work_dir="pepquery_output/detail.txt" label="${tool.name} on ${on_string}: detail.txt">
414 <filter>'detail.txt' in outputs_selected</filter>
415 <actions>
416 <action name="comment_lines" type="metadata" default="1" />
417 <action name="column_names" type="metadata" default="spectrum_title,peptide,modification,exp_mass,pep_mass,tol_ppm,tol_da,isotope_error,score" />
418 </actions>
419 </data>
420
421 <data name="ptm_txt" format="tabular" from_work_dir="pepquery_output/ptm.txt" label="${tool.name} on ${on_string}: ptm.txt">
422 <filter>'ptm.txt' in outputs_selected</filter>
423 <actions>
424 <action name="comment_lines" type="metadata" default="1" />
425 <action name="column_names" type="metadata" default="spectrum_title,peptide,charge,exp_mass,pep_mass,tol_ppm,tol_da,isotope_error,modification,score" />
426 </actions>
427 </data>
428 <data name="ptm_detail_txt" format="tabular" from_work_dir="pepquery_output/ptm_detail.txt" label="${tool.name} on ${on_string}: ptm_detail.txt">
429 <filter>'ptm_detail.txt' in outputs_selected</filter>
430 <actions>
431 <action name="comment_lines" type="metadata" default="1" />
432 <action name="column_names" type="metadata" default="peptide,modification,n,spectrum_title,charge,exp_mass,tol_ppm,tol_da,isotope_error,pep_mass,mz,score,n_db,total_db,n_random,total_random,pvalue,rank,ptm_spectrum_title,ptm_peptide,ptm_charge,ptm_exp_mass,ptm_pep_mass,ptm_tol_ppm,ptm_tol_da,ptm_isotope_error,ptm_modification,ptm_score" />
433 </actions>
434 </data>
435
436 </outputs>
437 <tests>
438 <!-- Test-1 PepQueryDB peptide gencode:human -->
439 <test expect_num_outputs="2">
440 <conditional name="validation">
441 <param name="task_type" value="novel"/>
442 </conditional>
443 <section name="req_inputs">
444 <conditional name="input_type">
445 <param name="input_type_selector" value="peptide"/>
446 <conditional name="multiple">
447 <param name="peptide_input_selector" value="single" />
448 <param name="input" value="LVVVGADGVGK,AHSSMVGVNLPQK"/>
449 </conditional>
450 </conditional>
451 <conditional name="db_type">
452 <param name="db_type_selector" value="download" />
453 <param name="db_id" value="gencode:human"/>
454 </conditional>
455 <conditional name="ms_dataset">
456 <param name="ms_dataset_type" value="PepQueryDB"/>
457 <param name="dataset" value="CPTAC_LUAD_Discovery_Study_Proteome_PDC000153" />
458 </conditional>
459 <param name="indexType" value="1"/>
460 </section>
461 <param name="parameter_set" value=""/>
462 <section name="modifications">
463 <param name="fixed_mod" value="1"/>
464 <param name="var_mod" value="2"/>
465 <param name="max_mods" value="3"/>
466 <param name="unmodified" value="True"/>
467 <param name="aa" value="False"/>
468 </section>
469 <section name="digestion">
470 <param name="enzyme" value="1"/>
471 <param name="max_missed_cleavages" value="2"/>
472 </section>
473 <section name="ms_params">
474 <section name="tolerance_params">
475 <param name="precursor_tolerance" value="10"/>
476 <param name="precursor_unit" value="ppm"/>
477 <param name="tolerance" value="0.6"/>
478 </section>
479 <section name="search">
480 <param name="frag_method" value="1"/>
481 <param name="scoring_method" value="1"/>
482 <param name="extra_score_validation" value="False"/>
483 <param name="min_charge" value="2"/>
484 <param name="max_charge" value="3"/>
485 <param name="min_peaks" value="10"/>
486 <param name="isotope_error" value="0"/>
487 <param name="min_score" value="12"/>
488 <param name="min_length" value="7"/>
489 <param name="max_length" value="45"/>
490 <param name="num_random_peptides" value="1000"/>
491 </section>
492 </section>
493 <output name="psm_rank_txt">
494 <assert_contents>
495 <has_text text="LVVVGADGVGK" />
496 <not_has_text text="AHSSMVGVNLPQK" />
497 <has_text text="02CPTAC_LUAD_W_BI_20180518_KR_f15:25149:2" />
498 <has_n_columns n="22" />
499 </assert_contents>
500 </output>
501 <output name="log_txt">
502 <assert_contents>
503 <has_text text="Ignore peptide (reason: exist in reference database): AHSSMVGVNLPQK" />
504 </assert_contents>
505 </output>
506 </test>
507
508 <!-- Test-2 PepQueryDB peptide gencode:human pep.txt -->
509 <test expect_num_outputs="2">
510 <conditional name="validation">
511 <param name="task_type" value="novel"/>
512 </conditional>
513 <section name="req_inputs">
514 <conditional name="input_type">
515 <param name="input_type_selector" value="peptide"/>
516 <conditional name="multiple">
517 <param name="peptide_input_selector" value="multiple" />
518 <param name="input" ftype="tabular" value="pep.txt"/>
519 </conditional>
520 </conditional>
521 <conditional name="db_type">
522 <param name="db_type_selector" value="download" />
523 <param name="db_id" value="gencode:human"/>
524 </conditional>
525 <conditional name="ms_dataset">
526 <param name="ms_dataset_type" value="PepQueryDB"/>
527 <param name="dataset" value="CPTAC_LUAD_Discovery_Study_Proteome_PDC000153" />
528 </conditional>
529 <param name="indexType" value="1"/>
530 </section>
531 <param name="parameter_set" value=""/>
532 <section name="modifications">
533 <param name="fixed_mod" value="1"/>
534 <param name="var_mod" value="2"/>
535 <param name="max_mods" value="3"/>
536 <param name="unmodified" value="True"/>
537 <param name="aa" value="False"/>
538 </section>
539 <section name="digestion">
540 <param name="enzyme" value="1"/>
541 <param name="max_missed_cleavages" value="2"/>
542 </section>
543 <section name="ms_params">
544 <section name="tolerance_params">
545 <param name="precursor_tolerance" value="10"/>
546 <param name="precursor_unit" value="ppm"/>
547 <param name="tolerance" value="0.6"/>
548 </section>
549 <section name="search">
550 <param name="frag_method" value="1"/>
551 <param name="scoring_method" value="1"/>
552 <param name="extra_score_validation" value="False"/>
553 <param name="min_charge" value="2"/>
554 <param name="max_charge" value="3"/>
555 <param name="min_peaks" value="10"/>
556 <param name="isotope_error" value="0"/>
557 <param name="min_score" value="12"/>
558 <param name="min_length" value="7"/>
559 <param name="max_length" value="45"/>
560 <param name="num_random_peptides" value="1000"/>
561 </section>
562 </section>
563 <output name="psm_rank_txt">
564 <assert_contents>
565 <has_text text="LVVVGADGVGK" />
566 <not_has_text text="AHSSMVGVNLPQK" />
567 <has_text text="02CPTAC_LUAD_W_BI_20180518_KR_f15:25149:2" />
568 <has_n_columns n="22" />
569 </assert_contents>
570 </output>
571 <output name="log_txt">
572 <assert_contents>
573 <has_text text="Ignore peptide (reason: exist in reference database): AHSSMVGVNLPQK" />
574 </assert_contents>
575 </output>
576 </test>
577
578 <!-- Test-3 MGF peptide Uniprot.fasta -->
579 <test expect_num_outputs="2">
580 <conditional name="validation">
581 <param name="task_type" value="novel"/>
582 </conditional>
583 <section name="req_inputs">
584 <conditional name="input_type">
585 <param name="input_type_selector" value="peptide"/>
586 <conditional name="multiple">
587 <param name="peptide_input_selector" value="single" />
588 <param name="input" value="ELGSSDLTAR"/>
589 </conditional>
590 </conditional>
591 <conditional name="db_type">
592 <param name="db_type_selector" value="history" />
593 <param name="db_file" ftype="fasta" value="Uniprot.fasta"/>
594 </conditional>
595 <conditional name="ms_dataset">
596 <param name="ms_dataset_type" value="history"/>
597 <param name="spectrum_files" ftype="mgf" value="iTRAQ_f4.mgf"/>
598 </conditional>
599 <param name="indexType" value="1"/>
600 </section>
601 <param name="parameter_set" value=""/>
602 <section name="modifications">
603 <!-- 21: iTRAQ 4-plex of K [144.1020624208] -->
604 <!-- 22: iTRAQ 4-plex of peptide N-term [144.1020624208] -->
605 <param name="fixed_mod" value="1,21,22"/>
606 <!-- 2: Oxidation of M [15.99491461956] -->
607 <param name="var_mod" value="2"/>
608 <param name="max_mods" value="3"/>
609 <param name="unmodified" value="True"/>
610 <param name="aa" value="False"/>
611 </section>
612 <section name="digestion">
613 <param name="enzyme" value="1"/>
614 <param name="max_missed_cleavages" value="2"/>
615 </section>
616 <section name="ms_params">
617 <section name="tolerance_params">
618 <param name="precursor_tolerance" value="10"/>
619 <param name="precursor_unit" value="ppm"/>
620 <param name="tolerance" value="0.6"/>
621 </section>
622 <section name="search">
623 <param name="frag_method" value="1"/>
624 <param name="scoring_method" value="1"/>
625 <param name="extra_score_validation" value="False"/>
626 <param name="min_charge" value="2"/>
627 <param name="max_charge" value="3"/>
628 <param name="min_peaks" value="10"/>
629 <param name="isotope_error" value="0"/>
630 <param name="min_score" value="12"/>
631 <param name="min_length" value="7"/>
632 <param name="max_length" value="45"/>
633 <param name="num_random_peptides" value="1000"/>
634 </section>
635 </section>
636 <output name="psm_rank_txt">
637 <assert_contents>
638 <has_text text="ELGSSDLTAR" />
639 <has_line_matching expression="ELGSSDLTAR\tiTRAQ 4-plex of peptide N-term@0\[144.1\d+\]\t2\tiTRAQ_f4:3:2\t2\t1191.62\d+\t-3.04\d+\t-0.003\d+\t0.0\t1191.6\d+\t596.8\d+\t24.1\d+\t0\t0\t1\t995\t0.002\d+\t1\t0\tYes\t24.1\d+\t24.1\d+"/>
640 <has_n_columns n="22" />
641 </assert_contents>
642 </output>
643 </test>
644
645 <!-- Test-4 PepQueryDB known peptide (single peptide entered as text) gencode:human -->
646 <test expect_num_outputs="2"> <!-- two output datasets are expected; only psm_rank_txt is content-checked below -->
647 <conditional name="validation">
648 <param name="task_type" value="known"/> <!-- "known" makes the command template emit "-s 2" plus the decoy option -->
649 </conditional>
650 <section name="req_inputs">
651 <conditional name="input_type">
652 <param name="input_type_selector" value="peptide"/>
653 <conditional name="multiple">
654 <param name="peptide_input_selector" value="single" /> <!-- the peptide sequence is given literally in the "input" param below -->
655 <param name="input" value="AHSSMVGVNLPQK"/>
656 </conditional>
657 </conditional>
658 <conditional name="db_type">
659 <param name="db_type_selector" value="download" /> <!-- reference database is named by db_id instead of a history FASTA (db_file) -->
660 <param name="db_id" value="gencode:human"/>
661 </conditional>
662 <conditional name="ms_dataset">
663 <param name="ms_dataset_type" value="PepQueryDB"/> <!-- MS/MS data referenced by PepQueryDB dataset ID; NOTE(review): presumably needs network access at test time - confirm -->
664 <param name="dataset" value="CPTAC_LUAD_Discovery_Study_Proteome_PDC000153" />
665 </conditional>
666 <param name="indexType" value="1"/>
667 </section>
668 <section name="modifications">
669 <param name="unmodified" value="True"/>
670 </section>
671 <output name="psm_rank_txt">
672 <assert_contents>
673 <has_text text="AHSSMVGVNLPQK" /> <!-- the queried peptide must appear in the PSM rank output -->
674 <has_text text="6CPTAC_LUAD_W_BI_20180718_KL_f12:20286:3" /> <!-- NOTE(review): presumably a fragment of the matched spectrum_title - confirm -->
675 <has_n_columns n="22" /> <!-- same column count as asserted for psm_rank_txt in the preceding test -->
676 </assert_contents>
677 </output>
678 </test>
679
680 </tests>
681 <help><![CDATA[
682 **PepQuery2**
683
684 PepQuery_ is a universal targeted peptide search engine for identifying or validating known and novel peptides of interest in any local or publicly available mass spectrometry-based proteomics datasets.
685
686
687 PepQuery_ is a peptide-centric search engine for novel peptide identification and validation. Cancer genomics studies have identified a large number of genomic alterations that may lead to novel, cancer-specific protein sequences. Proteins resulting from these genomic alterations are attractive candidates for cancer biomarkers and therapeutic targets. The leading approach to proteomic validation of genomic alterations is to analyze tandem mass spectrometry (MS/MS) data using customized proteomics databases created from genomics data. Such analysis is time-consuming and requires thorough training and detailed knowledge in proteomics data analysis, leading to a gap between MS/MS data and the cancer genomics community. PepQuery does not require customized databases and allows quick and easy proteomic validation of genomic alterations.
688
689 PepQuery2 leverages a new MS/MS indexing approach and cloud storage to enable ultrafast, targeted identification of both novel and known peptides. PepQuery2 allows users to search more than one billion MS/MS spectra indexed in the PepQueryDB from any computer with internet access. It also supports direct analysis of user-provided MS/MS data, any public datasets in PRIDE, MassIVE, jPOSTrepo and iProX, or Universal Spectrum Identifiers (USIs) from ProteomeXchange.
690
691 **Inputs**
692 - A sequence to match, one of the following:
693
694 - A peptide string (or strings separated by commas)
695 - A history dataset with a list of peptides
696 - A protein string or a history dataset with a protein fasta
697 - A DNA string that is at least 60 base pairs in length
698
699
700 - MS/MS data used for identification, one of the following:
701
702 - Mass Spectrometry history datasets in MGF, mzML, or mzXML format
703 - An Indexed MS/MS dataset (from a previous PepQuery2 run or from the **PepQuery2 index** tool)
704 - PepQueryDB dataset IDs
705
706 ..
707
708 Multiple datasets from PepQueryDB must be separated by commas. A pattern to match datasets in PepQueryDB is also supported; for example, use 'CPTAC' to search all datasets containing 'CPTAC'. In addition, dataset selection from PepQueryDB based on data type (w:global proteome, p:phosphorylation, a:acetylation, u:ubiquitination, g:glycosylation) is also supported. For example, use 'p' to search all phosphoproteomics datasets in PepQueryDB. The **PepQuery2 Show Sets** tool will list available PepQueryDB datasets.
709
710
711 - Dataset IDs from public proteomics data repositories: PRIDE, MassIVE, jPOSTrepo and iProX
712
713 ..
714
715 Dataset ID from a public proteomics data repository; only one dataset is supported per analysis. For example, use 'PXD000529' to use all MS/MS data from dataset PXD000529 or use 'PXD000529:LM3' to use data files containing LM3 from dataset PXD000529.
716
717 - A reference protein fasta database, novel peptides matching a reference sequence will be excluded.
718
719 - A protein fasta file
720 - The ID for a public reference protein database from RefSeq, GENCODE, Ensembl or UniProt.
721
722
723 **Options**
724
725 - MS/MS searching parameter set name
726
727 ..
728
729 Setting a *parameter set name* will change defaults for various options. These may be overridden by manually setting the option.
730 The **PepQuery2 Show Sets** tool *PepQuery Predefined Parameter Sets* will list those available along with the option values that will be set.
731 The **PepQuery2 Show Sets** tool *PepQuery Datasets* output has a *parameter_set* column giving the parameter set for each PepQueryDB dataset.
732
733
734 - Override default options
735
736 ..
737
738 Values for modifications are provided in a select list.
739 The **PepQuery2 Show Sets** tool *PepQuery Modifications* lists all available modifications.
740
741 **Outputs**
742 - Log.txt:
743
744 - Logging output from PepQuery2
745 - When searching for *novel* peptides, ignored peptides have a log message similar to:
746
747 - Ignore peptide (reason: exist in reference database): *PEPTIDE*
748
749 - When searching for *known* proteins, ignored proteins have a log message similar to:
750
751 - Target protein is not present in database *DATABASE_NAME*: *PROTEIN_NAME*, ignored!
752
753 - Parameters:
754
755 - parameters used in the search
756
757 - PSM - tabular with columns:
758
759 - peptide modification n spectrum_title charge exp_mass ppm pep_mass mz score n_db total_db n_random total_random pvalue
760
761 - PSM Rank - tabular with columns:
762
763 - peptide modification n spectrum_title charge exp_mass ppm pep_mass mz score n_db total_db n_random total_random pvalue rank n_ptm
764
765 - An MGF with the best matching spectra
766
767 - Detail - tabular with columns:
768
769 - spectrum_title peptide modification pep_mass score
770
771 - PSM annotation - tabular with columns:
772
773 - peptide Query calc_mr observed_mz charge pepSeq m_label m_mz m_intensity mz intensity
774
775 - PTM - tabular with columns:
776
777 - spectrum_title peptide charge exp_mass pep_mass tol_ppm tol_da isotope_error modification score
778
779 - PTM Detail - tabular with columns:
780
781 - peptide modification n spectrum_title charge exp_mass tol_ppm tol_da isotope_error pep_mass mz score n_db total_db n_random total_random pvalue rank ptm_spectrum_title ptm_peptide ptm_charge ptm_exp_mass ptm_pep_mass ptm_tol_ppm ptm_tol_da ptm_isotope_error ptm_modification ptm_score
782
783 - An Indexed MS/MS dataset *when MS/MS data is MGF, mzML, or mzXML history datasets*
784
785
786 .. _PepQuery: http://pepquery.org/document.html
787
788 ]]></help>
789 <expand macro="citations" />
790 </tool>