comparison moff.xml @ 0:b4098353ee73 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/moFF commit bc0fad49e3ba73fa5b5b326e940adf9e11854d94
author galaxyp
date Fri, 05 Jan 2018 12:47:36 -0500
parents
children 8f0e76ad46ef
comparison
equal deleted inserted replaced
-1:000000000000 0:b4098353ee73
1 <tool id="proteomics_moff" name="moFF" version="@VERSION@">
2 <description>extracts MS1 intensities from spectrum files</description>
3 <macros>
4 <token name="@VERSION@">1.2</token>
5 <!-- xml macros, used for shared Galaxy parameter inputs -->
6 <xml name="ident_input_macro" token_allow_multiple="true" token_input_type="data">
7 <!-- Shared identification-file inputs for all three tasks. The expand calls pass input_type="data_collection" for mbr and all and keep the default "data" for moff; every caller passes allow_multiple="false". -->
8 <conditional name="ident_input">
9 <param name="input_type_selector" type="select" label="Choose the format for the identification file:">
10 <option value="ps">Peptide Shaker PSM report (standard, not extended)</option>
11 <option value="generic">Another tabular identification file</option>
12 </param>
13 <when value="ps">
14 <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="Peptide Shaker PSM report" multiple="@ALLOW_MULTIPLE@"/>
15 </when>
16 <when value="generic">
17 <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="A general tabular format" multiple="@ALLOW_MULTIPLE@"
18 help="Must have specific columns; see below to select these columns from your file. The file should have at most one header line. "/>
19 <param name="remove_header" type="boolean" value="false" label="Remove the header line?" help="This is necessary if the file has a line with column headers"/>
20 <param name="peptide"
21 type="data_column"
22 data_ref="ident_input_file"
23 label="Column with peptide-spectrum-match sequence"/>
24 <param name="prot"
25 type="data_column"
26 data_ref="ident_input_file"
27 label="Column with protein ID"/>
28 <param name="mod_peptide"
29 type="data_column"
30 data_ref="ident_input_file"
31 label="Column with peptide-spectrum-match sequence that contains possible modifications"/>
32 <param name="rt"
33 type="data_column"
34 data_ref="ident_input_file"
35 label="Column with PSM retention time (in second)"/>
36 <param name="mz"
37 type="data_column"
38 data_ref="ident_input_file"
39 label="Column with m/z (mass over charge)"/>
40 <param name="mass"
41 type="data_column"
42 data_ref="ident_input_file"
43 label="Column with mass of the peptide"/>
44 <param name="charge"
45 type="data_column"
46 data_ref="ident_input_file"
47 label="Column with charge of ionized peptide"/>
48 </when>
49 </conditional>
50 </xml>
51 <xml name="raw_input_macro" token_allow_multiple="true" token_input_type="data"> <!-- Spectrum-file input: the selector picks Thermo RAW or mzML; both branches declare the same inputraw parameter and differ only in the accepted format and label. -->
52 <conditional name="msms_input">
53 <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file">
54 <option value="raw">Thermo RAW file</option>
55 <option value="mzml">mzML</option>
56 </param>
57 <when value="raw">
58 <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="raw" label="RAW file(s)"/>
59 </when>
60 <when value="mzml">
61 <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="mzml" label="mzML file(s)"/>
62 </when>
63 </conditional>
64 </xml>
65 <!-- tokens (code snippets used in <command>) -->
66 <token name="@IDENT_INPUT_ARG_MULTIPLE@"><![CDATA[
67 ## emits --inputtsv followed by one quoted ./ident_inputs path per element of the identification collection, each named by the element's display_name
68 --inputtsv
69 #for $key in $task.ident_input.ident_input_file.keys():
70 './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}'
71 #end for
72 ]]></token>
73 <token name="@IDENT_INPUT_ARG_SINGLE@"><![CDATA[
74 ## emits --inputtsv with the single identification file's path under ./ident_inputs, named by its display_name
75 --inputtsv './ident_inputs/${task.ident_input.ident_input_file.display_name}'
76 ]]></token>
77 <token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[
78 mkdir ./ident_inputs &&
79 #if $task.ident_input.input_type_selector == "ps":
80 ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
81 #else
82 ## generic table: write a reformatted copy into ./ident_inputs and never edit the input dataset in place; the first line is skipped inside awk when requested
83 #if $task.ident_input.remove_header:
84 #set $skip_rows = 1
85 #else
86 #set $skip_rows = 0
87 #end if
88 ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
89 echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
90 awk 'BEGIN{OFS="\t"; FS="\t"} NR > skip {print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' skip="$skip_rows" pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file' >> './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
91 #end if
92 ]]></token>
93 <token name="@WRANGLE_IDENT_INPUT_MULTIPLE@"><![CDATA[
94 mkdir ./ident_inputs &&
95 #if $task.ident_input.input_type_selector == "ps":
96 #for $key in $task.ident_input.ident_input_file.keys():
97 ln -s '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
98 #end for
99 #else
100 ## generic tables: write reformatted copies into ./ident_inputs and never edit the input datasets in place; the first line is skipped inside awk when requested
101 #if $task.ident_input.remove_header:
102 #set $skip_rows = 1
103 #else
104 #set $skip_rows = 0
105 #end if
106 #for $key in $task.ident_input.ident_input_file.keys():
107 ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
108 echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
109 awk 'BEGIN{OFS="\t"; FS="\t"} NR > skip {print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' skip="$skip_rows" pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '${task.ident_input.ident_input_file[$key]}' >> './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
110 #end for
111 #end if
112 ]]></token>
113 <token name="@RAW_INPUT_ARG_SINGLE@"><![CDATA[
114 --inputraw './raws/${task.msms_input.inputraw.display_name}'
115 ]]></token> <!-- passes the single spectrum file by its display_name path under ./raws -->
116 <token name="@RAW_INPUT_ARG_MULTIPLE@"><![CDATA[
117 --inputraw
118 #for $element_id in $task.msms_input.inputraw.keys():
119 './raws/${task.msms_input.inputraw[$element_id].display_name}'
120 #end for
121 ]]></token> <!-- lists one quoted ./raws path per element of the spectrum collection -->
122 <token name="@WRANGLE_RAW_INPUT_SINGLE@"><![CDATA[
123 mkdir ./raws &&
124 ## expose the history item under its display name through a symlink in ./raws
125 ln -s '${task.msms_input.inputraw}' './raws/${task.msms_input.inputraw.display_name}' &&
126 ]]></token>
127 <token name="@WRANGLE_RAW_INPUT_MULTIPLE@"><![CDATA[
128 mkdir ./raws &&
129 ## expose every collection element under its display name through a symlink in ./raws
130 #for $element_id in $task.msms_input.inputraw.keys():
131 ln -s '${task.msms_input.inputraw[$element_id]}' './raws/${task.msms_input.inputraw[$element_id].display_name}' &&
132 #end for
133 ]]></token>
134 </macros>
135 <requirements> <!-- the moff package version is taken from the VERSION token defined in the macros section -->
136 <requirement type="package" version="@VERSION@">moff</requirement>
137 </requirements>
138 <command detect_errors="aggressive"><![CDATA[
139 mkdir ./out &&
140 #if $task.task_selector == "moff":
141 @WRANGLE_IDENT_INPUT_SINGLE@
142 @WRANGLE_RAW_INPUT_SINGLE@
143 moff.py
144 @IDENT_INPUT_ARG_SINGLE@
145 @RAW_INPUT_ARG_SINGLE@
146 --tol $task.tol
147 --rt_w $task.rt_w
148 --rt_p $task.rt_p
149 --output_folder ./out
150 #if ($task.peptide_summary):
151 --peptide_summary 1
152 #end if
153 &&
154 #if $task.peptide_summary:
155 mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
156 #end if
157 mv ./out/*moff_result.txt '$output_table'
158 &&
159 mv ./out/*.log '$output_logs'
160 #else if $task.task_selector == "mbr":
161 @WRANGLE_IDENT_INPUT_MULTIPLE@
162 moff_mbr.py
163 --inputF ./ident_inputs
164 --ext $task.ext
165 &&
166 mv ./ident_inputs/mbr_output/* ./out
167 #else:
168 ## "all" task: moff_all.py runs match-between-runs followed by apex quantification
169 @WRANGLE_IDENT_INPUT_MULTIPLE@
170 @WRANGLE_RAW_INPUT_MULTIPLE@
171 moff_all.py
172 @IDENT_INPUT_ARG_MULTIPLE@
173 @RAW_INPUT_ARG_MULTIPLE@
174 --tol $task.tol
175 --rt_w $task.rt_w
176 --rt_p $task.rt_p
177 --rt_p_match $task.rt_p_match
178 --output_folder ./out
179 --ext txt
180 #if $task.peptide_summary:
181 --peptide_summary 1
182 #end if
183 &&
184 #if $task.peptide_summary:
185 mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
186 #end if
187 echo -ne
188 #end if
189 ]]></command> <!-- Each branch first stages its inputs in the working directory. The moff branch moves its results from ./out onto the declared outputs; the mbr and all branches leave their files in ./out. The closing "echo -ne" in the all branch gives the preceding chaining operator a command to run. -->
190 <inputs>
191 <conditional name="task"> <!-- selects the module to run; each branch declares only the inputs and options of that module -->
192 <param name="task_selector" type="select" label="Choose which module to run">
193 <option value="moff" selected="true">Apex intensity</option>
194 <option value="mbr">Match between runs</option>
195 <option value="all">All (match-between-runs followed by quantitation)</option>
196 </param>
197 <when value = "moff">
198 <expand macro="ident_input_macro" allow_multiple="false"/>
199 <expand macro="raw_input_macro" allow_multiple="false"/>
200 <param argument="--tol" type="float" value="10" label="Tolerance parameter"
201 help="Specify the tolerance parameter in ppm." />
202 <param argument="--rt_w" type="float" value="3.0" label="Retention time window"
203 help="Specify rt window for xic in minutes." />
204 <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
205 help="Specify the time windows for the peak in minutes." />
206 <param argument="--rt_p_match" type="float" value="1.5" label="Time window for the matched peak"
207 help="Specify the time windows for the matched peak in minutes." /> <!-- NOTE(review): the moff branch of the command does not pass rt_p_match to moff.py; confirm whether this parameter is needed here -->
208 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
209 </when>
210 <when value="mbr">
211 <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
212 <param argument="--ext" type="text" value="tab" label="Provide the extension used in the display file name (without the period)"/>
213 </when>
214 <when value="all">
215 <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
216 <expand macro="raw_input_macro" allow_multiple="false" input_type="data_collection"/>
217 <param argument="--tol" type="float" value="10" label="Tolerance parameter"
218 help="Specify the tolerance parameter in ppm." />
219 <param argument="--rt_w" type="float" value="3.0" label="Retention time window"
220 help="Specify rt window for xic in minutes." />
221 <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
222 help="Specify the time windows for the peak in minutes." />
223 <param argument="--rt_p_match" type="float" value="1.2" label="Time window for the matched peak"
224 help="Specify the time windows for the matched peak in minutes." />
225 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
226 </when>
227 </conditional>
228 </inputs>
229 <outputs>
230 <data format="tabular" name="output_table" label="${tool.name} quantification: ${on_string}">
231 <filter>task['task_selector']=='moff'</filter>
232 </data>
233 <data format="txt" name="output_logs" label="${tool.name} log: ${on_string}">
234 <filter>task['task_selector']=='moff'</filter>
235 </data>
236 <collection name="ident_output" type="list" label="${tool.name} quantification: ${on_string}">
237 <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
238 <!-- collection elements are discovered from the .txt files left in the out directory -->
239 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/>
240 </collection>
241 <collection name="log_output" type="list" label="${tool.name} logs: ${on_string}">
242 <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
243 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/>
244 </collection>
245 <data format="tabular" name="output_peptide_summary" label="${tool.name} peptide summary: ${on_string}"> <!-- only the moff and all branches declare peptide_summary, so mbr is ruled out before that key is read -->
246 <filter>task['task_selector'] != 'mbr' and task['peptide_summary']</filter>
247 </data>
248 </outputs>
249 <tests>
250 <!-- test moff_all: two-element identification and mzML collections; checks text in the peptide summary, the two quantification tables and one line in each log -->
251 <test>
252 <param name="task_selector" value="all"/>
253 <param name="input_type_selector" value="ps"/>
254 <param name="ident_input_file">
255 <collection type="list">
256 <element name="mbr_test1" value="input/mbr_test1.tabular"/>
257 <element name="mbr_test2" value="input/mbr_test2.tabular"/>
258 </collection>
259 </param>
260 <param name="inputraw">
261 <collection type="list">
262 <element name="mbr_test1" value="input/mbr_test1.mzml"/>
263 <element name="mbr_test2" value="input/mbr_test2.mzml"/>
264 </collection>
265 </param>
266 <param name="peptide_summary" value="true"/>
267 <output name="output_peptide_summary" ftype="tabular">
268 <assert_contents>
269 <has_text text="sumIntensity_mbr_test1"/>
270 <has_text text="sumIntensity_mbr_test2"/>
271 </assert_contents>
272 </output>
273 <output_collection name="ident_output" type="list">
274 <element name="mbr_test1_match_moff_result" value="output1/mbr_test1_match_moff_result.txt"/>
275 <element name="mbr_test2_match_moff_result" value="output1/mbr_test2_match_moff_result.txt"/>
276 </output_collection>
277 <output_collection name="log_output" type="list">
278 <element name="mbr_test1_match__moff">
279 <assert_contents>
280 <has_line line="peptide at line 200 --> MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/>
281 </assert_contents>
282 </element>
283 <element name="mbr_test2_match__moff">
284 <assert_contents>
285 <has_line line="peptide at line 132 --> MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/>
286 </assert_contents>
287 </element>
288 </output_collection>
289 </test>
290 <!-- test moff alone: Peptide Shaker report plus a single mzML file; compares the peptide summary and checks one log line -->
291 <test>
292 <param name="task_selector" value="moff"/>
293 <param name="input_type_selector" value="ps"/>
294 <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
295 <param name="msms_input" value="mzml"/> <!-- NOTE(review): msms_input is the name of the conditional; the select inside it is input_type_selector. Confirm this line really selects mzML. -->
296 <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
297 <param name="peptide_summary" value="true"/>
298 <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
299 <output name="output_logs">
300 <assert_contents>
301 <has_line line="peptide at line 294 --> MZ: 677.3300 RT: 60.6078"/>
302 </assert_contents>
303 </output>
304 </test>
305 <!-- test the generic input: same files as the moff-alone test, with the header removed and the seven columns chosen by number; expects the same peptide summary and log line -->
306 <test>
307 <param name="task_selector" value="moff"/>
308 <param name="input_type_selector" value="generic"/>
309 <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
310 <param name="remove_header" value="true"/>
311 <param name="msms_input" value="mzml"/> <!-- NOTE(review): msms_input is the name of the conditional; the select inside it is input_type_selector. Confirm this line really selects mzML. -->
312 <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
313 <param name="peptide" value="3"/>
314 <param name="prot" value="2"/>
315 <param name="mod_peptide" value="7"/>
316 <param name="rt" value="13"/>
317 <param name="mz" value="14"/>
318 <param name="mass" value="17"/>
319 <param name="charge" value="15"/>
320 <param name="peptide_summary" value="true"/>
321 <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
322 <output name="output_logs">
323 <assert_contents>
324 <has_line line="peptide at line 294 --> MZ: 677.3300 RT: 60.6078"/>
325 </assert_contents>
326 </output>
327 </test>
328 <!-- test mbr: two Peptide Shaker reports in a list collection; expects exactly two matched tables in the ident_output collection -->
329 <test>
330 <param name="task_selector" value="mbr"/>
331 <param name="input_type_selector" value="ps"/>
332 <param name="ident_input_file">
333 <collection type="list">
334 <element name="mbr_test1" value="input/mbr_test1.tabular"/>
335 <element name="mbr_test2" value="input/mbr_test2.tabular"/>
336 </collection>
337 </param>
338 <param name="ext" value="tabular"/>
339 <output_collection name="ident_output" type="list" count="2">
340 <element name="mbr_test1_match" file="input/mbr_output/mbr_test1_match.txt"/>
341 <element name="mbr_test2_match" file="input/mbr_output/mbr_test2_match.txt"/>
342 </output_collection>
343 </test>
344 </tests>
345 <help>
346 <![CDATA[
347 **Description**
348
349 moFF (a Modest Feature Finder) is an OS independent tool designed to extract
350 apex MS1 intensity using a set of identified MS2 peptides.
351 It currently uses a Go library to directly extract data from Thermo Raw spectrum files,
352 eliminating the need for conversions from other formats.
353 Moreover, moFF also allows one to work directly with mzML files.
354
355 **Usage**
356
357 *Modules:*
358
359 1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file.
360 2. Match between runs (MBR): for multiple identification files, share MS2 identified peptides between runs and predict the retention time.
361 3. All (match between runs followed by apex intensity): this is used for more than one pair of identification and spectrum files.
362
363 If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module).
364 The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs are not able to be used as inputs in moFF or in other tools.
365
366
367 *Inputs:*
368
369 - Identification file: this can either be a generic tabular file or the standard PSM report from PeptideShaker.
370 If it is a generic tabular file, please select the columns corresponding to the required information.
371
372 - MS/MS file: this can either be a Thermo raw file or an mzML file.
373
374 A given pair of files must have the *exact* same display name, not including the extension;
375 e.g. ``example1.tabular`` and ``example1.mzml``.
376 If the display names are different, simply change them in the history menu.
377
378 For multiple files (the MBR or All modules), the identification and spectrum files must be provided as dataset collections.
379 This allows for usage of the output dataset collections in workflows.
380
381 *Parameters:*
382
383 All the parameters related to the time windows (``rt_w``, ``rt_p``, ``rt_p_match``) are basically the
384 half of the entire time windows where the apex peak is searched or the XIC is retrieved.
385 For correct rt windows, we suggest you set the ``rt_p`` value equal to or slightly greater than the
386 dynamic exclusion duration set in your machine. We suggest also to set the
387 ``rt_p_match`` always slightly bigger than the value used for ``rt_p``.
388
389 *Outputs:*
390
391 When used in the single file mode ("Apex intensity" module), the outputs are 2 (or 3) files: a log file, a quantitation file,
392 and (optionally) a peptide summary, with intensities aggregated across peptides. When used in the multiple file mode ("All"),
393 the outputs are a dataset collection of log files (one per identification file), a dataset collection of quantification files, and (optionally) a peptide summary.
394
395 If used with a generic tabular format, the only columns in the output file are the 7 columns selected while using moFF plus the columns that moFF adds. Other columns are discarded.
396
397 **More Information**
398
399 See the moFF Github site at https://github.com/compomics/moFF,
400 and the publication at https://dx.doi.org/10.1038/nmeth.4075
401
402 ]]>
403 </help>
404 <citations> <!-- same DOI as the publication linked in the help text -->
405 <citation type="doi">10.1038/nmeth.4075</citation>
406 </citations>
407 </tool>