diff moff.xml @ 0:b4098353ee73 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/moFF commit bc0fad49e3ba73fa5b5b326e940adf9e11854d94
author galaxyp
date Fri, 05 Jan 2018 12:47:36 -0500
parents
children 8f0e76ad46ef
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moff.xml	Fri Jan 05 12:47:36 2018 -0500
@@ -0,0 +1,407 @@
+<tool id="proteomics_moff" name="moFF" version="@VERSION@">
+    <description>extracts MS1 intensities from spectrum files</description>
+    <macros>
+        <token name="@VERSION@">1.2</token>
+        <!-- xml macros, used for shared Galaxy parameter inputs -->
+        <xml name="ident_input_macro" token_allow_multiple="true" token_input_type="data">
+        <!-- this is exactly the same across all three, except for allowing multiple in MBR and all but not in moff -->
+            <conditional name="ident_input">
+                    <param name="input_type_selector" type="select" label="Choose the format for the identification file:">
+                        <option value="ps">Peptide Shaker PSM report (standard, not extended)</option>
+                        <option value="generic">Another tabular identification file</option>
+                    </param>
+                    <when value="ps">
+                        <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="Peptide Shaker PSM report" multiple="@ALLOW_MULTIPLE@"/>
+                    </when>
+                    <when value="generic">
+                        <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="A general tabular format" multiple="@ALLOW_MULTIPLE@"
+                            help="Must have specific columns; see below to select these columns from your file. The file should have at most one header line. "/>
+                        <param name="remove_header" type="boolean" value="false" label="Remove the header line?" help="This is necessary if the file has a line with column headers"/>
+                        <param name="peptide"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with peptide-spectrum-match sequence"/>
+                        <param name="prot"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with protein ID"/>
+                        <param name="mod_peptide"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with peptide-spectrum-match sequence that contains possible modifications"/>
+                        <param name="rt"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with PSM retention time (in second)"/>
+                        <param name="mz"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with m/z (mass over charge)"/>
+                        <param name="mass"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with mass of the peptide"/>
+                        <param name="charge"
+                            type="data_column"
+                            data_ref="ident_input_file"
+                            label="Column with charge of ionized peptide"/>
+                    </when>
+            </conditional>
+        </xml>
+        <xml name="raw_input_macro" token_allow_multiple="true" token_input_type="data">
+            <conditional name="msms_input">
+                <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file">
+                    <option value="raw">Thermo RAW file</option>
+                    <option value="mzml">mzML</option>
+                </param>
+                <when value="raw">
+                    <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="raw" label="RAW file(s)"/>
+                </when>
+                <when value="mzml">
+                    <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="mzml" label="mzML file(s)"/>
+                </when>
+            </conditional>
+        </xml>
+        <!-- tokens (code snippets used in <command>) -->
+        <token name="@IDENT_INPUT_ARG_MULTIPLE@"><![CDATA[
+            ## this is where the ident input gets passed to moff/moff_all/moff_mbr
+            --inputtsv
+            #for $key in $task.ident_input.ident_input_file.keys():
+                './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}'
+            #end for
+        ]]></token>
+        <token name="@IDENT_INPUT_ARG_SINGLE@"><![CDATA[
+            ## this is where the ident input gets passed to moff/moff_all/moff_mbr
+            --inputtsv './ident_inputs/${task.ident_input.ident_input_file.display_name}'
+        ]]></token>
+        <token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[
+            mkdir ./ident_inputs &&
+            #if $task.ident_input.input_type_selector == "ps":
+                ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+            #else
+                ## optionally remove first line
+                #if $task.ident_input.remove_header:
+                    sed -i '1d' '$task.ident_input.ident_input_file' &&
+                #end if
+                ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
+                echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > tempfile.tab &&
+                awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file' >> tempfile.tab &&
+                mv tempfile.tab '$task.ident_input.ident_input_file' &&
+                ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+            #end if
+        ]]></token>
+        <token name="@WRANGLE_IDENT_INPUT_MULTIPLE@"><![CDATA[
+            mkdir ./ident_inputs &&
+            #if $task.ident_input.input_type_selector == "ps":
+                #for $key in $task.ident_input.ident_input_file.keys():
+                    ln -s '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
+                #end for
+            #else
+                #for $key in $task.ident_input.ident_input_file.keys():
+                    ## optionally remove first line
+                    #if $task.ident_input.remove_header:
+                        sed -i '1d' '$task.ident_input.ident_input_file[$key]' &&
+                    #end if
+                    ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
+                    echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > tempfile.tab &&
+                    awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$filename' >> tempfile.tab &&
+                    mv tempfile.tab '$task.ident_input.ident_input_file[$key]' &&
+                    ln -s '$task.ident_input.ident_input_file[$key]' './ident_inputs/$task.ident_input.ident_input_file[$key].display_name' &&
+                #end for
+            #end if
+        ]]></token>
+        <token name="@RAW_INPUT_ARG_SINGLE@"><![CDATA[
+            --inputraw './raws/$task.msms_input.inputraw.display_name'
+        ]]></token>
+        <token name="@RAW_INPUT_ARG_MULTIPLE@"><![CDATA[
+            --inputraw
+            #for $key in $task.msms_input.inputraw.keys():
+                './raws/$task.msms_input.inputraw[$key].display_name'
+            #end for
+        ]]></token>
+        <token name="@WRANGLE_RAW_INPUT_SINGLE@"><![CDATA[
+            mkdir ./raws &&
+            ## for files, need to softlink the display name to the history item
+            ln -s '$task.msms_input.inputraw' './raws/$task.msms_input.inputraw.display_name' &&
+        ]]></token>
+        <token name="@WRANGLE_RAW_INPUT_MULTIPLE@"><![CDATA[
+            mkdir ./raws &&
+            ## for files, need to softlink the display name to the history item
+            #for $key in $task.msms_input.inputraw.keys():
+                ln -s '$task.msms_input.inputraw[$key]' './raws/$task.msms_input.inputraw[$key].display_name' &&
+            #end for
+        ]]></token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@VERSION@">moff</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        mkdir ./out &&
+        #if $task.task_selector == "moff":
+            @WRANGLE_IDENT_INPUT_SINGLE@
+            @WRANGLE_RAW_INPUT_SINGLE@
+            moff.py
+                @IDENT_INPUT_ARG_SINGLE@
+                @RAW_INPUT_ARG_SINGLE@
+                --tol $task.tol
+                --rt_w $task.rt_w
+                --rt_p $task.rt_p
+                --output_folder ./out
+                #if ($task.peptide_summary):
+                    --peptide_summary 1
+                #end if
+            &&
+            #if $task.peptide_summary:
+                mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
+            #end if
+            mv ./out/*moff_result.txt '$output_table'
+            &&
+            mv ./out/*.log '$output_logs'
+        #else if $task.task_selector == "mbr":
+           @WRANGLE_IDENT_INPUT_MULTIPLE@
+           moff_mbr.py
+                --inputF ./ident_inputs
+                --ext $task.ext
+           &&
+           mv ./ident_inputs/mbr_output/* ./out
+        #else:
+           ## moff_all (mbr followed by apex)
+           @WRANGLE_IDENT_INPUT_MULTIPLE@
+           @WRANGLE_RAW_INPUT_MULTIPLE@
+           moff_all.py
+               @IDENT_INPUT_ARG_MULTIPLE@
+               @RAW_INPUT_ARG_MULTIPLE@
+               --tol $task.tol
+               --rt_w $task.rt_w
+               --rt_p $task.rt_p
+               --rt_p_match $task.rt_p_match
+               --output_folder ./out
+               --ext txt
+               #if $task.peptide_summary:
+                   --peptide_summary 1
+               #end if
+           &&
+           #if $task.peptide_summary:
+               mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
+           #end if
+           echo -ne
+        #end if
+    ]]></command>
+    <inputs>
+        <conditional name="task">
+            <param name="task_selector" type="select" label="Choose which module to run">
+                <option value="moff" selected="true">Apex intensity</option>
+                <option value="mbr">Match between runs</option>
+                <option value="all">All (match-between-runs followed by quantitation)</option>
+            </param>
+            <when value = "moff">
+                <expand macro="ident_input_macro" allow_multiple="false"/>
+                <expand macro="raw_input_macro" allow_multiple="false"/>
+                <param argument="--tol" type="float" value="10" label="Tolerance parameter"
+                    help="Specify the tolerance parameter in ppm." />
+                <param argument="--rt_w" type="float" value="3.0" label="Retention time window"
+                    help="Specify rt window for xic in minutes." />
+                <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
+                    help="Specify the time windows for the peak in minutes." />
+                <param argument="--rt_p_match" type="float" value="1.5" label="Time window for the matched peak"
+                    help="Specify the time windows for the matched peak in minutes." />
+                <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
+            </when>
+            <when value="mbr">
+                <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
+                <param argument="--ext" type="text" value="tab" label="Provide the extension used in the display file name (without the period)"/>
+            </when>
+            <when value="all">
+                <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
+                <expand macro="raw_input_macro" allow_multiple="false" input_type="data_collection"/>
+                <param argument="--tol" type="float" value="10" label="Tolerance parameter"
+                    help="Specify the tolerance parameter in ppm." />
+                <param argument="--rt_w" type="float" value="3.0" label="Retention time window"
+                    help="Specify rt window for xic in minutes." />
+                <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
+                    help="Specify the time windows for the peak in minutes." />
+                <param argument="--rt_p_match" type="float" value="1.2" label="Time window for the matched peak"
+                    help="Specify the time windows for the matched peak in minutes." />
+                <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
+            </when>
+        </conditional> 
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output_table" label="${tool.name} quantification: ${on_string}">
+            <filter>task['task_selector']=='moff'</filter>
+        </data>
+        <data format="txt" name="output_logs" label="${tool.name} log: ${on_string}">
+            <filter>task['task_selector']=='moff'</filter>
+        </data>
+        <collection name="ident_output" type="list" label="${tool.name} quantification: ${on_string}">
+            <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
+            <!--discover datasets method -->
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/>
+        </collection>
+        <collection name="log_output" type="list" label="${tool.name} logs: ${on_string}">
+            <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/>
+        </collection>
+        <data format="tabular" name="output_peptide_summary" label="${tool.name} peptide summary: ${on_string}">
+            <filter>task['peptide_summary']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- test moff_all -->
+        <test>
+            <param name="task_selector" value="all"/>
+            <param name="input_type_selector" value="ps"/>
+            <param name="ident_input_file">
+                <collection type="list">
+                    <element name="mbr_test1" value="input/mbr_test1.tabular"/>
+                    <element name="mbr_test2" value="input/mbr_test2.tabular"/>
+                </collection>
+            </param>
+            <param name="inputraw">
+                <collection type="list">
+                    <element name="mbr_test1" value="input/mbr_test1.mzml"/>
+                    <element name="mbr_test2" value="input/mbr_test2.mzml"/>
+                </collection>
+            </param>
+            <param name="peptide_summary" value="true"/>
+            <output name="output_peptide_summary" ftype="tabular">
+                <assert_contents>
+                    <has_text text="sumIntensity_mbr_test1"/>
+                    <has_text text="sumIntensity_mbr_test2"/>
+                </assert_contents>
+            </output>
+            <output_collection name="ident_output" type="list">
+                <element name="mbr_test1_match_moff_result" value="output1/mbr_test1_match_moff_result.txt"/>
+                <element name="mbr_test2_match_moff_result" value="output1/mbr_test2_match_moff_result.txt"/>
+            </output_collection>
+            <output_collection name="log_output" type="list">
+                <element name="mbr_test1_match__moff">
+                    <assert_contents>
+                        <has_line line="peptide at line 200 -->  MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/>
+                    </assert_contents>
+                </element>
+                <element name="mbr_test2_match__moff">
+                    <assert_contents>
+                        <has_line line="peptide at line 132 -->  MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- test moff alone -->
+        <test>
+            <param name="task_selector" value="moff"/>
+            <param name="input_type_selector" value="ps"/>
+            <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
+            <param name="msms_input" value="mzml"/>
+            <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
+            <param name="peptide_summary" value="true"/>
+            <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
+            <output name="output_logs">
+                <assert_contents>
+                    <has_line line="peptide at line 294 -->  MZ: 677.3300 RT: 60.6078"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- test the generic input -->
+        <test>
+            <param name="task_selector" value="moff"/>
+            <param name="input_type_selector" value="generic"/>
+            <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
+            <param name="remove_header" value="true"/>
+            <param name="msms_input" value="mzml"/>
+            <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
+            <param name="peptide" value="3"/>
+            <param name="prot" value="2"/>
+            <param name="mod_peptide" value="7"/>
+            <param name="rt" value="13"/>
+            <param name="mz" value="14"/>
+            <param name="mass" value="17"/>
+            <param name="charge" value="15"/>
+            <param name="peptide_summary" value="true"/>
+            <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
+            <output name="output_logs">
+                <assert_contents>
+                    <has_line line="peptide at line 294 -->  MZ: 677.3300 RT: 60.6078"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- test mbr -->
+        <test>
+            <param name="task_selector" value="mbr"/>
+            <param name="input_type_selector" value="ps"/>
+            <param name="ident_input_file">
+                <collection type="list">
+                    <element name="mbr_test1" value="input/mbr_test1.tabular"/>
+                    <element name="mbr_test2" value="input/mbr_test2.tabular"/>
+                </collection>
+            </param>
+            <param name="ext" value="tabular"/>
+            <output_collection name="ident_output" type="list" count="2">
+                <element name="mbr_test1_match" file="input/mbr_output/mbr_test1_match.txt"/>
+                <element name="mbr_test2_match" file="input/mbr_output/mbr_test2_match.txt"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+**Description**
+
+moFF (a Modest Feature Finder) is an OS independent tool designed to extract
+apex MS1 intensity using a set of identified MS2 peptides.
+It currently uses a Go library to directly extract data from Thermo Raw spectrum files,
+eliminating the need for conversions from other formats.
+Moreover, moFF also allows one to work directly with mzML files.
+
+**Usage**
+
+*Modules:*
+
+1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file. 
+2. Match between runs (MBR): for multiple identification files, share MS2 identified peptides between runs and predict the retention time.
+3. All (match between runs followed by apex intensity): this is used for more than one pair of identification and spectrum files.
+
+If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module).
+The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs are not able to be used as inputs in moFF or in other tools.
+
+
+*Inputs:*
+
+- Identification file: this can either be a generic tabular file or the standard PSM report from PeptideShaker.
+  If it is a generic tabular file, please select the columns corresponding to the required information.
+
+- MS/MS file: this can either be a Thermo raw file or an mzML file.
+
+A given pair of files must have the *exact* same display name, not including the extension;
+e.g. ``example1.tabular`` and ``example1.mzml``.
+If the display names are different, simply change them in the history menu.
+
+For multiple files (the MBR or All modules), the identification and spectrum files must be provided as dataset collections.
+This allows for usage of the output dataset collections in workflows.
+
+*Parameters:*
+
+All the parameters related to the the time windows (``rt_w``, ``rt_p``, ``rt_p_match``) are basically the
+half of the entire time windows where the apex peak is searched or the XIC is retrieved.
+For correct rt windows, we suggest you set the ``rt_p`` value equal to or slighly greater than the
+dynamic exclusion duration set in your machine. We suggest also to set the
+``rt_p_match`` always slightly bigger than tha values used for ``rt_p``.
+
+*Outputs:*
+
+When used in the single file mode ("Apex intensity" module), the outputs are 2 (or 3) files: a log file, a quantitation file,
+and (optionally) a peptide summary, with intensities aggregated across peptides. When used in the multiple file mode ("All"),
+the outputs are a dataset collection of log files (one per identification file), a dataset collection of quantification files, and (optionally) a peptide summary.
+
+If used with a generic tabular format, the only columns in the output file are the 7 columns selected while using moFF plus the columns that moFF adds. Other columns are discarded.
+
+**More Information**
+
+See the moFF Github site at https://github.com/compomics/moFF, 
+and the publication at https://dx.doi.org/10.1038/nmeth.4075 
+
+    ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1038/nmeth.4075</citation>
+    </citations>
+</tool>