Mercurial > repos > galaxyp > proteomics_moff

diff moff.xml @ 4:7af419c90f5f draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/moFF commit e6392f9c9e5ff88b1711667305c59b15a751758c
author: galaxyp
date: Thu, 28 Mar 2019 05:25:24 -0400
parents: 226287d75d96
children: a96af68dafb2
--- a/moff.xml	Wed Sep 26 07:15:36 2018 -0400
+++ b/moff.xml	Thu Mar 28 05:25:24 2019 -0400
@@ -1,7 +1,7 @@
-<tool id="proteomics_moff" name="moFF" version="@VERSION@.2">
+<tool id="proteomics_moff" name="moFF" version="@VERSION@.0">
     <description>extracts MS1 intensities from spectrum files</description>
     <macros>
-        <token name="@VERSION@">1.2.1</token>
+        <token name="@VERSION@">2.0.2</token>
         <!-- xml macros, used for shared Galaxy parameter inputs -->
         <xml name="ident_input_macro" token_allow_multiple="true" token_input_type="data">
         <!-- this is exactly the same across all three, except for allowing multiple in MBR and all but not in moff -->
@@ -48,20 +48,20 @@
                     </when>
             </conditional>
         </xml>
-        <xml name="raw_input_macro" token_allow_multiple="true" token_input_type="data">
-            <conditional name="msms_input">
-                <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file">
-                    <option value="raw">Thermo RAW file</option>
-                    <option value="mzml">mzML</option>
-                </param>
-                <when value="raw">
-                    <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="raw" label="RAW file(s)"/>
-                </when>
-                <when value="mzml">
-                    <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="mzml" label="mzML file(s)"/>
-                </when>
-            </conditional>
-        </xml>
+        <xml name="filt_matched_peptide"><!-- shared inputs for the optional filtering of matched peptides; the filter settings are only shown when "filter" is selected -->
+            <conditional name="match_filter">
+                <param name="filter_flags" type="select" label="Activate filtering of matched peptides">
+                    <option value="nofilter" selected="true">Do not activate</option>
+                    <option value="filter">Filter by flags to exclude or require</option>
+                </param>
+                <when value="filter">
+                    <param argument="--sample_size" type="float" value="0.20" label="sample_size" help="Percentage of MS2 peptides used to estimate the threshold. Default value: 0.20"/>
+                    <param argument="--quantile_thr_filtering" type="float" value="0.75" min="0" max="1" label="quantile_thr_filtering" help="quantile value used to compute the filtering threshold for the matched peak"/>
+                    <param argument="--ptm_file" type="data" format="tabular" optional="true" label="ptm_file" help="load your ptm file in order to overwrite internal method"/>
+                </when>
+                <when value="nofilter"/>
+            </conditional>
+        </xml>
         <!-- tokens (code snippets used in <command>) -->
         <token name="@FORMAT@"><![CDATA[
             #if $task.task_selector != 'mbr'
@@ -76,14 +76,14 @@
         ]]></token>
         <token name="@IDENT_INPUT_ARG_MULTIPLE@"><![CDATA[
             ## this is where the ident input gets passed to moff/moff_all/moff_mbr
-            --inputtsv
+            --tsv_list
             #for $value in $task.ident_input.ident_input_file:
                 './ident_inputs/$value.element_identifier$format'
             #end for
         ]]></token>
         <token name="@IDENT_INPUT_ARG_SINGLE@"><![CDATA[
             ## this is where the ident input gets passed to moff/moff_all/moff_mbr
-            --inputtsv './ident_inputs/${task.ident_input.ident_input_file.element_identifier}$format'
+            --tsv_list './ident_inputs/${task.ident_input.ident_input_file.element_identifier}$format'
         ]]></token>
         <token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[
             mkdir ./ident_inputs &&
@@ -122,23 +122,23 @@
             #end if
         ]]></token>
         <token name="@RAW_INPUT_ARG_SINGLE@"><![CDATA[
-            --inputraw './raws/$task.msms_input.inputraw.element_identifier$format'
+            --raw_list './raws/$task.msms_input.raw_list.element_identifier$format'
         ]]></token>
         <token name="@RAW_INPUT_ARG_MULTIPLE@"><![CDATA[
-            --inputraw
-            #for $value in $task.msms_input.inputraw:
+            --raw_list
+            #for $value in $task.msms_input.raw_list:
                 './raws/$value.element_identifier$format'
             #end for
         ]]></token>
         <token name="@WRANGLE_RAW_INPUT_SINGLE@"><![CDATA[
             mkdir ./raws &&
             ## for files, need to softlink the name to the history item
-            ln -s '$task.msms_input.inputraw' './raws/$task.msms_input.inputraw.element_identifier$format' &&
+            ln -s '$task.msms_input.raw_list' './raws/$task.msms_input.raw_list.element_identifier$format' &&
         ]]></token>
         <token name="@WRANGLE_RAW_INPUT_MULTIPLE@"><![CDATA[
             mkdir ./raws &&
             ## for files, need to softlink the name to the history item
-            #for $value in $task.msms_input.inputraw:
+            #for $value in $task.msms_input.raw_list:
                 ln -s '$value' './raws/$value.element_identifier$format' &&
             #end for
         ]]></token>
@@ -153,50 +153,74 @@
         #if $task.task_selector == "moff":
             @WRANGLE_IDENT_INPUT_SINGLE@
             @WRANGLE_RAW_INPUT_SINGLE@
-            moff.py
+            moff_all.py
                 @IDENT_INPUT_ARG_SINGLE@
                 @RAW_INPUT_ARG_SINGLE@
                 --tol $task.tol
-                --rt_w $task.rt_w
-                --rt_p $task.rt_p
-                --output_folder ./out
-                #if ($task.peptide_summary):
-                    --peptide_summary 1
-                #end if
-            &&
-            #if $task.peptide_summary:
-                mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
-            #end if
-            mv ./out/*moff_result.txt '$output_table'
-            &&
-            mv ./out/*.log '$output_logs'
+                --mbr $task.mbr
+                --xic_length $task.xic_length
+                --rt_peak_win $task.rt_peak_win
+                --rt_peak_win_match $task.rt_peak_win_match
+	            --loc_out ./out
+                    #if str( $task.match_filter.filter_flags ) == "filter":
+                        --match_filter
+                        --sample_size $task.match_filter.sample_size
+                        --quantile_thr_filtering $task.match_filter.quantile_thr_filtering
+                        #if ($task.match_filter.ptm_file):
+                            --ptm_file '$task.match_filter.ptm_file'
+                        #else:
+                            --ptm_file '$__tool_directory__/tool-data/ptm_setting_ps.json' 
+                        #end if
+                    #end if
+          #if ($task.peptide_summary):
+              --peptide_summary
+          #end if
+          #if $task.peptide_summary:
+              && mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary'
+          #end if
+          &&
+          mv ./out/*moff_result.txt '$output_table'
+          &&
+          mv ./out/*.log '$output_logs'
         #else if $task.task_selector == "mbr":
            @WRANGLE_IDENT_INPUT_MULTIPLE@
-           moff_mbr.py
-                --inputF ./ident_inputs
-                --ext $task.ext
+           moff_all.py
+                @IDENT_INPUT_ARG_MULTIPLE@
+                --mbr $task.mbr
+				--raw_list
            &&
-           mv ./ident_inputs/mbr_output/* ./out
+           mv ./mbr_output/* ./out
         #else:
            ## moff_all (mbr followed by apex)
-           @WRANGLE_IDENT_INPUT_MULTIPLE@
+           @WRANGLE_IDENT_INPUT_SINGLE@
            @WRANGLE_RAW_INPUT_MULTIPLE@
            moff_all.py
-               @IDENT_INPUT_ARG_MULTIPLE@
+               @IDENT_INPUT_ARG_SINGLE@
                @RAW_INPUT_ARG_MULTIPLE@
                --tol $task.tol
-               --rt_w $task.rt_w
-               --rt_p $task.rt_p
-               --rt_p_match $task.rt_p_match
-               --output_folder ./out
-               #if $task.peptide_summary:
-                   --peptide_summary 1
+               --mbr $task.mbr
+               --xic_length $task.xic_length
+               --rt_peak_win $task.rt_peak_win
+               --rt_peak_win_match $task.rt_peak_win_match
+               --loc_out ./out
+               #if ($task.peptide_summary):
+                   --peptide_summary
                #end if
-           &&
+               #if str( $task.match_filter.filter_flags ) == "filter":
+                    --match_filter
+                    --sample_size $task.match_filter.sample_size
+                    --quantile_thr_filtering $task.match_filter.quantile_thr_filtering
+                    #if ($task.match_filter.ptm_file):
+                        --ptm_file '$task.match_filter.ptm_file'
+                    #else:
+                        --ptm_file '$__tool_directory__/tool-data/ptm_setting_ps.json'
+                    #end if
+               #end if
+           
            #if $task.peptide_summary:
-               mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary'
-           #end if
-        #end if
+               && mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary'
+		   #end if
+	   #end if
     ]]></command>
     <inputs>
         <conditional name="task">
@@ -207,33 +231,67 @@
             </param>
             <when value = "moff">
                 <expand macro="ident_input_macro" allow_multiple="false"/>
-                <expand macro="raw_input_macro" allow_multiple="false"/>
+                <conditional name="msms_input"><!-- single spectrum file for the moff task; both branches expose the same raw_list parameter and differ only in the accepted datatype -->
+                    <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file">
+                        <option value="raw">Thermo RAW file</option>
+                        <option value="mzml">mzML</option>
+                    </param>
+                    <when value="raw">
+                        <param argument="--raw_list" type="data" multiple="false" format="thermo.raw" label="RAW file"/>
+                    </when>
+                    <when value="mzml">
+                        <param argument="--raw_list" type="data" multiple="false" format="mzml" label="mzML file"/>
+                    </when>
+                </conditional>
+
+
                 <param argument="--tol" type="float" value="10" label="Tolerance parameter"
                     help="Specify the tolerance parameter in ppm." />
-                <param argument="--rt_w" type="float" value="3.0" label="Retention time window"
+                <param argument="--mbr" type="text" value="off" label="moFF workflow"
+                    help="select the moFF workflow" />
+                <param argument="--xic_length" type="float" value="3.0" label="retention time windows for XiC"
                     help="Specify rt window for xic in minutes." />
-                <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
-                    help="Specify the time windows for the peak in minutes." />
+                <param argument="--rt_peak_win" type="float" value="1" label="retention time window for apex detection"
+					help="Specify rt window for the peak in minutes." />
+				<param argument="--rt_peak_win_match" type="float" value="1.2" label="retention time window for the matched peak"
+                    help="Specify the retention time window for the matched peak in minutes." />
+                <expand macro="filt_matched_peptide"/>	
                 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
             </when>
             <when value="mbr">
                 <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
-                <param argument="--ext" type="text" value="tab" label="Provide the extension used in the display file name (without the period)"/>
+                <param argument="--mbr" type="text" value="only" label="moFF workflow"
+					help="select the moFF workflow" />
             </when>
             <when value="all">
-                <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
-                <expand macro="raw_input_macro" allow_multiple="false" input_type="data_collection"/>
+                <expand macro="ident_input_macro" allow_multiple="false" input_type="data"/>
+                <conditional name="msms_input"><!-- spectrum files for the all task: at least two datasets of the selected format; both branches expose the same raw_list parameter -->
+                    <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file">
+                        <option value="raw">Thermo RAW file</option>
+                        <option value="mzml">mzML</option>
+                    </param>
+                    <when value="raw">
+                        <param argument="--raw_list" type="data" multiple="true" min="2" format="thermo.raw" label="RAW file(s)"/>
+                    </when>
+                    <when value="mzml">
+                        <param argument="--raw_list" type="data" multiple="true" min="2" format="mzml" label="mzML file(s)"/>
+                    </when>
+                </conditional>
+
                 <param argument="--tol" type="float" value="10" label="Tolerance parameter"
                     help="Specify the tolerance parameter in ppm." />
-                <param argument="--rt_w" type="float" value="3.0" label="Retention time window"
+                <param argument="--mbr" type="text" value="on" label="moFF workflow"
+                    help="select the moFF workflow" />
+                <param argument="--xic_length" type="float" value="3.0" label="retention time windows for XiC"
                     help="Specify rt window for xic in minutes." />
-                <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
-                    help="Specify the time windows for the peak in minutes." />
-                <param argument="--rt_p_match" type="float" value="1.2" label="Time window for the matched peak"
-                    help="Specify the time windows for the matched peak in minutes." />
+                <param argument="--rt_peak_win" type="float" value="1" label="retention time window for apex detection"
+                    help="Specify the retention time window for the peak in minutes." />
+                <param argument="--rt_peak_win_match" type="float" value="1.2" label="retention time window for the matched peak"
+					help="Specify the retention time window for the matched peak in minutes." />
+                <expand macro="filt_matched_peptide"/>
                 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
             </when>
-        </conditional> 
+     </conditional>
     </inputs>
     <outputs>
         <data format="tabular" name="output_table" label="${tool.name} on ${on_string}: quantification">
@@ -255,153 +313,10 @@
             <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/>
         </collection>
         <data format="tabular" name="output_peptide_summary" label="${tool.name} on ${on_string}: peptide summary">
-            <filter>task['peptide_summary'] and (task['task_selector']=='all' or task['task_selector']=='moff')</filter>
+            <filter>(task['task_selector']=='all' or task['task_selector']=='moff') and task['peptide_summary'] </filter>
         </data>
     </outputs>
     <tests>
-        <!-- test moff_all -->
-        <test>
-            <param name="task|task_selector" value="all"/>
-            <param name="ident_input|input_type_selector" value="ps"/>
-            <param name="ident_input_file">
-                <collection type="list">
-                    <element name="mbr_test1" value="input/mbr_test1.tabular"/>
-                    <element name="mbr_test2" value="input/mbr_test2.tabular"/>
-                </collection>
-            </param>
-            <param name="msms_input|input_type_selector" value="mzml"/>
-            <param name="inputraw">
-                <collection type="list">
-                    <element name="mbr_test1" value="input/mbr_test1.mzml"/>
-                    <element name="mbr_test2" value="input/mbr_test2.mzml"/>
-                </collection>
-            </param>
-            <param name="peptide_summary" value="true"/>
-            <output name="output_peptide_summary" ftype="tabular">
-                <assert_contents>
-                    <has_text text="sumIntensity_mbr_test1"/>
-                    <has_text text="sumIntensity_mbr_test2"/>
-                </assert_contents>
-            </output>
-            <output_collection name="ident_output" type="list">
-                <element name="mbr_test1_match_moff_result">
-                    <assert_contents>
-                        <has_text text="NH2-QVEEAVQSDDK-COOH"/>
-                    </assert_contents>
-                </element>
-                <element name="mbr_test2_match_moff_result">
-                    <assert_contents>
-                        <has_text text="NH2-RDVGINNTVK-COOH"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-            <output_collection name="log_output" type="list">
-                <element name="mbr_test1_match__moff">
-                    <assert_contents>
-                        <has_line line="peptide at line 200 -->  MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/>
-                    </assert_contents>
-                </element>
-                <element name="mbr_test2_match__moff">
-                    <assert_contents>
-                        <has_line line="peptide at line 132 -->  MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-        </test>
-        <!-- test moff alone -->
-        <test>
-            <param name="task|task_selector" value="moff"/>
-            <param name="ident_input|input_type_selector" value="ps"/>
-            <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
-            <param name="msms_input|input_type_selector" value="mzml"/>
-            <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
-            <param name="peptide_summary" value="true"/>
-            <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
-            <output name="output_logs">
-                <assert_contents>
-                    <has_line line="peptide at line 294 -->  MZ: 677.3300 RT: 60.6078"/>
-                </assert_contents>
-            </output>
-        </test>
-        <!-- test the generic input -->
-        <test>
-            <param name="task|task_selector" value="moff"/>
-            <param name="ident_input|input_type_selector" value="generic"/>
-            <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
-            <param name="remove_header" value="true"/>
-            <param name="msms_input|input_type_selector" value="mzml"/>
-            <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
-            <param name="peptide" value="3"/>
-            <param name="prot" value="2"/>
-            <param name="mod_peptide" value="7"/>
-            <param name="rt" value="13"/>
-            <param name="mz" value="14"/>
-            <param name="mass" value="17"/>
-            <param name="charge" value="15"/>
-            <param name="peptide_summary" value="true"/>
-            <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
-            <output name="output_logs">
-                <assert_contents>
-                    <has_line line="peptide at line 294 -->  MZ: 677.3300 RT: 60.6078"/>
-                </assert_contents>
-            </output>
-        </test>
-        <test>
-            <param name="task|task_selector" value="all"/>
-            <param name="ident_input|input_type_selector" value="generic"/>
-            <param name="ident_input_file">
-                <collection type="list">
-                    <element name="mbr_test1" value="input/mbr_test1.tabular"/>
-                    <element name="mbr_test2" value="input/mbr_test2.tabular"/>
-                </collection>
-            </param>
-            <param name="remove_header" value="true"/>
-            <param name="peptide" value="3"/>
-            <param name="prot" value="2"/>
-            <param name="mod_peptide" value="7"/>
-            <param name="rt" value="13"/>
-            <param name="mz" value="14"/>
-            <param name="mass" value="17"/>
-            <param name="charge" value="15"/>
-            <param name="msms_input|input_type_selector" value="mzml"/>
-            <param name="inputraw">
-                <collection type="list">
-                    <element name="mbr_test1" value="input/mbr_test1.mzml"/>
-                    <element name="mbr_test2" value="input/mbr_test2.mzml"/>
-                </collection>
-            </param>
-            <param name="peptide_summary" value="true"/>
-            <output name="output_peptide_summary" ftype="tabular">
-                <assert_contents>
-                    <has_text text="sumIntensity_mbr_test1"/>
-                    <has_text text="sumIntensity_mbr_test2"/>
-                </assert_contents>
-            </output>
-            <output_collection name="ident_output" type="list">
-                <element name="mbr_test1_match_moff_result">
-                    <assert_contents>
-                        <has_text text="NH2-QVEEAVQSDDK-COOH"/>
-                    </assert_contents>
-                </element>
-                <element name="mbr_test2_match_moff_result">
-                    <assert_contents>
-                        <has_text text="NH2-RDVGINNTVK-COOH"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-            <output_collection name="log_output" type="list">
-                <element name="mbr_test1_match__moff">
-                    <assert_contents>
-                        <has_line line="peptide at line 200 -->  MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/>
-                    </assert_contents>
-                </element>
-                <element name="mbr_test2_match__moff">
-                    <assert_contents>
-                        <has_line line="peptide at line 132 -->  MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-        </test>
         <!-- test mbr -->
         <test>
             <param name="task|task_selector" value="mbr"/>
@@ -417,10 +332,6 @@
                 <element name="mbr_test1_match">
                     <assert_contents>
                         <has_text text="NH2-QVEEAVQSDDK-COOH"/>
-                    </assert_contents>
-                </element>
-                <element name="mbr_test2_match">
-                    <assert_contents>
                         <has_text text="NH2-RDVGINNTVK-COOH"/>
                     </assert_contents>
                 </element>
@@ -429,6 +340,7 @@
     </tests>
     <help>
     <![CDATA[
+
 **Description**
 
 moFF (a Modest Feature Finder) is an OS independent tool designed to extract
@@ -441,28 +353,24 @@
 
 *Modules:*
 
-1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file. 
+1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file.
 2. Match between runs (MBR): for multiple identification files, share MS2 identified peptides between runs and predict the retention time.
 3. All (match between runs followed by apex intensity): this is used for more than one pair of identification and spectrum files.
 
 If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module).
 The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs are not able to be used as inputs in moFF or in other tools.
-
-If quantification of multiple files without MBR is desired, the apex intensity module may be run with multiple files or a dataset collection in batch mode. 
-In either case, moFF must be given the paired files at the same time - thus the best method is to construct a dataset collection in which the raw and identification files are in the same order. 
-
+If quantification of multiple files without MBR is desired, the apex intensity module may be run with multiple files or a dataset collection in batch mode.
+In either case, moFF must be given the paired files at the same time - thus the best method is to construct a dataset collection in which the raw and identification files are in the same order.
 
 *Inputs:*
 
 - Identification file: this can either be a generic tabular file or the standard PSM report from PeptideShaker.
   If it is a generic tabular file, please select the columns corresponding to the required information.
-
 - MS/MS file: this can either be a Thermo raw file or an mzML file.
 
 A given pair of files must have the *exact* same display name, not including the extension;
 e.g. ``example1.tabular`` and ``example1.mzml``.
 If the display names are different, simply change them in the history menu.
-
 For multiple files (the MBR or All modules), the identification and spectrum files must be provided as dataset collections.
 This allows for usage of the output dataset collections in workflows.
 
@@ -473,20 +381,16 @@
 For correct rt windows, we suggest you set the ``rt_peak_win`` value equal to or slightly greater than the
 dynamic exclusion duration set on your machine. We also suggest setting
 ``rt_peak_win_match`` slightly larger than the value used for ``rt_peak_win``.
-
 *Outputs:*
-
 When used in the single file mode ("Apex intensity" module), the outputs are 2 (or 3) files: a log file, a quantitation file,
 and (optionally) a peptide summary, with intensities aggregated across peptides. When used in the multiple file mode ("All"),
 the outputs are a dataset collection of log files (one per identification file), a dataset collection of quantification files, and (optionally) a peptide summary.
-
 If used with a generic tabular format, the only columns in the output file are the 7 columns selected while using moFF plus the columns that moFF adds. Other columns are discarded.
 
 **More Information**
 
-See the moFF Github site at https://github.com/compomics/moFF, 
-and the publication at https://dx.doi.org/10.1038/nmeth.4075 
-
+See the moFF Github site at https://github.com/compomics/moFF,
+and the publication at https://dx.doi.org/10.1038/nmeth.4075
     ]]>
     </help>
     <citations>
author	galaxyp
date	Thu, 28 Mar 2019 05:25:24 -0400
parents	226287d75d96
children	a96af68dafb2