Mercurial > repos > galaxyp > proteomics_moff
diff moff.xml @ 0:b4098353ee73 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/moFF commit bc0fad49e3ba73fa5b5b326e940adf9e11854d94
author | galaxyp |
---|---|
date | Fri, 05 Jan 2018 12:47:36 -0500 |
parents | |
children | 8f0e76ad46ef |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moff.xml Fri Jan 05 12:47:36 2018 -0500 @@ -0,0 +1,407 @@ +<tool id="proteomics_moff" name="moFF" version="@VERSION@"> + <description>extracts MS1 intensities from spectrum files</description> + <macros> + <token name="@VERSION@">1.2</token> + <!-- xml macros, used for shared Galaxy parameter inputs --> + <xml name="ident_input_macro" token_allow_multiple="true" token_input_type="data"> + <!-- this is exactly the same across all three, except for allowing multiple in MBR and all but not in moff --> + <conditional name="ident_input"> + <param name="input_type_selector" type="select" label="Choose the format for the identification file:"> + <option value="ps">Peptide Shaker PSM report (standard, not extended)</option> + <option value="generic">Another tabular identification file</option> + </param> + <when value="ps"> + <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="Peptide Shaker PSM report" multiple="@ALLOW_MULTIPLE@"/> + </when> + <when value="generic"> + <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="A general tabular format" multiple="@ALLOW_MULTIPLE@" + help="Must have specific columns; see below to select these columns from your file. The file should have at most one header line. "/> + <param name="remove_header" type="boolean" value="false" label="Remove the header line?" 
help="This is necessary if the file has a line with column headers"/> + <param name="peptide" + type="data_column" + data_ref="ident_input_file" + label="Column with peptide-spectrum-match sequence"/> + <param name="prot" + type="data_column" + data_ref="ident_input_file" + label="Column with protein ID"/> + <param name="mod_peptide" + type="data_column" + data_ref="ident_input_file" + label="Column with peptide-spectrum-match sequence that contains possible modifications"/> + <param name="rt" + type="data_column" + data_ref="ident_input_file" + label="Column with PSM retention time (in second)"/> + <param name="mz" + type="data_column" + data_ref="ident_input_file" + label="Column with m/z (mass over charge)"/> + <param name="mass" + type="data_column" + data_ref="ident_input_file" + label="Column with mass of the peptide"/> + <param name="charge" + type="data_column" + data_ref="ident_input_file" + label="Column with charge of ionized peptide"/> + </when> + </conditional> + </xml> + <xml name="raw_input_macro" token_allow_multiple="true" token_input_type="data"> + <conditional name="msms_input"> + <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file"> + <option value="raw">Thermo RAW file</option> + <option value="mzml">mzML</option> + </param> + <when value="raw"> + <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="raw" label="RAW file(s)"/> + </when> + <when value="mzml"> + <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="mzml" label="mzML file(s)"/> + </when> + </conditional> + </xml> + <!-- tokens (code snippets used in <command>) --> + <token name="@IDENT_INPUT_ARG_MULTIPLE@"><![CDATA[ + ## this is where the ident input gets passed to moff/moff_all/moff_mbr + --inputtsv + #for $key in $task.ident_input.ident_input_file.keys(): + './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' + #end for + ]]></token> + <token 
name="@IDENT_INPUT_ARG_SINGLE@"><![CDATA[ + ## this is where the ident input gets passed to moff/moff_all/moff_mbr + --inputtsv './ident_inputs/${task.ident_input.ident_input_file.display_name}' + ]]></token> + <token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[ + mkdir ./ident_inputs && + #if $task.ident_input.input_type_selector == "ps": + ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' && + #else + ## optionally remove first line + #if $task.ident_input.remove_header: + sed -i '1d' '$task.ident_input.ident_input_file' && + #end if + ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge" + echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > tempfile.tab && + awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file' >> tempfile.tab && + mv tempfile.tab '$task.ident_input.ident_input_file' && + ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' && + #end if + ]]></token> + <token name="@WRANGLE_IDENT_INPUT_MULTIPLE@"><![CDATA[ + mkdir ./ident_inputs && + #if $task.ident_input.input_type_selector == "ps": + #for $key in $task.ident_input.ident_input_file.keys(): + ln -s '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' && + #end for + #else + #for $key in $task.ident_input.ident_input_file.keys(): + ## optionally remove first line + #if $task.ident_input.remove_header: + sed -i '1d' '$task.ident_input.ident_input_file[$key]' && + #end if + ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge" + echo -e 
"peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > tempfile.tab && + ## read the current collection element; the selected column numbers are passed to awk as variables and dereferenced as fields + awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file[$key]' >> tempfile.tab && + mv tempfile.tab '$task.ident_input.ident_input_file[$key]' && + ln -s '$task.ident_input.ident_input_file[$key]' './ident_inputs/$task.ident_input.ident_input_file[$key].display_name' && + #end for + #end if + ]]></token> + <token name="@RAW_INPUT_ARG_SINGLE@"><![CDATA[ + --inputraw './raws/$task.msms_input.inputraw.display_name' + ]]></token> + <token name="@RAW_INPUT_ARG_MULTIPLE@"><![CDATA[ + --inputraw + #for $key in $task.msms_input.inputraw.keys(): + './raws/$task.msms_input.inputraw[$key].display_name' + #end for + ]]></token> + <token name="@WRANGLE_RAW_INPUT_SINGLE@"><![CDATA[ + mkdir ./raws && + ## for files, need to softlink the display name to the history item + ln -s '$task.msms_input.inputraw' './raws/$task.msms_input.inputraw.display_name' && + ]]></token> + <token name="@WRANGLE_RAW_INPUT_MULTIPLE@"><![CDATA[ + mkdir ./raws && + ## for files, need to softlink the display name to the history item + #for $key in $task.msms_input.inputraw.keys(): + ln -s '$task.msms_input.inputraw[$key]' './raws/$task.msms_input.inputraw[$key].display_name' && + #end for + ]]></token> + </macros> + <requirements> + <requirement type="package" version="@VERSION@">moff</requirement> + </requirements> + <command detect_errors="aggressive"><![CDATA[ + mkdir ./out && + #if $task.task_selector == "moff": + @WRANGLE_IDENT_INPUT_SINGLE@ + @WRANGLE_RAW_INPUT_SINGLE@ + moff.py + @IDENT_INPUT_ARG_SINGLE@ + @RAW_INPUT_ARG_SINGLE@ + --tol $task.tol + --rt_w $task.rt_w + --rt_p $task.rt_p + --output_folder ./out + #if ($task.peptide_summary): + --peptide_summary 1 + #end if + && + #if 
$task.peptide_summary: + mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' && + #end if + mv ./out/*moff_result.txt '$output_table' + && + mv ./out/*.log '$output_logs' + #else if $task.task_selector == "mbr": + @WRANGLE_IDENT_INPUT_MULTIPLE@ + moff_mbr.py + --inputF ./ident_inputs + --ext $task.ext + && + mv ./ident_inputs/mbr_output/* ./out + #else: + ## moff_all (mbr followed by apex) + @WRANGLE_IDENT_INPUT_MULTIPLE@ + @WRANGLE_RAW_INPUT_MULTIPLE@ + moff_all.py + @IDENT_INPUT_ARG_MULTIPLE@ + @RAW_INPUT_ARG_MULTIPLE@ + --tol $task.tol + --rt_w $task.rt_w + --rt_p $task.rt_p + --rt_p_match $task.rt_p_match + --output_folder ./out + --ext txt + #if $task.peptide_summary: + --peptide_summary 1 + #end if + && + #if $task.peptide_summary: + mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' && + #end if + echo -ne + #end if + ]]></command> + <inputs> + <conditional name="task"> + <param name="task_selector" type="select" label="Choose which module to run"> + <option value="moff" selected="true">Apex intensity</option> + <option value="mbr">Match between runs</option> + <option value="all">All (match-between-runs followed by quantitation)</option> + </param> + <when value = "moff"> + <expand macro="ident_input_macro" allow_multiple="false"/> + <expand macro="raw_input_macro" allow_multiple="false"/> + <param argument="--tol" type="float" value="10" label="Tolerance parameter" + help="Specify the tolerance parameter in ppm." /> + <param argument="--rt_w" type="float" value="3.0" label="Retention time window" + help="Specify rt window for xic in minutes." /> + <param argument="--rt_p" type="float" value="1" label="Time window for the peak" + help="Specify the time windows for the peak in minutes." /> + <param argument="--rt_p_match" type="float" value="1.5" label="Time window for the matched peak" + help="Specify the time windows for the matched peak in minutes." 
/> + <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/> + </when> + <when value="mbr"> + <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/> + <param argument="--ext" type="text" value="tab" label="Provide the extension used in the display file name (without the period)"/> + </when> + <when value="all"> + <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/> + <expand macro="raw_input_macro" allow_multiple="false" input_type="data_collection"/> + <param argument="--tol" type="float" value="10" label="Tolerance parameter" + help="Specify the tolerance parameter in ppm." /> + <param argument="--rt_w" type="float" value="3.0" label="Retention time window" + help="Specify rt window for xic in minutes." /> + <param argument="--rt_p" type="float" value="1" label="Time window for the peak" + help="Specify the time windows for the peak in minutes." /> + <param argument="--rt_p_match" type="float" value="1.2" label="Time window for the matched peak" + help="Specify the time windows for the matched peak in minutes." 
/> + <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="output_table" label="${tool.name} quantification: ${on_string}"> + <filter>task['task_selector']=='moff'</filter> + </data> + <data format="txt" name="output_logs" label="${tool.name} log: ${on_string}"> + <filter>task['task_selector']=='moff'</filter> + </data> + <collection name="ident_output" type="list" label="${tool.name} quantification: ${on_string}"> + <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter> + <!--discover datasets method --> + <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/> + </collection> + <collection name="log_output" type="list" label="${tool.name} logs: ${on_string}"> + <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter> + <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/> + </collection> + <data format="tabular" name="output_peptide_summary" label="${tool.name} peptide summary: ${on_string}"> + <!-- the mbr branch defines no peptide_summary parameter, so check the task before reading it --> + <filter>task['task_selector'] != 'mbr' and task['peptide_summary']</filter> + </data> + </outputs> + <tests> + <!-- test moff_all --> + <test> + <param name="task_selector" value="all"/> + <param name="input_type_selector" value="ps"/> + <param name="ident_input_file"> + <collection type="list"> + <element name="mbr_test1" value="input/mbr_test1.tabular"/> + <element name="mbr_test2" value="input/mbr_test2.tabular"/> + </collection> + </param> + <param name="inputraw"> + <collection type="list"> + <element name="mbr_test1" value="input/mbr_test1.mzml"/> + <element name="mbr_test2" value="input/mbr_test2.mzml"/> + </collection> + </param> + <param name="peptide_summary" value="true"/> + <output name="output_peptide_summary" ftype="tabular"> + <assert_contents> + <has_text text="sumIntensity_mbr_test1"/> + <has_text text="sumIntensity_mbr_test2"/> + </assert_contents> + 
</output> + <output_collection name="ident_output" type="list"> + <element name="mbr_test1_match_moff_result" value="output1/mbr_test1_match_moff_result.txt"/> + <element name="mbr_test2_match_moff_result" value="output1/mbr_test2_match_moff_result.txt"/> + </output_collection> + <output_collection name="log_output" type="list"> + <element name="mbr_test1_match__moff"> + <assert_contents> + <has_line line="peptide at line 200 --> MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/> + </assert_contents> + </element> + <element name="mbr_test2_match__moff"> + <assert_contents> + <has_line line="peptide at line 132 --> MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/> + </assert_contents> + </element> + </output_collection> + </test> + <!-- test moff alone --> + <test> + <param name="task_selector" value="moff"/> + <param name="input_type_selector" value="ps"/> + <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/> + <param name="msms_input" value="mzml"/> + <param name="inputraw" value="input/test.mzml" ftype="mzml"/> + <param name="peptide_summary" value="true"/> + <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/> + <output name="output_logs"> + <assert_contents> + <has_line line="peptide at line 294 --> MZ: 677.3300 RT: 60.6078"/> + </assert_contents> + </output> + </test> + <!-- test the generic input --> + <test> + <param name="task_selector" value="moff"/> + <param name="input_type_selector" value="generic"/> + <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/> + <param name="remove_header" value="true"/> + <param name="msms_input" value="mzml"/> + <param name="inputraw" value="input/test.mzml" ftype="mzml"/> + <param name="peptide" value="3"/> + <param name="prot" value="2"/> + <param name="mod_peptide" value="7"/> + <param name="rt" value="13"/> + <param name="mz" value="14"/> + <param name="mass" value="17"/> + <param name="charge" value="15"/> + <param 
name="peptide_summary" value="true"/> + <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/> + <output name="output_logs"> + <assert_contents> + <has_line line="peptide at line 294 --> MZ: 677.3300 RT: 60.6078"/> + </assert_contents> + </output> + </test> + <!-- test mbr --> + <test> + <param name="task_selector" value="mbr"/> + <param name="input_type_selector" value="ps"/> + <param name="ident_input_file"> + <collection type="list"> + <element name="mbr_test1" value="input/mbr_test1.tabular"/> + <element name="mbr_test2" value="input/mbr_test2.tabular"/> + </collection> + </param> + <param name="ext" value="tabular"/> + <output_collection name="ident_output" type="list" count="2"> + <element name="mbr_test1_match" file="input/mbr_output/mbr_test1_match.txt"/> + <element name="mbr_test2_match" file="input/mbr_output/mbr_test2_match.txt"/> + </output_collection> + </test> + </tests> + <help> + <![CDATA[ +**Description** + +moFF (a Modest Feature Finder) is an OS independent tool designed to extract +apex MS1 intensity using a set of identified MS2 peptides. +It currently uses a Go library to directly extract data from Thermo Raw spectrum files, +eliminating the need for conversions from other formats. +Moreover, moFF also allows one to work directly with mzML files. + +**Usage** + +*Modules:* + +1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file. +2. Match between runs (MBR): for multiple identification files, share MS2 identified peptides between runs and predict the retention time. +3. All (match between runs followed by apex intensity): this is used for more than one pair of identification and spectrum files. + +If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module). 
+The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs cannot be used as inputs in moFF or in other tools. + + +*Inputs:* + +- Identification file: this can either be a generic tabular file or the standard PSM report from PeptideShaker. + If it is a generic tabular file, please select the columns corresponding to the required information. + +- MS/MS file: this can either be a Thermo raw file or an mzML file. + +A given pair of files must have the *exact* same display name, not including the extension; +e.g. ``example1.tabular`` and ``example1.mzml``. +If the display names are different, simply change them in the history menu. + +For multiple files (the MBR or All modules), the identification and spectrum files must be provided as dataset collections. +This allows for usage of the output dataset collections in workflows. + +*Parameters:* + +All the parameters related to the time windows (``rt_w``, ``rt_p``, ``rt_p_match``) are half-widths: each is +half of the entire time window in which the apex peak is searched or the XIC is retrieved. +For correct rt windows, we suggest you set the ``rt_p`` value equal to or slightly greater than the +dynamic exclusion duration set on your machine. We also suggest setting +``rt_p_match`` slightly larger than the value used for ``rt_p``. + +*Outputs:* + +When used in the single file mode ("Apex intensity" module), the outputs are 2 (or 3) files: a log file, a quantitation file, +and (optionally) a peptide summary, with intensities aggregated across peptides. When used in the multiple file mode ("All"), +the outputs are a dataset collection of log files (one per identification file), a dataset collection of quantification files, and (optionally) a peptide summary. + +If used with a generic tabular format, the only columns in the output file are the 7 columns selected while using moFF plus the columns that moFF adds. Other columns are discarded. 
+ +**More Information** + +See the moFF Github site at https://github.com/compomics/moFF, +and the publication at https://dx.doi.org/10.1038/nmeth.4075 + + ]]> + </help> + <citations> + <citation type="doi">10.1038/nmeth.4075</citation> + </citations> +</tool>