Mercurial > repos > lecorguille > xcms_group
view abims_xcms_group.xml @ 13:13558e8a4778 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 4897a06ef248e2e74e57a496dd68adbda3c828f1
author | lecorguille |
---|---|
date | Wed, 29 Nov 2017 09:46:03 -0500 |
parents | 4c8507667cd6 |
children | 833d2c821d9c |
line wrap: on
line source
<tool id="abims_xcms_group" name="xcms.group" version="2.1.1">

    <!--
        Galaxy wrapper for the xcms "group" step.
        Shared pieces (requirements, stdio, the @COMMAND_*@ / @HELP_AUTHORS@ tokens,
        the peak-list inputs/outputs and the citation) come from macros.xml.
    -->

    <description>Group peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <!--
        Cheetah template for the command line.
        Directives (#if / #elif / #else / #end if) and "##" comments must each stay on
        their own line: a "##" comment runs to the end of the line it appears on.
        Only the arguments of the selected grouping method are emitted.
    -->
    <command><![CDATA[
        @COMMAND_XCMS_SCRIPT@
        xfunction group

        image '$image'
        xsetRdataOutput '$xsetRData'
        rplotspdf '$rplotsPdf'

        method $methods.method
        #if $methods.method == "density":
            ## minsamp $methods.minsamp
            minfrac $methods.minfrac
            bw $methods.bw
            mzwid $methods.mzwid
            sleep 0.001
            #if $methods.density_options.option == "show":
                max $methods.density_options.max
            #end if
        #elif $methods.method == "mzClust":
            mzppm $methods.mzppm
            mzabs $methods.mzabs
            minfrac $methods.minfrac
            ## minsamp $methods.minsamp
        #else:
            mzVsRTbalance $methods.mzVsRTbalance
            mzCheck $methods.mzCheck
            rtCheck $methods.rtCheck
            kNN $methods.kNN
        #end if

        @COMMAND_PEAKLIST@

        @COMMAND_LOG_EXIT@
    ]]></command>

    <inputs>

        <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />

        <!-- Grouping method selector; each <when> exposes the parameters of one method. -->
        <conditional name="methods">
            <param name="method" type="select" label="Method to use for grouping" help="[method] See the help section below">
                <option value="density" selected="true">density</option>
                <option value="mzClust">mzClust</option>
                <option value="nearest">nearest</option>
            </param>
            <when value="density">
                <param name="bw" type="float" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
                <param name="minfrac" type="float" value="0.5" label="Minimum fraction of samples necessary" help="[minfrac] in at least one of the sample groups for it to be a valid group" />
                <param name="mzwid" type="float" value="0.25" label="Width of overlapping m/z slices" help="[mzwid] to use for creating peak density chromatograms and grouping peaks across samples " />
                <!-- <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " /> -->
                <!-- "max" is only passed on the command line when the advanced options are shown. -->
                <conditional name="density_options">
                    <param name="option" type="select" label="Advanced options">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="max" type="integer" value="50" label="Maximum number of groups to identify in a single m/z slice" help="[max]" />
                    </when>
                    <when value="hide">
                    </when>
                </conditional>
            </when>
            <when value="mzClust">
                <!-- Integer defaults must not carry stray whitespace. -->
                <param name="mzppm" type="integer" value="20" label="Relative error used for clustering/grouping in ppm" help="[mzppm]" />
                <param name="mzabs" type="float" value="0" label="Absolute error used for clustering/grouping" help="[mzabs]" />
                <param name="minfrac" type="float" value="0" label="Minimum fraction of each class in one bin" help="[minfrac] minimum fraction of samples necessary in at least one of the sample groups for it to be a valid group" />
                <!-- <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " /> -->
            </when>
            <when value="nearest">
                <param name="mzVsRTbalance" type="integer" value="10" label="Multiplicator for mz value before calculating the (euclidean) distance between two peaks." help="[mzVsRTbalance]" />
                <param name="mzCheck" type="float" value="0.2" label="Maximum tolerated distance for mz" help="[mzCheck]" />
                <param name="rtCheck" type="integer" value="15" label="Maximum tolerated distance for RT" help="[rtCheck]" />
                <param name="kNN" type="integer" value="10" label="Number of nearest Neighbours to check" help="[kNN]" />
            </when>
        </conditional>

        <!-- The "sleep" argument is currently hard-coded to 0.001 in the command template. -->
        <!-- <param name="sleepy" type="float" value="0.001" label="sleep" help="seconds to pause between plotting successive steps of the peak grouping algorithm. peaks are plotted as points showing relative intensity. identified groups are flanked by dotted vertical lines">
            <validator type="in_range" message="Must be more than 0" min="0.001" max="inf"/>
        </param> -->

        <expand macro="input_peaklist"/>

    </inputs>

    <outputs>
        <!-- Labels drop the last 6 characters of the input dataset name (presumably the ".RData" suffix). -->
        <data name="xsetRData" format="rdata.xcms.group" label="${image.name[:-6]}.group.RData"/>
        <data name="rplotsPdf" format="pdf" label="${image.name[:-6]}.group.Rplots.pdf"/>
        <expand macro="output_peaklist" function="group"/>
        <data name="log" format="txt" label="xset.log.txt" hidden="true" />
    </outputs>

    <tests>
        <!--<test>
            <param name="image" value="xset.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 0.7-1139.7 seconds (0-19 minutes)" />
                    <has_text text="Mass range: 50.0021-999.9863 m/z" />
                    <has_text text="Peaks: 59359 (about 14840 per sample)" />
                    <has_text text="Peak Groups: 48998" />
                    <has_text text="Sample classes: bio, blank" />
                </assert_contents>
            </output>
        </test>-->
        <test>
            <param name="image" value="faahKO.xset.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <conditional name="peaklist">
                <param name="convertRTMinute" value="false" />
                <param name="peaklistBool" value="true" />
                <param name="numDigitsMZ" value="4" />
                <param name="numDigitsRT" value="1" />
            </conditional>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8275" />
                    <has_text text="Sample classes: KO, WT" />
                </assert_contents>
            </output>
            <output name="variableMetadata" file="faahKO.xset.group.variableMetadata.tsv" />
            <output name="dataMatrix" file="faahKO.xset.group.dataMatrix.tsv" />
        </test>
        <test>
            <param name="image" value="faahKO-single-class.xset.merged.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <conditional name="peaklist">
                <param name="convertRTMinute" value="false" />
                <param name="peaklistBool" value="true" />
                <param name="numDigitsMZ" value="4" />
                <param name="numDigitsRT" value="1" />
            </conditional>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8275" />
                    <has_text text="Sample classes: KO, WT" />
                </assert_contents>
            </output>
            <output name="variableMetadata" file="faahKO.xset.group.variableMetadata.tsv" />
            <output name="dataMatrix" file="faahKO.xset.group.dataMatrix.tsv" />
        </test>
        <test>
            <param name="image" value="faahKO-single.xset.merged.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 664" />
                    <has_text text="Sample classes: ." />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="image" value="MM-single.xset.merged.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 2 samples" />
                    <has_text text="Time range: 19.7-307.3 seconds (0.3-5.1 minutes)" />
                    <has_text text="Mass range: 117.0357-936.7059 m/z" />
                    <has_text text="Peaks: 236 (about 118 per sample)" />
                    <has_text text="Peak Groups: 236" />
                    <has_text text="Sample classes: ." />
                </assert_contents>
            </output>
        </test>
        <!--<test>
            <param name="image" value="xset.group.retcor.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" />
                    <has_text text="Mass range: 50.0021-999.9863 m/z" />
                    <has_text text="Peaks: 59359 (about 14840 per sample)" />
                    <has_text text="Peak Groups: 48958" />
                    <has_text text="Sample classes: bio, blank" />
                </assert_contents>
            </output>
        </test>-->
        <test>
            <param name="image" value="faahKO.xset.group.retcor.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8157" />
                    <has_text text="Sample classes: KO, WT" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="image" value="faahKO-single.xset.merged.group.retcor.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                <assert_contents>
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8157" />
                    <has_text text="Sample classes: KO, WT" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <!-- reStructuredText help; kept at column 0 because RST is indentation-sensitive. -->
    <help><![CDATA[

@HELP_AUTHORS@

==========
Xcms.Group
==========

-----------
Description
-----------

After peak identification with xcmsSet, this tool groups the peaks which represent the same analyte across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time. Allows rejection of features, which are only partially detected within the replicates of a sample class.


-----------------
Workflow position
-----------------

**Upstream tools**

========================= ================= =================== ==========
Name                      output file       format              parameter
========================= ================= =================== ==========
xcms.xcmsSet              xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.xcmsSet Merger       xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.retcor               xset.RData        rdata.xcms.retcor   RData file
========================= ================= =================== ==========


**Downstream tools**

+---------------------------+-----------------+--------------------+
| Name                      | Output file     | Format             |
+===========================+=================+====================+
| xcms.retcor               | xset.RData      | rdata.xcms.group   |
+---------------------------+-----------------+--------------------+
| xcms.fillPeaks            | xset.RData      | rdata.xcms.group   |
+---------------------------+-----------------+--------------------+

The output file is an xcmsSet.RData file. You can continue your analysis using it in **xcms.retcor** tool as a next step and then **xcms.fillPeaks**.


**General schema of the metabolomic workflow**

.. image:: xcms_group_workflow.png


-----------
Input files
-----------

+---------------------------+-----------------------+
| Parameter : num + label   |   Format              |
+===========================+=======================+
| Or : RData file           |   rdata.xcms.raw      |
+---------------------------+-----------------------+
| Or : RData file           |   rdata.xcms.retcor   |
+---------------------------+-----------------------+


----------
Parameters
----------

Method to use for grouping
--------------------------

**mzClust**

    | Runs high resolution alignment on single spectra samples stored in the RData file generated by the **xcmsSet tool**.

**density**

    | Groups peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.

**nearest**

    | Groups peaks together across samples by creating a master peak list and assigning corresponding peaks from all samples. It is inspired by the alignment algorithm of mzMine.


------------
Output files
------------

xset.group.Rplots.pdf

xset.group.RData: rdata.xcms.group format

    | Rdata file that will be necessary in the third and fourth step of the workflow (xcms.retcor and xcms.fillpeaks).

------

.. class:: infomark

The output file is an xset.group.RData file. You can continue your analysis using it in **xcms.retcor** tool.


---------------------------------------------------

---------------
Working example
---------------

Input files
-----------

    | RData file -> **xset.RData**

Parameters
----------

    | Method -> **density**
    | bw -> **5**
    | minfrac -> **0.3**
    | mzwid -> **0.01**
    | Advanced options: **show**
    | max -> **50**

Output files
------------

    | **1) xset.RData: RData file**

    | **2) Example of an xset.group.Rplots pdf file**

.. image:: xcms_group.png
        :width: 700


---------------------------------------------------

Changelog/News
--------------

**Version 2.1.1 - 29/11/2017**

- BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C

**Version 2.1.0 - 07/02/2017**

- IMPROVEMENT: Add an option to export the peak list at this step without having to wait for CAMERA.annotate

- IMPROVEMENT: xcms.group can deal with merged individual data from "xcms.xcmsSet Merger"

- BUGFIX: the default value of "density" -> "Maximum number of groups to identify in a single m/z slice", which was 5, has been changed to 50 to match the XCMS default value

**Version 2.0.6 - 06/07/2016**

- UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0

**Version 2.0.5 - 04/04/2016**

- TEST: refactoring to pass planemo test using conda dependencies

**Version 2.0.4 - 10/02/2016**

- BUGFIX: better management of errors. Datasets remained green although the process failed

- UPDATE: refactoring of internal management of inputs/outputs

- UPDATE: refactoring to feed the new report tool

**Version 2.0.2 - 02/06/2015**

- IMPROVEMENT: new datatype/dataset formats (rdata.xcms.raw, rdata.xcms.group, rdata.xcms.retcor ...) will facilitate the sequence of tools and so avoid incompatibility errors.

- IMPROVEMENT: parameter labels have changed to facilitate their reading.

    ]]></help>

    <expand macro="citation" />

</tool>