view abims_xcms_group.xml @ 13:13558e8a4778 draft

planemo upload for repository commit 4897a06ef248e2e74e57a496dd68adbda3c828f1
author lecorguille
date Wed, 29 Nov 2017 09:46:03 -0500
parents 4c8507667cd6
children 833d2c821d9c
line wrap: on
line source

<tool id="abims_xcms_group" name="xcms.group" version="2.1.1">

    <!-- Short summary of what the tool does. -->
    <description>Group peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.</description>

    <!-- NOTE(review): "requirements" and "stdio" are macros defined outside this view; confirm the macros import is present. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>

        xfunction group
        image '$image'

        xsetRdataOutput '$xsetRData'
        rplotspdf '$rplotsPdf'

        method $methods.method
        #if $methods.method == "density":
            ## minsamp $methods.minsamp
            minfrac $methods.minfrac
            bw $
            mzwid $methods.mzwid
            sleep 0.001
        #if $methods.density_options.option == "show":
            max $methods.density_options.max
        #end if
        #elif $methods.method == "mzClust":
            mzppm $methods.mzppm
            mzabs $methods.mzabs
            minfrac $methods.minfrac
            ## minsamp $methods.minsamp
            mzVsRTbalance $methods.mzVsRTbalance
            mzCheck $methods.mzCheck
            rtCheck $methods.rtCheck
            kNN $methods.kNN
        #end if



        <param name="image" type="data" format="rdata.xcms.raw,,rdata.xcms.retcor,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />
        <conditional name="methods">
            <param name="method" type="select" label="Method to use for grouping" help="[method] See the help section below">
                <option value="density" selected="true">density</option>
                <option value="mzClust" >mzClust</option>
                <option value="nearest" >nearest</option>
            <when value="density">
                <param name="bw" type="float" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
                <param name="minfrac" type="float" value="0.5" label="Minimum fraction of samples necessary" help="[minfrac] in at least one of the sample groups for it to be a valid group" />
                <param name="mzwid" type="float" value="0.25" label="Width of overlapping m/z slices" help="[mzwid] to use for creating peak density chromatograms and grouping peaks across samples " />
                <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " />
            <conditional name="density_options">
            <param name="option" type="select" label="Advanced options">
                <option value="show">show</option>
                <option value="hide" selected="true">hide</option>
            <when value="show">
                <param name="max" type="integer" value="50" label="Maximum number of groups to identify in a single m/z slice" help="[max]" />
            <when value="hide">

            <when value="mzClust">
                <param name="mzppm" type="integer" value="20 " label="Relative error used for clustering/grouping in ppm" help="[mzppm]" />
                <param name="mzabs" type="float" value="0" label="Absolute error used for clustering/grouping" help="[mzabs]" />
                <param name="minfrac" type="float" value="0" label="Minimum fraction of each class in one bin" help="[minfrac] minimum fraction of samples necessary in at least one of the sample groups for it to be a valid group" />
                <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " />
            <when value="nearest">
                <param name="mzVsRTbalance" type="integer" value="10 " label="Multiplicator for mz value before calculating the (euclidean) distance between two peaks." help="[mzVsRTbalance]" />
                <param name="mzCheck" type="float" value="0.2" label="Maximum tolerated distance for mz" help="[mzCheck]" />
                <param name="rtCheck" type="integer" value="15" label="Maximum tolerated distance for RT" help="[rtCheck]" />
                <param name="kNN" type="integer" value="10" label="Number of nearest Neighbours to check" help="[kNN]" />
        <param name="sleepy" type="float" value="0.001" label="sleep" help="seconds to pause between plotting successive steps of the peak grouping algorithm. peaks are plotted as points showing relative intensity. identified groups are flanked by dotted vertical lines">
            <validator type="in_range" message="Must be more than 0" min="0.001" max="inf"/>

        <expand macro="input_peaklist"/>

        <data name="xsetRData" format="" label="${[:-6]}.group.RData"/>
        <data name="rplotsPdf" format="pdf" label="${[:-6]}.group.Rplots.pdf"/>
        <expand macro="output_peaklist" function="group"/>
        <data name="log" format="txt" label="xset.log.txt"  hidden="true" />

            <param name="image" value="xset.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 0.7-1139.7 seconds (0-19 minutes)" />
                    <has_text text="Mass range: 50.0021-999.9863 m/z" />
                    <has_text text="Peaks: 59359 (about 14840 per sample)" />
                    <has_text text="Peak Groups: 48998" />
                    <has_text text="Sample classes: bio, blank" />
            <param name="image" value="faahKO.xset.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <conditional name="peaklist">
                <param name="convertRTMinute" value="false" />
                <param name="peaklistBool" value="true" />
                <param name="numDigitsMZ" value="4" />
                <param name="numDigitsRT" value="1" />
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8275" />
                    <has_text text="Sample classes: KO, WT" />
            <output name="variableMetadata" file="" />
            <output name="dataMatrix" file="" />
            <param name="image" value="faahKO-single-class.xset.merged.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <conditional name="peaklist">
                <param name="convertRTMinute" value="false" />
                <param name="peaklistBool" value="true" />
                <param name="numDigitsMZ" value="4" />
                <param name="numDigitsRT" value="1" />
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8275" />
                    <has_text text="Sample classes: KO, WT" />
            <output name="variableMetadata" file="" />
            <output name="dataMatrix" file="" />
            <param name="image" value="faahKO-single.xset.merged.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 664" />
                    <has_text text="Sample classes: ." />
            <param name="image" value="MM-single.xset.merged.RData"/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                    <has_text text="object with 2 samples" />
                    <has_text text="Time range: 19.7-307.3 seconds (0.3-5.1 minutes)" />
                    <has_text text="Mass range: 117.0357-936.7059 m/z" />
                    <has_text text="Peaks: 236 (about 118 per sample)" />
                    <has_text text="Peak Groups: 236" />
                    <has_text text="Sample classes: ." />
            <param name="image" value=""/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" />
                    <has_text text="Mass range: 50.0021-999.9863 m/z" />
                    <has_text text="Peaks: 59359 (about 14840 per sample)" />
                    <has_text text="Peak Groups: 48958" />
                    <has_text text="Sample classes: bio, blank" />
            <param name="image" value=""/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8157" />
                    <has_text text="Sample classes: KO, WT" />
            <param name="image" value=""/>
            <param name="methods|method" value="density"/>
            <param name="methods|bw" value="5"/>
            <param name="methods|minfrac" value="0.3"/>
            <param name="methods|mzwid" value="0.01"/>
            <param name="methods|density_options|option" value="show"/>
            <param name="methods|density_options|max" value="50"/>
            <output name="log">
                    <has_text text="object with 4 samples" />
                    <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" />
                    <has_text text="Mass range: 200.1-600 m/z" />
                    <has_text text="Peaks: 9251 (about 2313 per sample)" />
                    <has_text text="Peak Groups: 8157" />
                    <has_text text="Sample classes: KO, WT" />





After peak identification with xcmsSet, this tool groups the peaks which represent the same analyte across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time. It also allows rejecting features that are only partially detected within the replicates of a sample class.

Workflow position

**Upstream tools**

========================= ================= =================== ==========
Name                      output file       format              parameter
========================= ================= =================== ==========
xcms.xcmsSet              xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.xcmsSet Merger       xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.retcor               xset.RData        rdata.xcms.retcor   RData file
========================= ================= =================== ==========

**Downstream tools**

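+---------------------------+-----------------+--------------------+
| Name                      | Output file     | Format             |
+===========================+=================+====================+
| xcms.retcor               | xset.RData      | rdata.xcms.group   |
+---------------------------+-----------------+--------------------+
| xcms.fillPeaks            | xset.RData      | rdata.xcms.group   |
+---------------------------+-----------------+--------------------+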

The output file is an xcmsSet.RData file. You can continue your analysis by using it in the **xcms.retcor** tool as a next step, and then in **xcms.fillPeaks**.

**General schema of the metabolomic workflow**

.. image:: xcms_group_workflow.png

Input files

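+---------------------------+-----------------------+
| Parameter : num + label   |   Format              |
+===========================+=======================+
| Or : RData file           |   rdata.xcms.raw      |
+---------------------------+-----------------------+
| Or : RData file           |   rdata.xcms.retcor   |
+---------------------------+-----------------------+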


Method to use for grouping

**mzClust**

    | Runs high resolution alignment on single spectra samples stored in the RData file generated by the **xcmsSet tool**.

**density**

    | Groups peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.

**nearest**

    | Groups peaks together across samples by creating a master peak list and assigning corresponding peaks from all samples. It is inspired by the alignment algorithm of mzMine.

Output files
------------

xset.group.RData: rdata.xcms.group format

    | RData file that will be needed in the third and fourth steps of the workflow (xcms.retcor and xcms.fillPeaks).


.. class:: infomark

The output file is an xcmsSet.RData file. You can continue your analysis by using it in the **xcms.retcor** tool.


Working example

Input files

    | RData file -> **xset.RData**


    | Method -> **density**
    | bw     -> **5**
    | minfrac -> **0.3**
    | mzwid    -> **0.01**
    | Advanced options: **show**
    | max -> **50**

Output files

    | **1) xset.RData: RData file**

    | **2) Example of a pdf file**

.. image:: xcms_group.png
        :width: 700



**Version 2.1.1 - 29/11/2017**

- BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C

**Version 2.1.0 - 07/02/2017**

- IMPROVEMENT: Add an option to export the peak list at this step without having to wait for CAMERA.annotate

- IMPROVEMENT: can deal with merged individual data from "xcms.xcmsSet Merger"

- BUGFIX: the default value of "density" -> "Maximum number of groups to identify in a single m/z slice", which was 5, has been changed to 50 to match the XCMS default value

**Version 2.0.6 - 06/07/2016**

- UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0

**Version 2.0.5 - 04/04/2016**

- TEST: refactoring to pass planemo test using conda dependencies

**Version 2.0.4 - 10/02/2016**

- BUGFIX: better management of errors. Datasets remained green although the process failed

- UPDATE: refactoring of internal management of inputs/outputs

- UPDATE: refactoring to feed the new report tool

**Version 2.0.2 - 02/06/2015**

- IMPROVEMENT: new datatype/dataset formats (rdata.xcms.raw, rdata.xcms.group, rdata.xcms.retcor ...) will facilitate the sequence of tools and so avoid incompatibility errors.

- IMPROVEMENT: parameter labels have changed to facilitate their reading.


    <!-- Citation entries are pulled in from the shared "citation" macro. -->
    <expand macro="citation" />
