Mercurial > repos > lecorguille > xcms_xcmsset

<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.0.7">

    <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>

    <!-- Pinned runtime dependencies: the R interpreter (3.1.2), the Rscript
         launcher, the xcms package (1.44.0) and the xcms_w4m_script package (2.2.0).
         NOTE(review): xcms_w4m_script presumably ships the xcms.r script that the
         command section calls; confirm against the package. -->
    <requirements>
        <requirement type="package" version="3.1.2">R</requirement>
        <requirement type="binary">Rscript</requirement>
        <requirement type="package" version="1.44.0">xcms</requirement>
        <requirement type="package" version="2.2.0">xcms_w4m_script</requirement>
    </requirements>

    <!-- Every exit code of 1 or above is reported at the "fatal" level. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!--
        Cheetah template that builds the xcms.r call as a flat list of
        "key value" arguments. Once xcms.r has finished, its exit status is
        saved, log.txt is moved to the log output and printed, and the saved
        status is returned as the exit status of the whole command.
        File paths are single-quoted so that a path containing a space is still
        handed to xcms.r as one argument.
    -->
    <command><![CDATA[
        xcms.r
        ## Input source: a named library directory or a zip dataset from the history.
        #if $inputs.input == "lib":
            library '$__app__.config.user_library_import_dir/$__user_email__/$inputs.library'
        #elif $inputs.input == "zip_file":
            zipfile '$inputs.zip_file'
        #end if

        xfunction xcmsSet

        xsetRdataOutput '$xsetRData'
        sampleMetadataOutput '$sampleMetadata'
        ticspdf '$ticsRawPdf'
        bicspdf '$bpcsRawPdf'

        ## profmethod $profmethod
        nSlaves \${GALAXY_SLOTS:-1} method $methods.method
        #if $methods.method == "centWave":
            ppm $methods.ppm
            peakwidth "c($methods.peakwidth)"
            ## The two optional sections below exist only in the centWave branch.
            #if $methods.options_scanrange.option == "show":
                scanrange "c($methods.options_scanrange.scanrange)"
            #end if
            #if $methods.options_c.option == "show":
                mzdiff $methods.options_c.mzdiff
                snthresh $methods.options_c.snthresh
                integrate $methods.options_c.integrate
                noise $methods.options_c.noise
                prefilter "c($methods.options_c.prefilter)"
            #end if
        #elif $methods.method == "matchedFilter":
            step $methods.step
            fwhm $methods.fwhm
            #if $methods.options_m.option == "show":
                ## sigma "$methods.options_m.sigma"
                max $methods.options_m.max
                snthresh $methods.options_m.snthresh
                ## mzdiff $methods.options_m.mzdiff
                steps $methods.options_m.steps
                ## sleep $methods.options_m.sleep
            #end if
        #elif $methods.method == "MSW":
            snthr $methods.snthr
            nearbyPeak $methods.nearbyPeak
            winSize.noise $methods.winSize_noise
            amp.Th $methods.amp_Th
            scales "c($methods.scales)"
            SNR.method "$methods.SNR_method"
        #end if
        ;
        ## Keep the exit status of xcms.r so that publishing the log cannot mask a failure.
        return=\$?;
        mv log.txt '$log';
        cat '$log';
        sh -c "exit \$return"
    ]]></command>

    <inputs>

        <!-- Data source selector: a zip dataset from the history (default) or a
             named library directory. -->
        <conditional name="inputs">
            <param name="input" type="select" label="Choose your inputs method">
                <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option>
                <option value="lib">Library directory name</option>
            </param>
            <when value="zip_file">
                <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file"/>
            </when>
            <when value="lib">
                <!-- The directory name is mandatory: an empty value is rejected. -->
                <param name="library" type="text" size="40" label="Library directory name" help="The name of your directory containing all your data">
                    <validator type="empty_field"/>
                </param>
            </when>
        </conditional>


<!--
        <param name="profmethod" type="select" label="Method to use for profile generation (profmethod)" >
            <option value="bin" selected="true">bin</option>
            <option value="binlin">binlin</option>
            <option value="binlinbase">binlinbase</option>
            <option value="intlin">intlin</option>
        </param>
        <param name="nSlaves" type="integer" value="9" label="MPI-slaves CPU" help="number of MPI-slaves to use for parallel peak detection" />
-->
        <conditional name="methods">
            <!-- Peak detection algorithm; matchedFilter is preselected. -->
            <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
                <option value="centWave">centWave</option>
                <option value="matchedFilter" selected="true">matchedFilter</option>
                <option value="MSW">MSW</option>
            </param>

            <!-- centWave Filter options -->
            <!-- centWave parameters: ppm and peakwidth are always sent; the scan
                 range and the advanced options are sent only when set to "show". -->
            <when value="centWave">
                <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" />
                <param name="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="[peakwidth]" />
                <conditional name="options_scanrange">
                    <param name="option" type="select" label="Scan range option " >
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <!-- The command template wraps this value in c(...), so it must be
                             entered as two bare comma-separated numbers, without parentheses. -->
                        <param name="scanrange" type="text" value="" label="scanrange" help="scan range to process, entered as first,last without parentheses, for example 16,365" >
                            <validator type="empty_field"/>
                        </param>
                    </when>
                    <when value="hide">
                    </when>
                </conditional>

                <conditional name="options_c">
                    <param name="option" type="select" label="Advanced options" >
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="snthresh" type="integer" value="10" label="Signal/Noise threshold" help="[snthresh] Signal to noise ratio cutoff" />
                        <param name="mzdiff" type="float" value="-0.001" label="Min m/z difference" help="[mzdiff] Min m/z difference for peaks with overlapping RT " />
                        <param name="integrate" type="select" label="peak limits method" help="[integrate]" >
                            <option value="1">peak limits based on smoothed 2nd derivative (less precise)</option>
                            <option value="2">peak limits based on real data (more sensitive to noise)</option>
                        </param>
                        <!-- Like scanrange, this value is wrapped in c(...) by the command template. -->
                        <param name="prefilter" type="text" value="3,100" label="Prefilter step for the first phase" help="[prefilter] Separated by a comma: k,I. Mass traces are only retained if they contain at least ‘k’ peaks with intensity >= ‘I’"/>
                        <param name="noise" type="integer" value="0" label="Noise filter" help="[noise] optional argument which is useful for data that was centroided without any intensity threshold, centroids with intensity smaller than ‘noise’ are omitted from ROI detection"/>
                    </when>
                    <when value="hide">
                    </when>
                </conditional>
            </when>

        <!-- matched Filter options -->
            <!-- matchedFilter parameters: step and fwhm are always sent; max,
                 snthresh and steps are sent only when the advanced options are shown. -->
            <when value="matchedFilter">
                <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size"/>
                <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma"/>
                <conditional name="options_m">
                    <param name="option" type="select" label="Advanced options">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <!--
                        <param name="sigma" type="hidden" value="fwhm/2.3548" label="sigma" help="standard deviation (fwhm/2.3548)" />
                        -->
                        <param name="max" type="integer" value="5" label="Maximum number of peaks per extracted ion chromatogram" help="[max]"/>
                        <param name="snthresh" type="integer" value="10" label="Signal to noise ratio cutoff" help="[snthresh]"/>
                        <param name="steps" type="integer" value="2" label="Number of steps to merge prior to filtration" help="[steps] The peak identification algorithm combines a given number of EIBPCs prior to filtration and peak detection, as defined by the steps argument"/>
                        <!--
                        <param name="mzdiff" type="text" size="20" value="0.8-step*steps" label="m/z difference" help="min m/z difference for peaks with overlapping RT " />
                        -->
                    </when>
                    <when value="hide"/>
                </conditional>
            </when>

        <!-- MSW Filter options -->
            <!-- MSW parameters: all six values are always sent to xcms.r. -->
            <when value="MSW">
                <param name="nearbyPeak" type="select" label="Determine whether to include the nearby small peaks of major peaks" help="[nearbyPeak]" >
                    <option value="TRUE">TRUE</option>
                    <option value="FALSE">FALSE</option>
                </param>
                <param name="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />
                <param name="snthr" type="integer" value="3" label="SNR (Signal to Noise Ratio) threshold" help="[snthr]" />
                <param name="amp_Th" type="float" value="0.002" label="Minimum required relative amplitude of the peak" help="[amp.Th] Ratio to the maximum of CWT coefficients" />
                <!-- The command template wraps this value in c(...); both a seq(...) call
                     and a plain list of numbers are therefore valid entries. -->
                <param name="scales" type="text" value="seq(1,22,3)" label="Scales for the Continuous Wavelet Transform (CWT)" help="[scales] Scales are linked to the width of the peaks that are to be detected. Type either seq(from,to,by) for a regular sequence, as in the default value, or c(n,n,...) for an explicit vector" />
                <param name="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." />
            </when>
        </conditional>
    </inputs>

    <!-- Datasets produced by the job; each name is used as a variable in the
         command template. -->
    <outputs>
        <data name="xsetRData" format="rdata.xcms.raw" label="xset.RData"/>
        <data name="sampleMetadata" format="tabular" label="sampleMetadata.tsv"/>
        <data name="ticsRawPdf" format="pdf" label="xset.TICs_raw.pdf"/>
        <data name="bpcsRawPdf" format="pdf" label="xset.BPCs_raw.pdf"/>
        <data name="log" format="txt" label="xset.log.txt"/>
    </outputs>

    <!-- Functional test: matchedFilter on sacuri.zip with the advanced options
         shown; only the content of the log output is asserted.
         Nested parameters are addressed with the "|" separator, the form
         documented by Galaxy for parameters inside conditionals. -->
    <tests>
        <test>
            <param name="inputs|input" value="zip_file" />
            <param name="inputs|zip_file" value="sacuri.zip" />
            <param name="methods|method" value="matchedFilter" />
            <param name="methods|step" value="0.01" />
            <param name="methods|fwhm" value="4" />
            <param name="methods|options_m|option" value="show" />
            <param name="methods|options_m|max" value="50" />
            <param name="methods|options_m|snthresh" value="1" />
            <param name="methods|options_m|steps" value="2" />
            <!--<output name="xsetRData" file="xset.RData" />-->
            <!--<output name="sampleMetadata" file="sampleMetadata.tsv" />-->
            <!--<output name="ticsRawPdf" file="xset.TICs_raw.pdf" />-->
            <!--<output name="bpcsRawPdf" file="xset.BPCs_raw.pdf" />-->
            <output name="log">
                <assert_contents>
                    <has_text text="object with 9 samples" />
                    <has_text text="Time range: 0.7-1140 seconds (0-19 minutes)" />
                    <has_text text="Mass range: 50.0019-999.9863 m/z" />
                    <has_text text="Peaks: 135846 (about 15094 per sample)" />
                    <has_text text="Peak Groups: 0" />
                    <has_text text="Sample classes: bio, blank" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[

.. class:: infomark

**Authors**  Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu

.. class:: infomark

**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M]

 | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.

---------------------------------------------------

============
Xcms.xcmsSet
============

-----------
Description
-----------

This tool is used for preprocessing analyte data from multiple LC/MS files (formats NetCDF, mzXML and mzData). It extracts ions from each sample independently and, using a statistical model, filters and integrates the peaks.
You can read a tutorial on how to perform xcms preprocessing which is available here_.

.. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf


-----------------
Workflow position
-----------------

**Upstream tools**

========================= ================= ======= =========
Name                      output file       format  parameter
========================= ================= ======= =========
NA                        NA                zip     NA
========================= ================= ======= =========


**Downstream tools**

+---------------------------+--------------------+-----------------+
| Name                      | Output file        | Format          |
+===========================+====================+=================+
|xcms.group                 | xset.RData         | rdata.xcms.raw  |
+---------------------------+--------------------+-----------------+
|PCA ellipsoid by factors   | sampleMetadata.tsv | Tabular         |
+---------------------------+--------------------+-----------------+
|Anova                      | sampleMetadata.tsv | Tabular         |
+---------------------------+--------------------+-----------------+


**Example of a metabolomic workflow**

.. image:: XCMS_Galaxy_workflow.png


------

.. class:: infomark

The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.

---------------------------------------------------


-----------
Input files
-----------

+---------------------------+------------+
| Parameter : num + label   |   Format   |
+===========================+============+
| 1 : Choose your inputs    |   zip      |
+---------------------------+------------+

**Choose your inputs**

You have two methods for your inputs:

    | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).
    | library folder: You must specify the name of your "library" (folder) created within your space project (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories.

Steps for creating the zip file
-------------------------------

**Step1: Creating your directory and hierarchize the subdirectories**


VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip software (http://www.7-zip.org/); otherwise your zip file will not be unzipped correctly on the W4M platform (zip corrupted bug).

Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild):
arabidopsis/wild/01.raw
arabidopsis/mutant/01.raw

**Step2: Creating a zip file**

Create your zip file (e.g.: arabidopsis.zip).

**Step 3 : Uploading it to our Galaxy server**

If your zip file is smaller than 2Gb, you can use the Get Data tool to upload it.

Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf).

For more information, don't hesitate to send us an email at supportATworkflow4metabolomics.org.

Advices for converting your files for the XCMS input
----------------------------------------------------

We recommend converting your raw files to **mzXML** in centroid mode (smaller files); the files will then be compatible with the xcms centWave method.

**We recommend you the following parameters:**

Use Filtering: **True**

Use Peak Picking: **True**

Peak Picking - Apply to MS Levels: **All Levels (1-)** : Centroid Mode

Use zlib: **64**

Binary Encoding: **64**

m/z Encoding: **64**

Intensity Encoding: **64**


----------
Parameters
----------

Extraction method for peaks detection
-------------------------------------

**Matched Filter**

    | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm.
    | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).


**cent Wave**

    | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
    | Due to the fact that peak centroids are used, a binning step is not necessary.
    | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals.

**MSW**

    | Wavelet based, used for direct infusion data. Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales.
    | If you wish to have more details about the other parameters, you can read the following documents:
    | -Example of preprocessing data with XCMS : http://www.bioconductor.org/packages/2.12/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf
    | -Details and explanations for all the parameters of XCMS package: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf


------------
Output files
------------

xset.TICs_raw.pdf

    | "Total Ion Chromatograms" graph in pdf format.

xset.BPCs_raw.pdf

    | "Base Peak Chromatograms" graph in pdf format with each class samples opposed.

sampleMetadata.tsv

    | Tabular file that contains, for each sample, its associated class and polarity (positive, negative or mixed).
    | This file is necessary in the Anova and PCA step of the workflow.

xset.RData: rdata.xcms.raw format

    | Rdata file that is necessary in the second step of the workflow "xcms.group".

------

.. class:: infomark

The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.

---------------------------------------------------

---------------
Working example
---------------

Input files
-----------

    | zip_file -> **sacuri.zip**

Parameters
----------

    | Method -> **matchedFilter**
    | step   -> **0.01**
    | fwhm   -> **4**
    | Advanced option -> **show**
    | max: -> **50**
    | snthresh -> **1**
    | steps -> **2**


Output files
------------

    | **1) xset.RData: RData file**

    | **2) Example of a sampleMetadata.tsv  :**


+---------------------------+------------+---------+
| sampleMetadata            |   class    | polarity|
+===========================+============+=========+
|HU_neg_017                 |   bio      |negative |
+---------------------------+------------+---------+
|HU_neg_028                 |   bio      |negative |
+---------------------------+------------+---------+
|HU_neg_034                 |   bio      |negative |
+---------------------------+------------+---------+
|Blanc04                    |   blank    |negative |
+---------------------------+------------+---------+
|Blanc06                    |   blank    |negative |
+---------------------------+------------+---------+
|Blanc09                    |   blank    |negative |
+---------------------------+------------+---------+


    | **3) Example of xset.TICs_raw.pdf (Total Ion Chromatograms) :**

.. image:: xcms_tics.png


---------------------------------------------------

Changelog/News
--------------


**Version 2.0.7 - 10/02/2016**

- BUGFIX: better management of errors. Datasets remained green although the process failed

- BUGFIX/IMPROVEMENT: New checking steps around the imported data in order to raise explicit error messages before or after launching XCMS: checking of bad characters in the filenames, checking of the XML integrity and checking of duplicates which can appear in the sample names during the XCMS process because of bad characters

- BUGFIX/IMPROVEMENT: New step to check and delete bad characters in the XML: accented characters in the storage path of the mass spectrometer

- UPDATE: refactoring of internal management of inputs/outputs

- UPDATE: refactoring to feed the new report tool


**Version 2.0.2 - 18/01/2016**

- BUGFIX: Some zip files were tagged as "corrupt" by R. We have changed the extraction mode to deal with those cases.


**Version 2.0.2 - 09/10/2015**

- BUGFIX: Some users reported a bug in xcms.xcmsSet. The preprocessing stops itself and doesn't import the whole dataset contained in the zip file without warning. But meanwhile, please check your samplemetadata dataset and the number of rows.


**Version 2.0.2 - 02/06/2015**

- NEW: The W4M workflows will now take a zip file as input to ease the transfer and to improve dataset exchange between tools and users. (See How_to_upload). The previous "Library directory name" is still available but we invite users to switch to the new zip system as soon as possible.

- IMPROVEMENT: new datatype/dataset formats (rdata.xcms.raw, rdata.xcms.group, rdata.xcms.retcor ...) will facilitate the sequence of tools and so avoid incompatibility errors.

- IMPROVEMENT: parameter labels have changed to facilitate their reading.

    ]]></help>


    <!-- Publications to cite for this tool, each identified by its DOI. -->
    <citations>
        <citation type="doi">10.1021/ac051437y</citation>
        <citation type="doi">10.1093/bioinformatics/btu813</citation>
    </citations>

</tool>
author	lecorguille
date	Mon, 22 Feb 2016 16:25:05 -0500
parents	c5fa73f1703f
children	0888f7ef739a