view profia_config.xml @ 1:4753e64cf694 draft

planemo upload for repository https://github.com/workflow4metabolomics/profia.git commit 0a90b8ee1577263ace397124d8b0e34d1e630f51
author ethevenot
date Wed, 03 May 2017 10:49:08 -0400
parents 39ccace77270
children 3f8ae071bdda
line wrap: on
line source

<tool id="profia" name="proFIA" version="3.0.4">
  <description>Preprocessing of FIA-HRMS data</description>
  
  <!-- Packages declared as dependencies of this tool: five R packages and three
       Bioconductor packages. No requirement carries a version attribute, so no
       specific package versions are pinned here.
       NOTE(review): consider pinning versions for reproducibility; the intended
       versions cannot be determined from this file. -->
  <requirements>
    <requirement type="package">r-batch</requirement>
    <requirement type="package">r-FNN</requirement>
    <requirement type="package">r-maxLik</requirement>
    <requirement type="package">r-minpack.lm</requirement>
    <requirement type="package">r-pracma</requirement>
    <requirement type="package">bioconductor-xcms</requirement>
    <requirement type="package">bioconductor-plasFIA</requirement>
    <requirement type="package">bioconductor-proFIA</requirement>
  </requirements>
  
  <!-- Error detection: any exit code of 1 or above is reported at level "fatal".
       No rule is declared here for the content of stdout or stderr. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>
  
  <!-- Command line: runs profia_wrapper.R (located in the tool directory) with
       space-separated "name value" argument pairs.

       The raw-file source depends on the "inputs" conditional:
         * "lib"      : a directory below the configured user library import
                        directory, in the sub-directory named after the user's e-mail;
         * "zip_file" : a zip dataset taken from the history.

       Every interpolated value is single-quoted so that a path or a value containing
       whitespace or shell metacharacters reaches the script as one single argument.
       The template is wrapped in CDATA so that characters such as '<', '>' or '&'
       can be used without XML escaping. -->
  <command><![CDATA[
  Rscript '$__tool_directory__/profia_wrapper.R'

  #if $inputs.input == "lib":
  library '$__app__.config.user_library_import_dir/$__user_email__/$inputs.library'
  #elif $inputs.input == "zip_file":
  zipfile '$inputs.zip_file'
  #end if

  ppmN '$ppmN'
  ppmGroupN '$ppmGroupN'
  fracGroupN '$fracGroupN'
  kI '$kI'

  dataMatrix_out '$dataMatrix_out'
  sampleMetadata_out '$sampleMetadata_out'
  variableMetadata_out '$variableMetadata_out'
  figure '$figure'
  information '$information'
  ]]></command>
  
  <inputs>
    <!-- Source of the raw files: either a zip dataset from the history (default)
         or the name of a directory in the user's library import area. -->
    <conditional name="inputs">
      <param name="input" type="select" label="Choose your input method" >
        <option value="zip_file" selected="true">Zip file from your history containing your raw files</option>
        <option value="lib" >Library directory name</option>
      </param>
      <when value="zip_file">
        <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file (see the details for file upload in the help section below)" />
      </when>
      <when value="lib">
        <param name="library" type="text" size="40" label="Library directory name" help="The name of your directory containing all your data" >
          <validator type="empty_field"/>
          <!-- The value is appended to a filesystem path in the command template:
               refuse '..' so that it cannot point outside the user's own directory. -->
          <validator type="regex" message="The directory name must not contain '..'">^(?!.*\.\.).+$</validator>
        </param>
      </when>
    </conditional>

    <!-- Numeric settings. They are kept as type="text" so that the strings handed to
         the wrapper script are unchanged; the regex validators reject anything that
         is not a non-negative number (e.g. 5, 0.5, .5, 1e-3) before the job is run. -->
    <param name="ppmN" label="Maximum deviation between centroids during band detection (in ppm)" type="text" value="5" help="[ppm]" >
      <validator type="regex" message="Please enter a non-negative number">^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$</validator>
    </param>
    <param name="ppmGroupN" label="Accuracy of the mass spectrometer to be used during feature alignment (in ppm)" type="text" value="5" help="[ppmGroup] Should be inferior or equal to the deviation parameter above." >
      <validator type="regex" message="Please enter a non-negative number">^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$</validator>
    </param>
    <param name="fracGroupN" label=" Minimum fraction of samples in which a peak should be detected in at least one class to be kept during feature alignment" type="text" value="0.5" help="[fracGroup]" >
      <validator type="regex" message="Please enter a non-negative number">^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$</validator>
    </param>
    <param name="kI" label="Number of neighbour features to be used for imputation (select 0 to skip the imputation step)" type="text" value="5" help="[k]" >
      <validator type="regex" message="Please enter a non-negative number">^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$</validator>
    </param>
  </inputs>
  
  <!-- Five output datasets: the three tabular tables (data matrix, sample metadata,
       variable metadata), a PDF figure and a plain-text information file.
       Each label is prefixed with the tool name. -->
  <outputs>
    <data name="dataMatrix_out" format="tabular" label="${tool.name}_dataMatrix.tsv" />
    <data name="sampleMetadata_out" format="tabular" label="${tool.name}_sampleMetadata.tsv" />
    <data name="variableMetadata_out" format="tabular" label="${tool.name}_variableMetadata.tsv" />
    <data name="figure" format="pdf" label="${tool.name}_figure.pdf" />
    <data name="information" format="txt" label="${tool.name}_information.txt" />
  </outputs>
  
  <!-- Functional test: runs the tool on the zip input 'input-plasFIA.zip' with
       non-default parameter values, compares the data matrix with a reference file
       and checks that the information file contains the expected messages. -->
  <tests>
    <test>
      <!-- input selection -->
      <param name="inputs|input" value="zip_file" />
      <param name="inputs|zip_file" value="input-plasFIA.zip" ftype="zip" />
      <!-- parameter values -->
      <param name="ppmN" value="2" />
      <param name="ppmGroupN" value="1" />
      <param name="fracGroupN" value="0.1" />
      <param name="kI" value="2" />
      <!-- expected results -->
      <output name="dataMatrix_out" file="output-dataMatrix.tsv" />
      <output name="information">
        <assert_contents>
          <has_text text="2101  peaks detected" />
          <has_text text="722  groups have been done" />
          <has_text text="3 samples x 644 variables" />
          <has_text text="78 excluded variables (near zero variance)" />
        </assert_contents>
      </output>
    </test>
  </tests>
  
  <help>	

.. class:: infomark

**Author**	Alexis Delabriere and Etienne Thevenot (CEA, LIST, MetaboHUB Paris, etienne.thevenot@cea.fr)

---------------------------------------------------

.. class:: infomark

**Please cite**

Delabriere A., Hohenester U., Colsch B., Junot C., Fenaille F. and Thevenot E.A. *proFIA*: A data preprocessing workflow for Flow Injection Analysis coupled to High-Resolution Mass Spectrometry. *submitted*.

---------------------------------------------------

.. class:: infomark

**R package**

The **proFIA** package is available from the bioconductor repository `http://bioconductor.org/packages/proFIA &lt;http://bioconductor.org/packages/proFIA&gt;`_

---------------------------------------------------

.. class:: infomark

**Tool updates**

See the **NEWS** section at the bottom of this page
  
---------------------------------------------------

==========================================================
*proFIA*: A preprocessing workflow for FIA-HRMS data
==========================================================

-----------
Description
-----------

**Flow Injection Analysis coupled to High-Resolution Mass Spectrometry (FIA-HRMS)** is a promising approach for **high-throughput metabolomics** (Madalinski *et al.*, 2008; Fuhrer *et al.*, 2011; Draper *et al.*, 2013). FIA-HRMS data, however, cannot be preprocessed with current software tools, which either rely on liquid chromatography separation or handle low-resolution data only.

The **proFIA module is a workflow** that preprocesses FIA-HRMS raw data in **centroid** mode and open format (netCDF, mzData, mzXML, and mzML) and generates the table of peak intensities (**peak table**). The workflow consists of **peak detection and quantification** within individual sample files, followed by **alignment** between files in the m/z dimension, and **imputation** of the missing values in the final peak table (Delabriere *et al.*, submitted). For each ion, the graph representing the intensity as a function of time is called a **flowgram**. A flowgram can be modeled as I = kP + ME(P) + B + e, where k is the response factor (corresponding to the ionization properties of the analyte), P is the **sample peak** (normalized profile which is common for all analytes from a sample and depends on the flow injection conditions only), ME is the **matrix effect**, B is the **solvent baseline**, and e is the heteroscedastic noise.

The generated peak table is available in the '3 table' W4M tabular format (**dataMatrix**, **sampleMetadata**, and **variableMetadata**) for downstream statistical analysis and annotation with W4M modules.

A figure provides **diagnostics** and visualization of the preprocessed data set.

---------------------------------------------------

.. class:: infomark

**References**

| Delabriere A., Hohenester U., Colsch B., Junot C., Fenaille F. and Thevenot E.A. proFIA: A data preprocessing workflow for Flow Injection Analysis coupled to High-Resolution Mass Spectrometry. *submitted*.
| Draper J., Lloyd A., Goodacre R. and Beckmann M. (2013). Flow infusion electrospray ionisation mass spectrometry for high throughput, non-targeted metabolite fingerprinting: a review. *Metabolomics* 9, 4-29. (http://dx.doi.org/10.1007/s11306-012-0449-x)
| Fuhrer T., Dominik H., Boris B. and Zamboni N. (2011). High-throughput, accurate mass metabolome profiling of cellular extracts by flow injection-time-of-flight mass spectrometry. *Analytical Chemistry* 83, 7074-7080. (http://dx.doi.org/10.1021/ac201267k)
| Madalinski G., Godat E., Alves S., Lesage D., Genin E., Levi P., Labarre J., Tabet J., Ezan E. and Junot, C. (2008). Direct introduction of biological samples into a LTQ-orbitrap hybrid mass spectrometer as a tool for fast metabolome analysis. *Analytical Chemistry* 80, 3291-3303. (http://dx.doi.org/10.1021/ac7024915)

---------------------------------------------------

-----------------
Workflow position
-----------------

.. image:: profia_workflowPositionImage.png
        :width: 600

-----------
Input files
-----------

+------------------------------+------------+
| Parameter : num + label      |   Format   |
+==============================+============+
| 1 : Choose your input method |   zip      |
+------------------------------+------------+

---------------------------------------------------

.. class:: warningmark

VERY IMPORTANT: Your data must be in **centroid** mode (centroidization of raw files and conversion to an open format can be achieved with the proteowizard software: http://proteowizard.sourceforge.net/).


You have two methods for your inputs:
    | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories; see below).
    | library folder: You must specify the name of your "library" (folder) created within your space project (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories.

**Steps for creating the zip file**

**Step 1: Creating your directory and organizing the subdirectories**

.. class:: warningmark

VERY IMPORTANT: If you zip your files under Windows, you must use the **7Zip** software (http://www.7-zip.org/); otherwise your zip file will not be unzipped correctly on the W4M platform (zip corrupted bug).

1a) Prepare a parent folder with the name of your data set (e.g., 'arabidopsis') containing your files:
   | 'arabidopsis/w1.raw'
   | 'arabidopsis/w2.raw'
   | ...
   | 'arabidopsis/m1.raw'
   | 'arabidopsis/m2.raw'
   | ...
   |
   
1b) If you have several experimental conditions resulting in distinct profiles of your samples (e.g. 'wild-type' and 'mutant' genotypes), create subfolders for your files (e.g., 'wild' and 'mutant') into your parent folder:
   | 'arabidopsis/wild/w1.raw'
   | 'arabidopsis/wild/w2.raw'
   | ...
   | 'arabidopsis/mutant/m1.raw'
   | 'arabidopsis/mutant/m2.raw'
   | ...
   |
   
**Step 2: Creating a zip file**
   | Zip your **parent** folder (here the 'arabidopsis' folder) containing all the subfolders and files with **7Zip**.
   |

**Step 3: Uploading it to our Galaxy server**
   | If your zip file is smaller than 2 GB, you can use the **Upload File** tool and the **no_unzip.zip** type to upload it.
   | Otherwise, if your zip file is larger than 2 GB, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf).
   | For more information, don't hesitate to send us an email (supportATworkflow4metabolomics.org).
   |

----------
Parameters
----------
   
Maximum deviation between centroids during band detection; in ppm (default = 5)
	| m/z tolerance of centroids corresponding to the same ion from one scan to the other.
	| 

Accuracy of the mass spectrometer to be used during feature alignment; in ppm (default = 5)
	| Should be less than or equal to the deviation parameter above.
	| 
    
Minimum fraction of samples in which a peak should be detected in at least one class to be kept during feature alignment (default = 0.5)
	| Identical to the corresponding parameter in XCMS. 
	|     

Number of neighbour features to be used for imputation (default = 5)
	| Select 0 to skip the imputation step. 
	|     	


------------
Output files
------------

dataMatrix.tabular
	| **dataMatrix** tab-separated file with the variables as rows and samples as columns. Missing values are indicated as 'NA' (i.e. when the signal was not significantly different from noise).
	|
	
sampleMetadata.tabular
	| **sampleMetadata** tab-separated file containing the sample metadata as columns.
	| 
	
variableMetadata.tabular
	| **variableMetadata** tab-separated file containing the variable metadata as columns. The **timeShifted** flag is set to 1 when the flowgram is time shifted compared to the sample peak (probably due to liquid retention in the FI tube). The **corSampPeakMean** metric is the correlation between the feature flowgram and the sample peak (values are in [-1, 1]). A value below 0.2 suggests that the feature signal is affected by a strong matrix effect. The **meanSolvent** is the mean baseline signal in the feature flowgrams. The **signalOverSolventPvalueMean** is the mean p-value of the tests discriminating between signal and baseline solvent.
	| 

figure.pdf
	| Visualization and diagnostics about the preprocessed data set; **Feature quality**: Number of detected features per sample for each of the three categories: 'Well-behaved' features have a peak shape close to the sample peak (optimal FIA acquisition is achieved when the majority of the features fall into this category); 'Shifted' indicates a time shift compared to the sample peak, and probably results from retention in the FI tube; 'Significant Matrix Effect' corresponds to a correlation between the feature flowgram and the sample peak of less than 0.2, which is usually caused by a strong matrix effect; **Sample peaks**: Visualization of the peak model for each sample; the shapes should be close in case of similar FIA conditions; **m/z density**: may help detect a missing m/z value and, in turn, suggest that the *ppm* parameter should be modified; **PCA score plot** of the log10 intensities to detect sample outliers.
	| 
			
information.txt
	| Text file with all messages and warnings generated during the computation.
	|

---------------------------------------------------

---------------
Working example
---------------

Figure output
=============

.. image:: profia_workingExampleImage.png
        :width: 600
        
---------------------------------------------------

----
NEWS
----

CHANGES IN VERSION 3.0.4
========================

MINOR MODIFICATION

Details added in the documentation

CHANGES IN VERSION 3.0.2
========================

NEW FEATURE

Parallel processing


CHANGES IN VERSION 3.0.0
========================

NEW FEATURE

Creation of the tool

</help>

<!-- Citations for this tool.
     The first entry is an inline BibTeX record for the proFIA manuscript; its
     Journal and Year fields are set to "submitted" and its Volume and Doi fields
     are empty.
     NOTE(review): update this record once the article has been published.
     The second entry is a DOI-only citation.
     NOTE(review): presumably the paper describing the hosting platform - confirm. -->
<citations>
  <citation type="bibtex">@Article{DelabriereSubmitted,
  Title                    = {proFIA: A data preprocessing workflow for Flow Injection Analysis coupled to High-Resolution Mass Spectrometry},
  Author                   = {Delabriere, Alexis and Hohenester, Ulli and Colsch, Benoit and Junot, Christophe and Fenaille, Francois and Thevenot, Etienne A},
  Journal                  = {submitted},
  Year                     = {submitted},
  Pages                    = {--},
  Volume                   = {},
  Doi                      = {}
  }</citation>
  <citation type="doi">10.1093/bioinformatics/btu813</citation>
</citations>

</tool>