view ionflow/ionflow.xml @ 0:3b461dc9568b draft default tip

Uploaded
author metaboflow_cam
date Mon, 09 Aug 2021 09:41:22 +0000
parents
children
line wrap: on
line source

<!--
wl-10-08-2020, Mon: commence
wl-24-08-2020, Mon: first version
wl-16-11-2020, Mon: second version
wl-23-03-2021, Tue: third version
wl-08-06-2021, Tue: fourth version: new stuff from Jacopo
-->

<tool id="ionfow" name="IonFlow" version="0.4.0">
  <!-- NOTE(review): the id on the opening tool tag is spelled "ionfow";
       presumably "ionflow" was intended. It is left alone here because
       saved workflows and installed copies refer to the tool by that id. -->
  <!-- Short summary of what the tool does. -->
  <description>Pipeline for processing and analysis of ionomics data</description>

  <!-- The "requirements" and "stdio" macros expanded below are imported
       from macros.xml. -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>

  <!-- =============================================================== -->
  <command detect_errors="exit_code">
    <![CDATA[
      ## Run the ionflow.R driver that sits next to this wrapper. The script
      ## path is single-quoted, like every other argument, so that a tool
      ## directory containing spaces does not split the command line.
      Rscript '${__tool_directory__}/ionflow.R'

        ## Input and pre-processing options (section "pre")
        --ion_file '$ion_file'
        --var_id '$pre.var_id'
        --batch_id '$pre.batch_id'
        --data_id '$pre.data_id'
        --method_norm '$pre.method_norm'
        --batch_control '$pre.batch.batch_control'
        ## The control-line options only exist in the "yes" branch of the
        ## "batch" conditional, so they are emitted only for that branch.
        #if $pre.batch.batch_control == 'yes':
          --control_lines '$pre.batch.control_lines'
          --control_use '$pre.batch.control_use'
        #end if
        --method_outliers '$pre.method_outliers'
        --thres_outl '$pre.thres_outl'
        --stand_method '$pre.stand.stand_method'
        ## The std file is only defined when the "custom" method is chosen.
        #if $pre.stand.stand_method == 'custom':
          --std_file '$pre.stand.std_file'
        #end if
        --thres_symb '$pre.thres_symb'

        ## Exploratory analysis (section "expl")
        --thres_ion_corr '$expl.thres_ion_corr'

        ## Clustering analysis (section "clus")
        --min_clust_size '$clus.min_clust_size'
        --h_tree '$clus.h_tree'
        --filter_zero_string '$clus.filter_zero_string'

        ## Enrichment analysis (section "enri")
        --pval '$enri.pval'
        --min_count '$enri.min_count'
        --ont '$enri.ont'
        --annot_pkg '$enri.annot_pkg'

        ## Network analysis (section "net")
        --method_corr '$net.method_corr'
        --thres_corr '$net.thres_corr'

        ## output: pre-processing
        --pre_proc_pdf '$pre_proc_pdf'
        --df_stats_out '$df_stats_out'
        --outl_out '$outl_out'
        --data_wide_out '$data_wide_out'
        --data_wide_symb_out '$data_wide_symb_out'

        ## output: exploratory analysis
        --expl_anal_pdf '$expl_anal_pdf'

        ## output: clustering analysis
        --clus_anal_pdf '$clus_anal_pdf'

        ## output: enrichment analysis
        --go_en_out '$go_en_out'

        ## output: network analysis
        --gene_net_pdf '$gene_net_pdf'
        --imbe_out '$imbe_out'
    ]]>
  </command>

  <!-- =============================================================== -->
  <inputs>
    <!-- Primary input dataset (CSV); handed to ionflow.R as its ion_file
         option by the command template. -->
    <param
        name="ion_file"
        type="data"
        format="csv"
        label="Ion data table"
        help="Ion data table with columns of Ions and meta information." />

    <!-- start of pre -->
    <!-- Pre-processing options. The command template forwards each
         parameter to ionflow.R as the option of the same name. -->
    <section name="pre" title="Pre Processing">

      <!-- Column layout of the input table -->
      <param name="var_id" type="integer" value="1"
             label="Specify variable column index of input data"
             help="Indicate which column will be the variable (ORF or SYMBOL)." />

      <param name="batch_id" type="integer" value="3"
             label="Specify batch ID column index of input data"
             help="Indicate which column will be batch ID." />

      <param name="data_id" type="integer" value="5"
             label="Specify data start column index of input data"
             help="Indicate which column will be the start of data matrix." />

      <!-- Batch correction method -->
      <param name="method_norm" type="select"
             label="Select a method for batch correction">
        <option value="median" selected="true">Median</option>
        <option value="median+std">Median plus std</option>
        <option value="none">None</option>
      </param>

      <!-- batch control: control_lines and control_use are only offered
           (and only placed on the command line) when the answer is "yes" -->
      <conditional name="batch">
        <param name="batch_control" type="select"
               label="Use control lines for batch correction or not">
          <option value="yes" selected="true">Yes</option>
          <option value="no">No</option>
        </param>

        <when value="yes">
          <!-- The sanitizer's valid set is limited to ASCII letters and
               digits. -->
          <param name="control_lines" type="text" value="BY4741"
                 label="Specify batch control lines (rows)">
            <sanitizer>
              <valid initial="string.ascii_letters,string.digits"></valid>
            </sanitizer>
          </param>

          <param name="control_use" type="select"
                 label="Select lines for batch correction">
            <option value="control">Use control lines for batch correction</option>
            <option value="all" selected="true">Use all lines for batch correction</option>
            <option value="control.out">Use all lines except control lines for batch correction</option>
          </param>
        </when>

        <when value="no">
        </when>
      </conditional>

      <!-- Outlier detection -->
      <param name="method_outliers" type="select"
             label="Select a method for outlier detection">
        <option value="IQR">IQR</option>
        <option value="mad">MAD</option>
        <option value="log.FC.dist" selected="true">log FC dist</option>
        <option value="none">none</option>
      </param>

      <param name="thres_outl" type="float" value="3.0"
             label="Specify outlier detection threshold" />

      <!-- standardisation method: every select option has its own when
           block (matching the "batch" conditional above); only "custom"
           needs an extra input. -->
      <conditional name="stand">
        <param name="stand_method" type="select"
               label="Select a method for standardisation">
          <option value="std" selected="true">STD</option>
          <option value="mad">MAD</option>
          <option value="custom">Custom</option>
        </param>

        <when value="std">
        </when>

        <when value="mad">
        </when>

        <when value="custom">
          <param name="std_file" type="data" format="tabular"
                 label="STD file"
                 help="A data matrix with only two columns. The first column contains the ion names and the second one the std values." />
        </when>
      </conditional>

      <!-- Symbolisation threshold -->
      <param name="thres_symb" type="float" value="2.0"
             label="Specify symbolisation threshold" />

    </section>
    <!-- end of pre -->

    <!-- Exploratory analysis options. The threshold is bounded to the
         0 - 1 range its label announces, in the same way as thres_corr in
         the network analysis section. -->
    <section name="expl" title="Exploratory analysis">
      <param name="thres_ion_corr" type="float" value="0.15" min="0" max="1"
             label="Threshold for Ion correlation (0 - 1)" />
    </section>

    <!-- Clustering analysis options.
         NOTE(review): min_clust_size is typed as float although its label
         describes a count; confirm what ionflow.R expects before changing
         the type. -->
    <section name="clus" title="Clustering analysis">
      <param
          name="min_clust_size"
          type="float"
          value="10.0"
          label="Specify minimal cluster center number" />
      <param
          name="h_tree"
          type="float"
          value="0.0"
          label="Cutting height for hierarchical clustering" />
      <!-- Sent to the script as the literal text "True" or "False". -->
      <param
          name="filter_zero_string"
          type="boolean"
          truevalue="True"
          falsevalue="False"
          checked="True"
          label="Filter the zero string?" />
    </section>

    <!-- Enrichment analysis options.
         NOTE(review): min_count is typed as float although it is described
         as a count; confirm what ionflow.R expects before changing it. -->
    <section name="enri" title="Enrichment analysis">
      <!-- A p-value threshold is only meaningful between 0 and 1. -->
      <param name="pval" type="float" value="0.05" min="0" max="1"
             label="Specify p-value threshold for enrichment analysis" />
      <param name="min_count" type="float" value="3.0"
             label="Minimal count number for enrichment analysis" />
      <param name="ont" type="select"
             label="Select gene ontology for GO Terms">
        <option value="BP" selected="true">BP</option>
        <option value="MF">MF</option>
        <option value="CC">CC</option>
      </param>
      <param name="annot_pkg" type="select"
             label="Select an annotation package">
        <option value="org.Sc.sgd.db" selected="true">Yeast(org.Sc.sgd.db)</option>
        <option value="org.Hs.eg.db">Human(org.Hs.eg.db)</option>
        <option value="org.Mm.eg.db">Mouse(org.Mm.eg.db)</option>
      </param>
    </section>

    <!-- Network analysis options: the similarity measure and the cut-off
         applied to it. -->
    <section name="net" title="Network analysis">
      <param
          name="method_corr"
          type="select"
          label="Select a method for similarity measure">
        <option value="pearson">Pearson</option>
        <option value="spearman">Spearman</option>
        <option value="kendall">Kendall</option>
        <option value="cosine" selected="true">Cosine</option>
        <option value="mahal_cosine">Mahalanobis Cosine</option>
        <option value="hybrid_mahal_cosine">Hybrid Mahalanobis Cosine</option>
      </param>
      <param
          name="thres_corr"
          type="float"
          value="0.7"
          min="0"
          max="1"
          label="Specify similarity threshold (0 - 1)" />
    </section>

  </inputs>


  <!-- =============================================================== -->
  <!-- Output datasets. Each name matches the ionflow.R option of the same
       name in the command template. -->
  <outputs>
    <!-- pre-processing -->
    <data format="pdf" name="pre_proc_pdf"
          label="Pre-processing plots for Ions on ${on_string}" />
    <data format="tabular" name="df_stats_out"
          label="Statistical summary of data set on ${on_string}" />
    <data format="tabular" name="outl_out"
          label="Outlier table on ${on_string}" />
    <data format="tabular" name="data_wide_out"
          label="Pre-processed data in wide format on ${on_string}" />
    <data format="tabular" name="data_wide_symb_out"
          label="Symbolization data in wide format on ${on_string}" />

    <!-- exploratory analysis -->
    <data format="pdf" name="expl_anal_pdf"
          label="Exploratory analysis plots for Ions on ${on_string}" />

    <!-- clustering analysis: given its own label so that it can be told
         apart from the exploratory PDF in the history -->
    <data format="pdf" name="clus_anal_pdf"
          label="Clustering analysis plots on ${on_string}" />

    <!-- enrichment analysis -->
    <data format="tabular" name="go_en_out"
          label="GO enrichment table on ${on_string}" />

    <!-- network analysis -->
    <data format="pdf" name="gene_net_pdf"
          label="Gene network plots on ${on_string}" />
    <data format="tabular" name="imbe_out"
          label="Impact and betweenness table on ${on_string}" />
  </outputs>

  <!-- =============================================================== -->
  <tests>
    <!-- Single test case that runs the whole pipeline on a shortened
         knockout data set. Parameters are addressed by their bare names
         rather than by section-qualified paths. -->
    <test>
      <!-- input and pre-processing parameters -->
      <param name="ion_file" value="Dataset_IonFlow_Ionome_KO_short.csv" />
      <param name="var_id" value="1" />
      <param name="batch_id" value="3" />
      <param name="data_id" value="5" />
      <param name="method_norm" value="median" />
      <param name="batch_control" value="yes" />
      <param name="control_lines" value="BY4741" />
      <param name="control_use" value="all" />
      <param name="method_outliers" value="log.FC.dist" />
      <param name="thres_outl" value="3.0" />
      <param name="stand_method" value="std" />
      <param name="thres_symb" value="2" />
      <!-- exploratory analysis -->
      <param name="thres_ion_corr" value="0.15" />
      <!-- clustering analysis -->
      <param name="min_clust_size" value="10.0" />
      <param name="h_tree" value="0.0" />
      <param name="filter_zero_string" value="TRUE" />
      <!-- enrichment analysis -->
      <param name="pval" value="0.05" />
      <param name="min_count" value="3" />
      <param name="ont" value="BP" />
      <param name="annot_pkg" value="org.Sc.sgd.db" />
      <!-- network analysis -->
      <param name="method_corr" value="cosine" />
      <param name="thres_corr" value="0.7" />
      <!-- expected results: PDF outputs use compare="sim_size", tabular
           outputs use compare="diff" -->
      <output name="pre_proc_pdf" file="res/pre_proc.pdf" compare="sim_size" />
      <output name="df_stats_out" file="res/df_stats.tsv" compare="diff" />
      <output name="outl_out" file="res/outl.tsv" compare="diff" />
      <output name="data_wide_out" file="res/data_wide.tsv" compare="diff" />
      <output name="data_wide_symb_out" file="res/data_wide_symb.tsv" compare="diff" />
      <output name="expl_anal_pdf" file="res/expl_anal.pdf" compare="sim_size" />
      <output name="clus_anal_pdf" file="res/clus_anal.pdf" compare="sim_size" />
      <output name="go_en_out" file="res/go_en.tsv" compare="diff" />
      <output name="gene_net_pdf" file="res/gene_net.pdf" compare="sim_size" />
      <output name="imbe_out" file="res/impact_betweenness.tsv" compare="diff" />
    </test>
  </tests>

  <!-- =============================================================== -->
<help>
IonFlow  Pipeline
=================

Description
-----------

This galaxy tool wraps R package IonFlow_ with modification to process
ionomics data to aid reproducible data sharing and big data initiatives.

The pipeline includes:

Pre-Processing
  This procedure performs batch correction with or without control lines,
  outlier detection and data standardisation. The processed concentration
  data and a symbolisation profile data are returned for further analysis.

Exploratory Analysis
  This procedure performs correlation analysis and PCA analysis in terms of
  ions. The heatmap with hierarchical clustering and network graph are based
  on the correlation analysis.

Clustering Analysis
  This step performs hierarchical clustering based on the symbolised profile.
  The selected cluster centres (controlled by the threshold of minimal cluster
  centre number) are applied to enrichment and network analysis.

Enrichment Analysis
  This step uses the clustering results for enrichment analysis. It currently
  supports three annotation packages:

  - Yeast(org.Sc.sgd.db)
  - Human(org.Hs.eg.db)
  - Mouse(org.Mm.eg.db)

Network Analysis
  This part uses hierarchical clustering of the symbolised profile
  for network analysis. Genes with a correlation coefficient larger than a
  threshold are then used. Some network analysis statistics such as impact and
  betweenness are also returned.

.. _IonFlow: https://github.com/AlinaPeluso/MetaboFlow

Inputs
------

Ionomics data
~~~~~~~~~~~~~

The input file is an ionomics data table in tabular (CSV) format. The
following is an example in which the first two columns hold the knockout name
and the batch ID, and the remaining columns are ion data. To use this input
data in ``Pre-processing``, the user must set ``var_id`` to ``1``
(``Knockout``), ``batch_id`` to ``2`` (``Batch_ID``) and ``data_id`` to ``3``.
If the first column also contains the batch control lines, ``control_lines``
should name them. For example, if ``YDL227C`` is to be used as batch control,
set ``control_lines = "YDL227C"``.


 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
 | Knockout | Batch_ID | Ca      | Cd    | Cu    | Fe     | K        | Mg      | Mo    | Na      | Ni    | P        | S        |
 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
 | YDL227C  | 14       | 59.549  | 0.953 | 2.202 | 10.942 | 3448.070 | 693.992 | 1.603 | 259.816 | 1.573 | 4963.315 | 556.397  |
 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
 | YDL227C  | 14       | 62.258  | 0.927 | 2.067 | 26.262 | 3493.741 | 705.008 | 2.691 | 273.640 | 4.443 | 4874.101 | 553.229  |
 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
 | YDL227C  | 14       | 65.075  | 0.875 | 2.048 | 10.244 | 3317.611 | 691.411 | 1.878 | 278.167 | 1.448 | 4608.300 | 535.609  |
 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
 | YDL227C  | 14       | 56.886  | 0.985 | 2.203 |  9.206 | 3330.854 | 702.734 | 1.396 | 268.609 | 1.640 | 5119.736 | 546.230  |
 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+

|

Custom STD data
~~~~~~~~~~~~~~~

Standard deviation values can be provided by the user if the standardisation
method ``stand_method`` in the pre-processing procedure is set to ``custom``.
The user-defined std file is in tabular format, for example:

  +------+----------+
  |  Ion |   sd     |
  +------+----------+
  |  Ca  | 0.150    |
  +------+----------+
  |  Fe  | 0.163    |
  +------+----------+
  |  K   | 0.094    |
  +------+----------+
  |  Mg  | 0.059    |
  +------+----------+
  |  Na  | 0.107    |
  +------+----------+
  |  Ni  | 0.078    |
  +------+----------+
  |  Zn  | 0.067    |
  +------+----------+

|

Outputs
-------

Pre-processing
~~~~~~~~~~~~~~

The output includes:

- A PDF file for the plots:  dot-plot with ion vs batch and distribution
  plot of ions.
- A tabular file for statistical summary of data set
- A tabular file for outlier table
- A tabular file for processed data set
- A tabular file for the symbolisation data set

Exploratory analysis
~~~~~~~~~~~~~~~~~~~~

A single PDF file with plots:

- Correlation map
- Heatmap with dendrogram
- PCA plot
- Correlation network graph

Clustering analysis
~~~~~~~~~~~~~~~~~~~

A single PDF file with clustering plots.

Enrichment analysis
~~~~~~~~~~~~~~~~~~~

A tabular file for GO Terms enrichment table.

Network analysis
~~~~~~~~~~~~~~~~

Two files are returned:

- A PDF file for plots

  - network graph
  - impact scatter plot

- A tabular file for impact and betweenness table


</help>
  <!-- No citations are declared for this tool yet.
       TODO(review): add an entry for the IonFlow package if a citable
       reference exists. -->
  <citations>
  </citations>
</tool>