view GSAR.xml @ 0:f0cad4d3a301 draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:22:23 +0000
parents
children
line wrap: on
line source

<tool id="GSAR" name="GSAR" version="0.1.0">
    <description>A set of multivariate statistical tests for self-contained gene set analysis</description>

    <!-- Package dependencies of the tool, each pinned to an exact version:
         the GSAR and GSEABase Bioconductor packages and the R getopt package. -->
    <requirements>
        <requirement type="package" version="1.24.0">bioconductor-GSAR</requirement>
        <requirement type="package" version="1.52.1">bioconductor-GSEABase</requirement>
        <requirement type="package" version="1.20.3">r-getopt</requirement>
    </requirements>

    <!-- Command template: runs the GSAR.R script shipped in the tool directory with Rscript.
         Each option forwards one tool parameter (top-level inputs, the "adv" and "MST" sections)
         or the path of one declared output dataset. Every value is single-quoted.
         detect_errors="exit_code" makes the job's exit code the error criterion.
         NOTE: "$desigin" (sic) must match the name of the design input parameter. -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/GSAR.R' 
        --expr_file '$expression_data_file' 
        --geneSet_file '$geneSet'
        --design_file '$desigin' 
        --min_size '$adv.min_size'
        --max_size '$adv.max_size'
        --test_method '$method'
        --nperm_number '$adv.perm_num'
        --threshold_value '$MST.threshold'
        --cor_method '$MST.cor_method'
        --GSAR_output_p_value '$GSAR_p_value_for_the_geneSet'
        --GSAR_output_plot '$GSAR_Significant_pathway_plot'
    ]]></command>

    <inputs>
        <!-- Data inputs. Galaxy datatype extensions are lowercase, hence format="csv" (not "CSV"). -->
        <param name="expression_data_file"
               type="data"
               format="csv"
               label="Expression data file"
               help="A csv file containing a matrix of expression values where rows correspond to genes (symbol ID) and columns correspond to samples."/>
        <!-- The parameter name "desigin" (sic) is kept as is: the command template references it
             as $desigin and the tool test references it by name, so renaming it would change
             the tool's interface. -->
        <param name="desigin"
               type="data"
               format="csv"
               label="Design"
               help="A csv file containing two columns corresponding to samples, one is 'group' (which sets 1 for group1 and 2 for group2), the other one is 'label' (to set group1 and group2 name/label)."/>
        <param name="geneSet"
               type="data"
               format="rdata"
               label="Gene Set"
               help="An `rdata` file including a geneSetCollection object with 'geneSet' as name."/>

        <!-- Statistical test; the selected value is passed verbatim to the script as the test method. -->
        <param name="method" type="select" label="Method" help="Statistical method for testing the gene sets.">
            <option value="GSNCAtest" selected="true">Gene sets net correlations analysis</option>
            <option value="WWtest">Wald-Wolfowitz test</option>
            <option value="KStest">Kolmogorov-Smirnov test</option>
            <option value="MDtest">Mean Deviation test</option>
            <option value="RKStest">Radial Kolmogorov-Smirnov test</option>
            <option value="RMDtest">Radial Mean Deviation test</option>
        </param>

        <!-- Gene set size limits and number of permutations. -->
        <section name="adv" title="Advanced options">
            <param name="min_size"
                   type="integer"
                   value="10"
                   min="5"
                   label="Min Size for the GeneSet"
                   help="The minimum allowed gene set size. Default value is 10."/>
            <param name="max_size"
                   type="integer"
                   value="500"
                   label="Max Size for the GeneSet"
                   help="The maximum allowed gene set size. Default value is 500."/>
            <param name="perm_num"
                   type="integer"
                   value="1000"
                   min="100"
                   label="Permutations number"
                   help="Number of permutations used to estimate the null distribution of the test statistic. Default value is 1000. The minimum value is 100."/>
        </section>

        <!-- Options that control the minimum spanning tree plot. -->
        <section name="MST" title="Option for plotting minimum spanning trees">
            <param name="threshold"
                   type="float"
                   value="0.05"
                   min="0.0001"
                   max="1"
                   label="Threshold value"
                   help="Threshold value to define significant geneSet for plotting minimum spanning trees. Default is 0.05."/>
            <param name="cor_method"
                   type="select"
                   label="Correlation coefficient statistic"
                   help="Correlation coefficient is computed while plotting minimum spanning trees for a pathway in two conditions. Possible values are 'pearson', 'spearman' and 'kendall'. Default value is 'pearson'.">
                <option value="pearson" selected="true">pearson</option>
                <option value="spearman">spearman</option>
                <option value="kendall">kendall</option>
            </param>
        </section>

    </inputs>

    <outputs>
        <!-- Output datasets. Their paths are handed to GSAR.R through the GSAR_output_p_value and
             GSAR_output_plot command-line options. The datatype extension is lowercase ("csv"),
             matching the ftype the tool test expects for the p-value table. -->
        <data name="GSAR_p_value_for_the_geneSet" format="csv" label="GSAR_p_value_for_the_geneSet"/>
        <data name="GSAR_Significant_pathway_plot" format="pdf" label="GSAR_Significant_pathway_plot"/>
    </outputs>

    <!-- Functional test: runs the default method (GSNCAtest) with the default advanced and MST
         settings and compares both outputs with the expected files. -->
    <tests>
        <test>
            <!-- NOTE(review): the data input "geneSet" declared in the inputs section is not given
                 a value here; confirm whether a gene set test file has to be supplied. -->
            <!-- NOTE(review): outputs are compared with fixed files although the analysis uses
                 permutations; presumably GSAR.R makes the results reproducible. Verify. -->
            <param name="expression_data_file" value="GSAR_input_p53DataSet.csv" ftype="csv" />    
            <param name="desigin" value="GSAR_design.csv" ftype="csv" />
            <param name="method" value="GSNCAtest" />
            <section name="adv">
                <param name="min_size" value="10" />
                <param name="max_size" value="500" />
                <param name="perm_num" value="1000"/>
            </section>
            <section name="MST">
                <param name="threshold" value="0.05" />
                <param name="cor_method" value="pearson" />
            </section> 
            <output name="GSAR_p_value_for_the_geneSet" file="GSAR_p_value_for_the_geneSet.csv" ftype="csv" />
            <output name="GSAR_Significant_pathway_plot" file="GSAR_Significant_pathway_plot.pdf" ftype="pdf" />
        </test>
    </tests>

    <help><![CDATA[
        
.. class:: infomark 

**What it does**
    
    **GSAR (Gene Set Analysis in R)** is an R package which provides a set of multivariate statistical tests for self-contained gene set analysis (GSA). GSAR consists of two-sample multivariate nonparametric statistical methods testing a null hypothesis against specific alternative hypotheses, such as differences in mean (shift), variance (scale) or correlation structure. It also offers a graphical visualization tool for the correlation networks obtained from expression data to examine the change in the net correlation structure of a gene set between two conditions based on the minimum spanning trees.

--------- 

=========
**Input**
=========

**Gene expression data** 

The input is a csv file including a matrix of expression values where rows correspond to genes and columns correspond to samples.
Recommended gene id is `Symbol ID`.

**Design**

A csv file that has two columns corresponding to samples: one is `'group'` (which sets 1 for group1 and 2 for group2), the other one is `'label'` (to set the group1 and group2 name/label).

Example:

    ======= ======= =========
    sample  group   label
    ======= ======= =========
    WT1         1   control
    WT2         1   control
    WT3         1   control
    ...       ...   ...
    MUT31       2   test
    MUT32       2   test
    MUT33       2   test
    ======= ======= =========

**Gene Sets**

**Gene Sets** is an `rdata` file including a `geneSet` variable that is a `geneSetCollection` object built by the `GSEABase` bioconductor package. You can use the **GeneSet from Msigdb/KEGG** tool to get this file. Make sure the gene sets use the same gene id type as the gene expression dataset.

**Method** 

Statistical method to use for testing the gene sets. Must be one of: GSNCA (Gene sets net correlations analysis), Wald-Wolfowitz test, Kolmogorov-Smirnov test, Mean Deviation test, Radial Kolmogorov-Smirnov test or Radial Mean Deviation test.

**Min Size for the Gene Set**

The minimum allowed gene set size. Default value is 10.

**Max Size for the Gene Set**

The maximum allowed gene set size. Default value is 500.

**Permutations number**

Number of permutations used to estimate the null distribution of the test statistic. Default value is 1000. The minimum value is 100.

**Threshold value**

Threshold value to define significant gene sets for plotting minimum spanning trees. Default is 0.05.

**Correlation coefficient statistic**

Correlation coefficient is computed to plot minimum spanning trees for a pathway in two conditions. Possible values are 'pearson' (default), 'spearman' and 'kendall'.

--------- 

==========
**Output**
==========

**1. A csv file containing the P-values of all gene sets**

Example:

    ========= ========== 
    geneSet     p_value 
    ========= ========== 
    pathway_1   0.007
    pathway_2   0.008
    pathway_3   0.009
    pathway_4   0.010
    ...         ...
    pathway_n   0.999
    ========= ========== 
 
**2. Plot of minimum spanning trees for significant gene sets in two conditions**

    ]]></help>
    
    <!-- Publication to cite when using this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1186/s12859-017-1482-6</citation>
    </citations>

</tool>