Mercurial > repos > mora-lab > gsar
changeset 0:f0cad4d3a301 draft
Uploaded
author | mora-lab |
---|---|
date | Thu, 20 May 2021 08:22:23 +0000 |
parents | |
children | 8ff053661ae2 |
files | GSAR.xml |
diffstat | 1 files changed, 173 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GSAR.xml Thu May 20 08:22:23 2021 +0000 @@ -0,0 +1,173 @@ +<tool id="GSAR" name="GSAR" version="0.1.0"> + <description>A set of multivariate statistical tests for self-contained gene set analysis</description> + + <requirements> + <requirement type="package" version="1.24.0">bioconductor-GSAR</requirement> + <requirement type="package" version="1.52.1">bioconductor-GSEABase</requirement> + <requirement type="package" version="1.20.3">r-getopt</requirement> + </requirements> + + <command detect_errors="exit_code"><![CDATA[ + Rscript '$__tool_directory__/GSAR.R' + --expr_file '$expression_data_file' + --geneSet_file '$geneSet' + --design_file '$desigin' + --min_size '$adv.min_size' + --max_size '$adv.max_size' + --test_method '$method' + --nperm_number '$adv.perm_num' + --threshold_value '$MST.threshold' + --cor_method '$MST.cor_method' + --GSAR_output_p_value '$GSAR_p_value_for_the_geneSet' + --GSAR_output_plot '$GSAR_Significant_pathway_plot' + ]]></command> + + <inputs> + <param name="expression_data_file" type="data" format="CSV" label="Expression data file" help="A csv file containing a matrix of expression values where rows correspond to genes (symbol ID) and columns correspond to samples."/> + <param name="desigin" type="data" format="CSV" label="Design" help="A csv file containing two columns corresponding to samples, one is 'group' (which sets 1 for group1 and 2 for group2), the other one is 'label' (to set group1 and group2 name/label)."/> + <param name="geneSet" type="data" format="rdata" label="Gene Set" help="An `rdata` file including a geneSetCollection object with 'geneSet' as name."/> + <param name="method" type="select" label="Method" help="Statistical method for testing the gene sets."> + <option value="GSNCAtest" selected="true">Gene sets net correlations analysis</option> + <option value="WWtest">Wald-Wolfowitz test</option> + <option value="KStest">Kolmogorov-Smirnov test</option> + <option value="MDtest">Mean Deviation tests</option> + <option value="RKStest">Radial Kolmogorov-Smirnov test</option> + <option value="RMDtest">Radial Mean Deviation test</option> + </param> + + <section name="adv" title="Advanced options"> + <param name="min_size" type="integer" value="10" min="5" label="Min Size for the GeneSet" help="The minimum allowed gene set size. Default value is 10." /> + <param name="max_size" type="integer" value="500" label="Max Size for the GeneSet" help="The maximum allowed gene set size. Default value is 500." /> + <param name="perm_num" type="integer" value="1000" min="100" label="Permutations number" help="Number of permutations used to estimate the null distribution of the test statistic. Default value is 1000. The minumum value is 100." /> + </section> + + <section name="MST" title="Option for plotting minimum spanning trees" > + <param name="threshold" type="float" value="0.05" min="0.0001" max="1" label="Threshold value" help="Threshold value to define significant geneSet for plot minimum spanning trees. Default is 0.05." /> + <param name="cor_method" type="select" label="Correlation coefficient statistic" help="Correlation coefficient is computed while plotting minimum spanning trees for a pathway in two conditions. Possible values are 'pearson', 'spearman' and 'kendall'. Default value is 'pearson'. " > + <option value="pearson" selected="true">pearson</option> + <option value="spearman">spearman</option> + <option value="kendall">kendall</option> + </param> + </section> + + </inputs> + + <outputs> + <data name="GSAR_p_value_for_the_geneSet" format="CSV" label="GSAR_p_value_for_the_geneSet" /> + <data name="GSAR_Significant_pathway_plot" format="pdf" label="GSAR_Significant_pathway_plot" /> + </outputs> + + <tests> + <test> + <param name="expression_data_file" value="GSAR_input_p53DataSet.csv" ftype="csv" /> + <param name="desigin" value="GSAR_design.csv" ftype="csv" /> + <param name="method" value="GSNCAtest" /> + <section name="adv"> + <param name="min_size" value="10" /> + <param name="max_size" value="500" /> + <param name="perm_num" value="1000"/> + </section> + <section name="MST"> + <param name="threshold" value="0.05" /> + <param name="cor_method" value="pearson" /> + </section> + <output name="GSAR_p_value_for_the_geneSet" file="GSAR_p_value_for_the_geneSet.csv" ftype="csv" /> + <output name="GSAR_Significant_pathway_plot" file="GSAR_Significant_pathway_plot.pdf" ftype="pdf" /> + </test> + </tests> + + <help><![CDATA[ + +.. class:: infomark + +**What it does** + + **GSAR (Gene Set Analysis in R)** is an R package which provides a set of multivariate statistical tests for self-contained gene set analysis (GSA). GSAR consists of two-sample multivariate nonparametric statistical methods testing a null hypothesis against specific alternative hypotheses, such as differences in mean (shift), variance (scale) or correlation structure. It also offers a graphical visualization tool for the correlation networks obtained from expression data to examine the change in the net correlation structure of a gene set between two conditions based on the minimum spanning trees. + +--------- + +========= +**Input** +========= + +**Gene expression data** + +The input is a csv file including a matrix of expression values where rows correspond to genes and columns correspond to samples. +Recommended gene id is `Symbol ID`. + +**Design** + +A csv file that has two columns correspond to samples, one is `'group'` (which sets 1 for group1 and 2 for group2), the other one is `'label'` (to set group1 and group2 name/label). + +Example: + + ======= ======= ========= + sample group label + ======= ======= ========= + WT1 1 control + WT2 1 control + WT3 1 control + ... ... ... + MUT31 2 test + MUT32 2 test + MUT33 2 test + ======= ======= ========= + +**Gene Sets** + +**Gene Sets** is an `rdata` file including a `geneSet` variable that is a `geneSetCollection` object built by the `GSEABase` bioconductor package. You can use the **GeneSet from Msigdb/KEGG** tool to get this file. You must pay attention to set the same gene id type as in the gene expression dataset. + +**Method** + +Statistical method to use for testing the gene sets. Must be one of *GSNCA (Gene sets net correlations analysis)*, Wald-Wolfowitz test, Kolmogorov-Smirnov test, Mean Deviation test, Radial Kolmogorov-Smirnov test and Radial Mean Deviation test. + +**Min Size for the Gene Set** + +The minimum allowed gene set size. Default value is 10. + +**Max Size for the Gene Set** + +The maximum allowed gene set size. Default value is 500. + +**Permutations number** + +Number of permutations used to estimate the null distribution of the test statistic. Default value is 1000. The minumum value is 100. + +**Threshold value** + +Threshold value to define significant geneSet for plotting minimum spanning trees. Default as 0.05. + +**Correlation coefficient statistic** + +Correlation coefficient is computed to plot minimum spanning trees for a pathway in two conditions. Possible values are 'pearson' (default), 'spearman' and 'kendall'. Default value is 'pearson'. + +--------- + +========== +**Output** +========== + +**1. A csv file containing the P-values of all gene sets** + +Example + + ========= ========== + geneSet p_value + ========= ========== + pathway_1 0.007 + pathway_2 0.008 + pathway_3 0.009 + pathway_4 0.010 + ... ... + pathway_n 0.999 + ========= ========== + +**2. Plot of minimum spanning trees for significant gene sets in two conditions** + + ]]></help> + + <citations> + <citation type="doi">10.1186/s12859-017-1482-6</citation> + </citations> + +</tool> \ No newline at end of file