annotate GSAR.xml @ 0:f0cad4d3a301 draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:22:23 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
1 <tool id="GSAR" name="GSAR" version="0.1.0">
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
2 <description>A set of multivariate statistical tests for self-contained gene set analysis</description>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
3
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
4 <requirements>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
5 <requirement type="package" version="1.24.0">bioconductor-GSAR</requirement>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
6 <requirement type="package" version="1.52.1">bioconductor-GSEABase</requirement>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
7 <requirement type="package" version="1.20.3">r-getopt</requirement>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
8 </requirements>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
9
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
11 Rscript '$__tool_directory__/GSAR.R'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
12 --expr_file '$expression_data_file'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
13 --geneSet_file '$geneSet'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
14 --design_file '$desigin'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
15 --min_size '$adv.min_size'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
16 --max_size '$adv.max_size'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
17 --test_method '$method'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
18 --nperm_number '$adv.perm_num'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
19 --threshold_value '$MST.threshold'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
20 --cor_method '$MST.cor_method'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
21 --GSAR_output_p_value '$GSAR_p_value_for_the_geneSet'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
22 --GSAR_output_plot '$GSAR_Significant_pathway_plot'
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
23 ]]></command>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
24
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
25 <inputs>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
26 <param name="expression_data_file" type="data" format="csv" label="Expression data file" help="A csv file containing a matrix of expression values where rows correspond to genes (symbol ID) and columns correspond to samples."/> <!-- format is the lowercase datatype extension, matching ftype="csv" used for this input in the test -->
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
27 <param name="desigin" type="data" format="csv" label="Design" help="A csv file containing two columns corresponding to samples, one is 'group' (which sets 1 for group1 and 2 for group2), the other one is 'label' (to set group1 and group2 name/label)."/> <!-- format is the lowercase datatype extension, matching ftype="csv" used for this input in the test -->
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
28 <param name="geneSet" type="data" format="rdata" label="Gene Set" help="An `rdata` file including a geneSetCollection object with 'geneSet' as name."/>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
29 <param name="method" type="select" label="Method" help="Statistical method for testing the gene sets.">
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
30 <option value="GSNCAtest" selected="true">Gene sets net correlations analysis</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
31 <option value="WWtest">Wald-Wolfowitz test</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
32 <option value="KStest">Kolmogorov-Smirnov test</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
33 <option value="MDtest">Mean Deviation tests</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
34 <option value="RKStest">Radial Kolmogorov-Smirnov test</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
35 <option value="RMDtest">Radial Mean Deviation test</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
36 </param>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
37
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
38 <section name="adv" title="Advanced options">
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
39 <param name="min_size" type="integer" value="10" min="5" label="Min Size for the GeneSet" help="The minimum allowed gene set size. Default value is 10." />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
40 <param name="max_size" type="integer" value="500" label="Max Size for the GeneSet" help="The maximum allowed gene set size. Default value is 500." />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
41 <param name="perm_num" type="integer" value="1000" min="100" label="Permutations number" help="Number of permutations used to estimate the null distribution of the test statistic. Default value is 1000. The minimum value is 100." />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
42 </section>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
43
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
44 <section name="MST" title="Option for plotting minimum spanning trees" >
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
45 <param name="threshold" type="float" value="0.05" min="0.0001" max="1" label="Threshold value" help="Threshold value to define significant geneSet for plotting minimum spanning trees. Default is 0.05." />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
46 <param name="cor_method" type="select" label="Correlation coefficient statistic" help="Correlation coefficient is computed while plotting minimum spanning trees for a pathway in two conditions. Possible values are 'pearson', 'spearman' and 'kendall'. Default value is 'pearson'. " >
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
47 <option value="pearson" selected="true">pearson</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
48 <option value="spearman">spearman</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
49 <option value="kendall">kendall</option>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
50 </param>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
51 </section>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
52
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
53 </inputs>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
54
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
55 <outputs>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
56 <data name="GSAR_p_value_for_the_geneSet" format="csv" label="GSAR_p_value_for_the_geneSet" /> <!-- lowercase datatype extension, matching ftype="csv" expected for this output in the test -->
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
57 <data name="GSAR_Significant_pathway_plot" format="pdf" label="GSAR_Significant_pathway_plot" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
58 </outputs>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
59
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
60 <tests>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
61 <test> <!-- NOTE(review): this test supplies no value for the 'geneSet' data input declared in <inputs>; confirm whether a test gene set file is missing. -->
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
62 <param name="expression_data_file" value="GSAR_input_p53DataSet.csv" ftype="csv" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
63 <param name="desigin" value="GSAR_design.csv" ftype="csv" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
64 <param name="method" value="GSNCAtest" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
65 <section name="adv">
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
66 <param name="min_size" value="10" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
67 <param name="max_size" value="500" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
68 <param name="perm_num" value="1000"/>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
69 </section>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
70 <section name="MST">
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
71 <param name="threshold" value="0.05" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
72 <param name="cor_method" value="pearson" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
73 </section>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
74 <output name="GSAR_p_value_for_the_geneSet" file="GSAR_p_value_for_the_geneSet.csv" ftype="csv" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
75 <output name="GSAR_Significant_pathway_plot" file="GSAR_Significant_pathway_plot.pdf" ftype="pdf" />
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
76 </test>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
77 </tests>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
78
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
79 <help><![CDATA[
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
80
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
81 .. class:: infomark
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
82
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
83 **What it does**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
84
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
85 **GSAR (Gene Set Analysis in R)** is an R package which provides a set of multivariate statistical tests for self-contained gene set analysis (GSA). GSAR consists of two-sample multivariate nonparametric statistical methods testing a null hypothesis against specific alternative hypotheses, such as differences in mean (shift), variance (scale) or correlation structure. It also offers a graphical visualization tool for the correlation networks obtained from expression data to examine the change in the net correlation structure of a gene set between two conditions based on the minimum spanning trees.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
86
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
87 ---------
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
88
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
89 =========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
90 **Input**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
91 =========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
92
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
93 **Gene expression data**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
94
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
95 The input is a csv file including a matrix of expression values where rows correspond to genes and columns correspond to samples.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
96 Recommended gene id is `Symbol ID`.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
97
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
98 **Design**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
99
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
100 A csv file that has two columns corresponding to samples: one is `'group'` (which sets 1 for group1 and 2 for group2), the other one is `'label'` (to set group1 and group2 name/label).
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
101
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
102 Example:
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
103
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
104 ======= ======= =========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
105 sample  group   label
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
106 ======= ======= =========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
107 WT1     1       control
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
108 WT2     1       control
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
109 WT3     1       control
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
110 ...     ...     ...
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
111 MUT31   2       test
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
112 MUT32   2       test
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
113 MUT33   2       test
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
114 ======= ======= =========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
115
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
116 **Gene Sets**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
117
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
118 **Gene Sets** is an `rdata` file including a `geneSet` variable that is a `geneSetCollection` object built by the `GSEABase` bioconductor package. You can use the **GeneSet from Msigdb/KEGG** tool to get this file. You must pay attention to set the same gene id type as in the gene expression dataset.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
119
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
120 **Method**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
121
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
122 Statistical method to use for testing the gene sets. Must be one of *GSNCA (Gene sets net correlations analysis)*, Wald-Wolfowitz test, Kolmogorov-Smirnov test, Mean Deviation test, Radial Kolmogorov-Smirnov test or Radial Mean Deviation test.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
123
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
124 **Min Size for the Gene Set**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
125
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
126 The minimum allowed gene set size. Default value is 10.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
127
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
128 **Max Size for the Gene Set**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
129
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
130 The maximum allowed gene set size. Default value is 500.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
131
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
132 **Permutations number**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
133
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
134 Number of permutations used to estimate the null distribution of the test statistic. Default value is 1000. The minimum value is 100.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
135
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
136 **Threshold value**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
137
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
138 Threshold value to define significant geneSet for plotting minimum spanning trees. Default is 0.05.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
139
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
140 **Correlation coefficient statistic**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
141
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
142 Correlation coefficient is computed to plot minimum spanning trees for a pathway in two conditions. Possible values are 'pearson', 'spearman' and 'kendall'. Default value is 'pearson'.
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
143
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
144 ---------
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
145
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
146 ==========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
147 **Output**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
148 ==========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
149
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
150 **1. A csv file containing the P-values of all gene sets**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
151
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
152 Example
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
153
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
154 ========= ==========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
155 geneSet   p_value
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
156 ========= ==========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
157 pathway_1 0.007
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
158 pathway_2 0.008
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
159 pathway_3 0.009
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
160 pathway_4 0.010
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
161 ...       ...
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
162 pathway_n 0.999
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
163 ========= ==========
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
164
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
165 **2. Plot of minimum spanning trees for significant gene sets in two conditions**
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
166
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
167 ]]></help>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
168
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
169 <citations>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
170 <citation type="doi">10.1186/s12859-017-1482-6</citation>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
171 </citations>
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
172
f0cad4d3a301 Uploaded
mora-lab
parents:
diff changeset
173 </tool>