comparison GSVA.xml @ 1:b83133fd91d5 draft default tip

Uploaded
author mora-lab
date Thu, 20 May 2021 08:30:52 +0000
parents
children
comparison
equal deleted inserted replaced
0:f94ef9b31552 1:b83133fd91d5
1 <tool id="GSVA" name="GSVA (Gene Set Variation Analysis)" version="0.1.0">
2 <description>GSVA and three other single-sample GSA methods</description>
3
4 <requirements>
5 <requirement type="package" version="1.38.0">bioconductor-GSVA</requirement>
6 <requirement type="package" version="1.0.12">r-pheatmap</requirement>
7 <requirement type="package" version="1.20.3">r-getopt</requirement>
8 </requirements> <!-- Pinned package dependencies of the tool; presumably these are the libraries GSVA.R loads (confirm against the script). -->
9
10 <command detect_errors="exit_code"><![CDATA[
11 Rscript '$__tool_directory__/GSVA.R'
12 --expr '$expression_data'
13 --geneSet '$geneSet'
14 --method '$method'
15 --img_type '$imgfile.img_args.img_type'
16 --img_width '$imgfile.img_args.img_width'
17 --img_height '$imgfile.img_args.img_height'
18 --img_file '$output_img_file'
19 --GSVA_result '$GSVA_result'
20 ]]></command> <!-- Runs GSVA.R from the tool directory with every value single-quoted; heatmap settings are read from the img_args conditional inside the imgfile section, and failures are detected from the exit code. -->
21
22 <inputs> <!-- User-facing parameters; each one is referenced by name in the command template. -->
23 <param name="expression_data" type="data" format="csv" label="Gene expression data" help="A csv file containing a matrix of expression values where rows correspond to genes and columns correspond to samples." />
24 <param name="geneSet" type="data" format="rdata" label="Gene Sets" help="An rdata file including a 'geneSet' variable (which is a geneSetCollection object)."/>
25 <param name="method" type="select" label="Method" display="radio" help="Four single-sample methods: GSVA, ssGSEA, z-score and PLAGE. Details in the help section.">
26 <option value="gsva" selected="true">GSVA</option> <!-- Explicit default, matching the help text ("By default this is set to GSVA") and the img_type select below. -->
27 <option value="ssgsea">ssGSEA</option>
28 <option value="zscore">z-score</option>
29 <option value="plage">PLAGE</option>
30 </param>
31
32 <section name="imgfile" title="Heatmap options" expanded="false">
33 <conditional name="img_args"> <!-- Width/height are repeated in every "when" branch because units and defaults differ by file type: pixels for PNG/JPG, inches for PDF. -->
34 <param name="img_type" type="select" label="Heatmap file type">
35 <option value="PNG" selected="true">PNG</option>
36 <option value="PDF">PDF</option>
37 <option value="JPG">JPG</option>
38 </param>
39
40 <when value="PNG">
41 <param name="img_width" type="integer" value="480" min="480" label="Img width(px)" />
42 <param name="img_height" type="integer" value="480" min="480" label="Img height(px)" />
43 </when>
44
45 <when value="JPG">
46 <param name="img_width" type="integer" value="480" min="480" label="Img width(px)" />
47 <param name="img_height" type="integer" value="480" min="480" label="Img height(px)" />
48 </when>
49
50 <when value="PDF">
51 <param name="img_width" type="integer" value="7" min="7" label="Img width(inches)" />
52 <param name="img_height" type="integer" value="7" min="7" label="Img height(inches)" />
53 </when>
54
55 </conditional>
56 </section>
57
58 </inputs>
59
60 <outputs>
61 <data name="GSVA_result" format="csv" label="GSVA_enrich_result" /> <!-- Enrichment result table, written to the path given to the GSVA_result option of the command. -->
62 <data format="pdf" name="output_img_file" label="GSVA_heatmap">
63 <change_format>
64 <when input="imgfile.img_args.img_type" value="PNG" format="png"/>
65 <when input="imgfile.img_args.img_type" value="JPG" format="jpg"/>
66 </change_format>
67 </data> <!-- Heatmap output: declared as pdf and switched to png or jpg when img_type is PNG or JPG, so pdf remains only for the PDF choice. -->
68 </outputs>
69
70 <tests>
71 <test> <!-- Exercises the gsva method with a 480x480 PNG heatmap. -->
72 <param name="expression_data" value="gsva_input2_GSE10245.csv" ftype="csv" />
73 <param name="geneSet" value="GeneSet_from_Msigdb_KEGG.rdata" ftype="rdata" />
74 <param name="method" value="gsva" />
75 <section name="imgfile">
76 <conditional name="img_args">
77 <param name="img_type" value="PNG" />
78 <param name="img_width" value="480" />
79 <param name="img_height" value="480" />
80 </conditional>
81 </section>
82 <output name="GSVA_result" file="GSVA_enrich_result.csv" ftype="csv" />
83 <output name="output_img_file" file="GSVA_heatmap.png" ftype="png" compare="sim_size" /> <!-- Rendered image bytes can differ between graphics library versions, so the heatmap is compared by file size rather than by the default diff. -->
84 </test>
85 </tests>
86
87 <help><![CDATA[
88
89 .. class:: infomark
90
91 **What it does**
92
93 **GSVA** is a Gene Set Analysis R package that estimates variation of pathway activity over a sample population in an unsupervised manner. This tool includes four methods to analyze microarray and RNA-seq data.
94
95 - **GSVA**: Gene Set Variation Analysis (GSVA) calculates sample-wise gene set enrichment scores as a function of genes inside and outside the gene set, analogously to a competitive gene set test. And it estimates variation of gene set enrichment over the samples independently of any class label.
96
97 - **PLAGE**: Pathway Level analysis of Gene Expression (PLAGE) standardizes each gene expression profile over the samples and then estimates the pathway activity profiles for each gene set as the coefficients of the first right-singular vector of the singular value decomposition (SVD) of the gene set.
98
99 - **z-score**: The combined z-score method also standardizes each gene expression profile into z-scores and combines the individual gene z-scores per sample into a pathway activity profile.
100
101 - **ssGSEA**: The ssGSEA method uses the difference in empirical cumulative distribution functions of gene expression rank inside and outside the gene set to calculate an enrichment statistic per sample which is further normalized by the range of values taken throughout all gene sets and samples.
102
103 --------
104
105 =========
106 **Input**
107 =========
108
109 **Gene expression data**
110
111 A csv file including a matrix of expression values where rows correspond to genes and columns correspond to samples. Recommended gene id is Entrez ID.
112
113 **Gene Sets**
114
115 **Gene Sets** is an `rdata` file including a 'geneSet' variable (which is a geneSetCollection object built by the `GSEABase` package). You can use the **GeneSet from Msigdb/KEGG** tool to get this file. Make sure the gene ID type of the gene sets matches the one used in the gene expression dataset.
116
117 **Method**
118
119 Method to be used in the estimation of gene set enrichment scores per sample. By default this is set to `GSVA` but other options are `ssGSEA`, `z-score` and `PLAGE`. The latter two first standardize expression profiles into z-scores over the samples and, in the case of zscore, it combines them together as their sum divided by the square-root of the size of the gene set, while in the case of PLAGE they are used to calculate the singular value decomposition (SVD) over the genes in the gene set and use the coefficients of the first right-singular vector as pathway activity profile.
120
121 --------
122
123 ==========
124 **Output**
125 ==========
126
127 **1. A gene-set by sample matrix of enrichment scores**
128
129 ========= ========== ======== ======== ======== ==== =========
130 geneSet sample_1 sample_2 sample_3 sample_4 ... sample_n
131 ========= ========== ======== ======== ======== ==== =========
132 pathway_1
133 pathway_2
134 pathway_3
135 pathway_4
136 ...
137 pathway_n
138 ========= ========== ======== ======== ======== ==== =========
139
140
141 **2. A heatmap for the matrix of enrichment scores**
142
143 You can define the heatmap file type, width and height in the tool's input.
144
145 ]]></help>
146
147 <citations>
148 <citation type="doi">10.1186/1471-2105-14-7</citation>
149 </citations> <!-- DOI-type citation for the tool; presumably the GSVA method publication (confirm). -->
150
151 </tool>