comparison egsea.xml @ 0:a8a083193440 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/egsea commit 7d0c7d850cd56ea3e54d8c03266f719241b20b8b
author iuc
date Thu, 25 Jan 2018 02:23:23 -0500
parents
children 73281fbdf6c1
comparison
equal deleted inserted replaced
-1:000000000000 0:a8a083193440
1 <tool id="egsea" name="EGSEA" version="1.6.0.0">
2 <description> easy and efficient ensemble gene set testing</description>
3 <requirements>
4 <requirement type="package" version="1.6.0">bioconductor-egsea</requirement>
5 <requirement type="package" version="1.4.4">r-optparse</requirement>
6 <requirement type="package" version="0.2.15">r-rjson</requirement>
7 </requirements>
8 <version_command><![CDATA[
9 echo $(R --version | grep version | grep -v GNU)", EGSEA version" $(R --vanilla --slave -e "library(EGSEA); cat(sessionInfo()\$otherPkgs\$EGSEA\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
10 ]]></version_command>
11 <command detect_errors="exit_code"><![CDATA[
12 ## EGSEA requires at least 2 threads
13 SLOTS=\${GALAXY_SLOTS:-2};
14 [ "\$SLOTS" -eq 1 ] && SLOTS=2;
15
16 mkdir ./report_dir &&
17 mkdir '$outReport.extra_files_path' &&
18
19 Rscript '$__tool_directory__/egsea.R'
20
21 --threads \$SLOTS
22
23 #if $input.format=="files":
24
25 ## Adapted from DESeq2 wrapper
26 #import json
27 #set $temp_factor_names = list()
28 #for $fact in $input.rep_factor:
29 #set $temp_factor = list()
30 #for $g in $fact.rep_group:
31 #set $count_files = list()
32 #for $file in $g.countsFile:
33 $count_files.append(str($file))
34 #end for
35 $temp_factor.append( {str($g.groupName): $count_files} )
36 #end for
37
38 $temp_factor.reverse()
39 $temp_factor_names.append([str($fact.factorName), $temp_factor])
40 #end for
41 --filesPath '#echo json.dumps(temp_factor_names)#'
42
43 #elif $input.format=="matrix":
44 --matrixPath '$input.counts'
45 #if $input.fact.ffile=='yes':
46 --factFile '$input.fact.finfo'
47 #else:
48 --factInput '${ '|'.join( ['%s::%s' % ($x.factorName, $x.groupNames) for x in $input.fact.rep_factor] ) }'
49 #end if
50 #end if
51
52 --contrastData '${ ','.join( ['%s' % $x.contrast for x in $rep_contrast] ) }'
53
54 --genes '$genes'
55 --species $species
56
57 --base_methods $base_methods
58 --msigdb $msigdb.msigdb_gsets
59 --keggdb $keggdb.keggdb_gsets
60 --gsdb $gsdb.gsdb_gsets
61
62 --display_top $advanced.display_top
63 --min_size $advanced.min_size
64 --fdr_cutoff $advanced.fdr_cutoff
65 --combine_method $advanced.combine_method
66 --sort_method $advanced.sort_method
67
68 --rdaOpt $advanced.rdaOpt
69
70 && cp ./report_dir/index.html '$outReport'
71 && cp -r ./report_dir/* '$outReport.extra_files_path'
72
73 #if $advanced.rscriptOpt:
74 && cp '$__tool_directory__/egsea.R' '$outRscript'
75 #end if
76
77 ]]></command>
78 <inputs>
79
80 <!-- Counts and Factors -->
81 <conditional name="input">
82 <param name="format" type="select" label="Count Files or Matrix?"
83 help="You can choose to input either separate count files (one per sample) or a single count matrix">
84 <option value="files">Separate Count Files</option>
85 <option value="matrix">Single Count Matrix</option>
86 </param>
87
88 <when value="files">
89 <repeat name="rep_factor" title="Factor" min="1">
90 <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Genotype). One factor must be entered and there must be two or more groups per factor. Optional additional factors (e.g. Batch) can be entered using the Insert Factor button below, see Help section for more information. NOTE: Please only use letters, numbers or underscores.">
91 <sanitizer>
92 <valid initial="string.letters,string.digits"><add value="_" /></valid>
93 </sanitizer>
94 </param>
95 <repeat name="rep_group" title="Group" min="2" default="2">
96 <param name="groupName" type="text" label="Name"
97 help="Name of group that the counts file(s) belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
98 <sanitizer>
99 <valid initial="string.letters,string.digits"><add value="_" /></valid>
100 </sanitizer>
101 </param>
102 <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
103 </repeat>
104 </repeat>
105 </when>
106
107 <when value="matrix">
108 <param name="counts" type="data" format="tabular" label="Count Matrix"/>
109
110 <conditional name="fact">
111 <param name="ffile" type="select" label="Input factor information from file?"
112 help="You can choose to input the factor and group information for the samples from a file or manually enter below.">
113 <option value="no">No</option>
114 <option value="yes">Yes</option>
115 </param>
116 <when value="yes">
117 <param name="finfo" type="data" format="tabular" label="Factor File"/>
118 </when>
119 <when value="no" >
120 <repeat name="rep_factor" title="Factor" min="1">
121 <param name="factorName" type="text" label="Factor Name"
122 help="Name of experiment factor of interest (e.g. Genotype). One factor must be entered and there must be two or more groups per factor. Additional factors (e.g. Batch) can be entered using the Insert Factor button below, see Help section below. NOTE: Please only use letters, numbers or underscores.">
123 <validator type="empty_field" />
124 <validator type="regex" message="Please only use letters, numbers or underscores">^[\w]+$</validator>
125 </param>
126 <param name="groupNames" type="text" label="Groups"
127 help="Enter the group names for the samples separated with commas e.g. WT,WT,WT,Mut,Mut,Mut. The order of the names must match the order of the samples in the columns of the count matrix. NOTE: Please only use letters, numbers or underscores (case sensitive).">
128 <validator type="empty_field" />
129 <validator type="regex" message="Please only use letters, numbers or underscores, and separate levels by commas">^[\w,]+$</validator>
130 </param>
131 </repeat>
132 </when>
133 </conditional>
134 </when>
135 </conditional>
136
137 <!-- Contrasts: one or more group comparisons (e.g. Mut-WT); the command template comma-joins them into the contrastData argument -->
138 <repeat name="rep_contrast" title="Contrast" min="1" default="1">
139 <param name="contrast" type="text" label="Contrast of Interest" help="Names of two groups to compare separated by a hyphen e.g. Mut-WT. If the order is Mut-WT the fold changes in the results will be up/down in Mut relative to WT. If you have more than one contrast enter each separately using the Insert Contrast button below. For more info, see Chapter 8 in the limma User's guide: https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf">
140 <validator type="empty_field" />
141 <validator type="regex" message="Please only use letters, numbers, underscores or hyphens">^[\w-]+$</validator>
142 </param>
143 </repeat>
144
145 <param name="genes" type="data" format="tabular"
146 label="Symbols Mapping file"
147 help="A file of Entrez Gene IDs mapped to Gene symbols in the format shown in the Help section below."/>
148
149 <param name="species" type="select" label="Species" help="Default: Human">
150 <option value="human" selected="True">Human</option>
151 <option value="mouse">Mouse</option>
152 <option value="rat">Rat</option>
153 </param>
154
155 <param name="base_methods" type="select" display="checkboxes" multiple="True" min="3" label="Gene Set Testing Methods" help="Select at least 3 gene set testing methods">
156 <option value="camera" selected="True">camera</option>
157 <option value="safe">safe</option>
158 <option value="gage">gage</option>
159 <option value="zscore">zscore</option>
160 <option value="gsva">gsva</option>
161 <option value="globaltest" selected="True">globaltest</option>
162 <option value="ora" selected="True">ora</option>
163 <option value="ssgsea">ssgsea</option>
164 <option value="padog">padog</option>
165 <option value="plage">plage</option>
166 <option value="fry">fry</option>
167 <option value="roast">roast</option>
168 </param>
169
170 <section name="msigdb" title="MSigDB Gene Sets" expanded="True">
171 <param name="msigdb_gsets" type="select" display="checkboxes" optional="True" multiple="True" label="MSigDB Gene Set Collections" help="Choose any MSigDB Gene Set Collections you want to use. Default: H: hallmark gene sets">
172 <option value="h" selected="True">H: hallmark gene sets</option>
173 <option value="c1">C1: positional gene sets (human only)</option>
174 <option value="c2">C2: curated gene sets</option>
175 <option value="c3">C3: motif gene sets</option>
176 <option value="c4">C4: computational gene sets</option>
177 <option value="c5">C5: GO gene sets</option>
178 <option value="c6">C6: oncogenic gene sets</option>
179 <option value="c7">C7: immunologic gene sets</option>
180 </param>
181 </section>
182
183 <section name="keggdb" title="KEGG Pathways" expanded="True">
184 <param name="keggdb_gsets" type="select" display="checkboxes" optional="True" multiple="True" label="KEGG Pathways" help="Choose any KEGG Pathways you want to use. Default: None">
185 <option value="keggmet">Metabolism pathways</option>
186 <option value="keggsig">Signalling pathways</option>
187 <option value="keggdis">Disease pathways</option>
188 </param>
189 </section>
190
191 <section name="gsdb" title="GeneSetDB Gene Sets" expanded="True">
192 <param name="gsdb_gsets" type="select" display="checkboxes" optional="True" multiple="True" label="GeneSetDB Gene Set Collections" help="Choose any GeneSetDB Gene Set Collections you want to use. Default: None">
193 <option value="gsdbpath">Pathway collection</option>
194 <option value="gsdbdis">Disease/Phenotype collection</option>
195 <option value="gsdbdrug">Drug/Chemical collection</option>
196 <option value="gsdbreg">Gene Regulation collection</option>
197 <option value="gsdbgo">Gene Ontology collection</option>
198 </param>
199 </section>
200
201 <section name="advanced" title="Advanced Options">
202 <param name="display_top" type="integer" value="5" min="1" max="20" label="Top Gene Sets to display" help="Set the number of top gene sets to display. Increasing this number increases the time to run, in order to generate the additional plots etc."/>
203 <param name="min_size" type="integer" min="0" value="2" label="Minimum Size of Gene Set" help="Minimum size of a gene set to be included in the analysis. Default: 2" />
204 <param name="fdr_cutoff" type="float" value="0.05" min="0" max="1" label="FDR cutoff" help="Cut-off threshold of differentially expressed genes used for the calculation of Significance Score and Regulation Direction. Default: 0.05"/>
205 <param name="combine_method" type="select" label="Combine Method" help="Method to use to combine the p-values from the different gene set testing methods. Default: wilkinson">
206 <option value="wilkinson" selected="True">wilkinson</option>
207 <option value="fisher">fisher</option>
208 <option value="average">average</option>
209 <option value="logitp">logitp</option>
210 <option value="sump">sump</option>
211 <option value="sumz">sumz</option>
212 <option value="votep">votep</option>
213 <option value="median">median</option>
214 </param>
215 <param name="sort_method" type="select" label="Sort Method" help="Select method to sort the results. Any of EGSEA’s combined scores or the rankings from individual base methods can be used for sorting the results. Default: med.rank">
216 <option value="p.adj">p.adj</option>
217 <option value="p.value">p.value</option>
218 <option value="vote.rank">vote.rank</option>
219 <option value="avg.rank">avg.rank</option>
220 <option value="med.rank" selected="True">med.rank</option>
221 <option value="min.pvalue">min.pvalue</option>
222 <option value="min.rank">min.rank</option>
223 <option value="avg.logfc">avg.logfc</option>
224 <option value="avg.logfc.dir">avg.logfc.dir</option>
225 <option value="direction">direction</option>
226 <option value="significance">significance</option>
227 <option value="camera">camera</option>
228 <option value="roast">roast</option>
229 <option value="safe" >safe</option>
230 <option value="gage">gage</option>
231 <option value="padog">padog</option>
232 <option value="plage">plage</option>
233 <option value="zscore">zscore</option>
234 <option value="gsva">gsva</option>
235 <option value="ssgsea">ssgsea</option>
236 <option value="globaltest">globaltest</option>
237 <option value="ora">ora</option>
238 <option value="fry">fry</option>
239 </param>
240 <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/>
241 <param name="rdaOpt" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?" help="Output all the data used by R in the analysis, can be loaded into R. Default: No" />
242 </section>
243 <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
244 <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
245 </param>
246 </inputs>
247
248 <outputs>
249 <data name="outReport" format="html" label="${tool.name} on ${on_string}: Report"/>
250 <collection name="outTables" type="list" label="${tool.name} on ${on_string}: Tables">
251 <discover_datasets pattern="(?P&lt;name&gt;.+)\.txt$" format="tabular" directory="report_dir/ranked-gene-sets-base" visible="false" />
252 </collection>
253 <data name="outRscript" format="txt" from_work_dir="*.txt" label="${tool.name} on ${on_string}: Rscript">
254 <filter>advanced['rscriptOpt'] is True</filter>
255 </data>
256 <data name="outRdata" format="rdata" from_work_dir="EGSEA_analysis.RData" label="${tool.name} on ${on_string}: RData file">
257 <filter>advanced['rdaOpt'] is True</filter>
258 </data>
259 </outputs>
260
261 <tests>
262 <!-- Ensure report is output -->
263 <test expect_num_outputs="1">
264 <param name="non_commercial_use" value="True"/>
265 <param name="format" value="matrix" />
266 <param name="counts" value="il13.counts"/>
267 <param name="genes" value="il13.genes"/>
268 <repeat name="rep_factor">
269 <param name="factorName" value="Treatment"/>
270 <param name="groupNames" value="IL13,IL13Ant,IL13,IL13,IL13Ant"/>
271 </repeat>
272 <repeat name="rep_contrast">
273 <param name="contrast" value="IL13Ant-IL13"/>
274 </repeat>
275 <output_collection name="outTables" count="1">
276 <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
277 </output_collection>
278 <output name="outReport">
279 <assert_contents>
280 <has_text text="Gene Set Testing Report"/>
281 </assert_contents>
282 </output>
283 </test>
284 <!-- Ensure factors file input works and Rscript is output-->
285 <test expect_num_outputs="2">
286 <param name="non_commercial_use" value="True"/>
287 <param name="format" value="matrix"/>
288 <param name="counts" value="il13.counts"/>
289 <param name="genes" value="il13.genes"/>
290 <param name="ffile" value="yes"/>
291 <param name="finfo" value="il13.group"/>
292 <repeat name="rep_contrast">
293 <param name="contrast" value="IL13Ant-IL13" />
294 </repeat>
295 <param name="rscriptOpt" value="True"/>
296 <output name="outReport">
297 <assert_contents>
298 <has_text text="Gene Set Testing Report"/>
299 </assert_contents>
300 </output>
301 <output_collection name="outTables" count="1">
302 <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
303 </output_collection>
304 <output name="outRscript" value="out_rscript.txt"/>
305 </test>
306 <!-- Ensure two contrasts work: expects one ranked table per contrast plus the combined "compare" table -->
307 <test expect_num_outputs="1">
308 <param name="non_commercial_use" value="True"/>
309 <param name="format" value="matrix"/>
310 <param name="counts" value="il13.counts"/>
311 <param name="genes" value="il13.genes"/>
312 <param name="ffile" value="yes"/>
313 <param name="finfo" value="il13.group"/>
314 <repeat name="rep_contrast">
315 <param name="contrast" value="IL13Ant-IL13"/>
316 </repeat>
317 <repeat name="rep_contrast">
318 <param name="contrast" value="IL13-IL13Ant"/>
319 </repeat>
320 <output_collection name="outTables" count="3">
321 <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
322 <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular" file="ranked-h-gene-sets-IL13-IL13Ant.txt"/>
323 <element name="ranked-h-gene-sets-compare" ftype="tabular" file="ranked-h-gene-sets-compare.txt"/>
324 </output_collection>
325 </test>
326 <!-- Ensure two factors works -->
327 <test expect_num_outputs="1">
328 <param name="non_commercial_use" value="True"/>
329 <param name="format" value="matrix"/>
330 <param name="counts" value="il13.counts"/>
331 <param name="genes" value="il13.genes"/>
332 <param name="ffile" value="yes"/>
333 <param name="finfo" value="il13.group_batch"/>
334 <repeat name="rep_contrast">
335 <param name="contrast" value="IL13Ant-IL13"/>
336 </repeat>
337 <output_collection name="outTables" count="1">
338 <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
339 </output_collection>
340 </test>
341 </tests>
342
343 <help><![CDATA[
344
345 .. class:: infomark
346
347 **What it does**
348
349 EGSEA_, an acronym for *Ensemble of Gene Set Enrichment Analyses*, is a `Bioconductor package`_ that utilizes the analysis results of twelve prominent GSE algorithms from the literature to calculate collective significance scores for gene sets. These methods are currently: **ora, globaltest, plage, safe, zscore, gage, ssgsea, roast, fry, padog, camera, gsva**. The ora, gage, camera and gsva methods depend on a competitive null hypothesis while the remaining eight methods are based on a self-contained hypothesis. EGSEA’s gene set database, the **EGSEAdata** Bioconductor package, contains around 25,000 gene sets from 16 collections from MSigDB_, KEGG_ and GeneSetDB_. Supported organisms are human, mouse and rat, however MSigDB is only available for human and mouse. An example `EGSEA workflow`_ is available at the Bioconductor workflows website.
350
351 Currently the **egsea.cnt** function is implemented in this tool. This function takes a raw RNA-Seq count matrix and uses **limma-voom** with TMM normalization to convert the RNA-seq counts into expression values for EGSEA analysis.
352
353 EGSEA returns a HTML report of detailed analysis results for each contrast of interest and comparative analysis results. The heatmap view at both the gene set and summary level and the summary level bar plots can be useful summaries to include in publications to highlight the gene set testing results.
354
355 .. class:: warningmark
356
357 **WARNING: This tool is only available for non-commercial use.**
358
359 The **GAGE** and **Pathview** packages used by EGSEA make use of KEGG data
360 and non-academic uses may require a KEGG license agreement. Before using, be
361 sure to review, agree, and comply with the relevant licenses for KEGG and
362 MSigDB.
363
364 * `KEGG Licence`_
365 * `MSigDB Licence`_
366
367 .. _KEGG Licence: http://www.kegg.jp/kegg/legal.html
368 .. _MSigDB Licence: http://software.broadinstitute.org/gsea/license_terms_list.jsp
369
370 -----
371
372 **Inputs**
373
374 **Counts Matrix**
375
376 This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool.
377
378 Example:
379
380 =============== ========== ========== ========== ========= ========= =========
381 EntrezID **WT1** **WT2** **WT3** **Mut1** **Mut2** **Mut3**
382 =============== ========== ========== ========== ========= ========= =========
383 1 71 73 69 36 22 28
384 1000 3 4 2 4 0 1
385 10000 2310 2142 2683 1683 2068 2172
386 100009605 3 1 2 1 5 3
387 100009613 9 11 4 13 6 10
388 =============== ========== ========== ========== ========= ========= =========
389
390 **Factor Information**
391
392 Enter factor names and groups in the tool form, or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors e.g. Batch.
393
394 Example:
395
396 ========== ============ =========
397 **Sample** **Genotype** **Batch**
398 ---------- ------------ ---------
399 WT1 WT b1
400 WT2 WT b2
401 WT3 WT b3
402 Mut1 Mut b1
403 Mut2 Mut b2
404 Mut3 Mut b3
405 ========== ============ =========
406
407 *Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, limma will fit an additive linear model.
408
409 *Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
410
411 **Symbols Mapping file**
412
413 A file containing the Gene Symbol for each Entrez Gene ID. The first column must be the Entrez Gene IDs and the second column must be the Gene Symbols. It is used for the heatmap visualization. The number of rows should match that of the Counts Matrix.
414
415 Example:
416
417 ========= =========
418 EntrezID Symbols
419 ========= =========
420 1 A1BG
421 1000 CDH2
422 10000 AKT3
423 100009605 TRNAF1
424 100009613 ANO1-AS2
425 ========= =========
426
427 -----
428
429 **Outputs**
430
431 The EGSEA report is an interactive HTML report that is generated to enable a swift navigation through the results of an EGSEA analysis. The pages below are generated for each gene set collection and contrast/comparison.
432
433 **Stats Table page**
434
435 The Stats Table page shows the detailed statistics of the EGSEA analysis for the top gene sets. It shows the EGSEA scores, individual rankings and additional annotation for each gene set. Hyperlinks to the source of each gene set can be seen in this table when they are available. The "Direction" column shows the regulation direction of a gene set which is calculated based on the logFC, which is either calculated from the limma differential expression analysis or provided by the user. The logFC cutoff and FDR cutoff are applied for this calculation. The calculations of the EGSEA scores can be seen in the references section. The method topSets can be used to generate custom Stats Table.
436
437 **Heatmaps page**
438
439 The Heatmaps page shows the heatmaps of the gene fold changes for the gene sets that are presented in the Stats Table page. Red indicates up-regulation while blue indicates down-regulation. Only genes that appear in the input expression/count matrix are visualized in the heat map. Gene names are coloured based on their statistical significance in the limma differential expression analysis. The "Interpret Results" link below each heat map allows the user to download the original heat map values along with additional statistics from limma DE analysis (if available) so that they can be used to perform further analysis in R, e.g., customizing the heat map visualization.
440
441 **Summary Plots page**
442
443 The Summary Plots page shows the methods ranking plot along with the summary plots of EGSEA analysis. The method plot uses multidimensional scaling (MDS) to visualize the ranking of individual methods on a given gene set collection. The summary plots are bubble plots that visualize the distribution of gene sets based on the EGSEA Significance Score and another EGSEA score (default, p-value). Two summary plots are generated: ranking and directional plots. Each gene set is represented with a bubble which is coloured based on the EGSEA ranking (in ranking plots) or gene set regulation direction (in directional plots) and sized based on the gene set cardinality (in ranking plots) or EGSEA Significance score (in directional plots). Since the EGSEA "Significance Score" is proportional to the p-value and the absolute fold changes, it could be useful to highlight gene sets that have high Significance scores. The blue labels on the summary plot indicate gene sets that do not appear in the top 10 list of gene sets based on the "sort.by" argument (black labels) yet they appear in the top 5 list of gene sets based on the EGSEA "Significance Score". If two contrasts are provided, the rank is calculated based on the "comparison" analysis results and the "Significance Score" is calculated as the mean.
444
445 **Pathways page**
446
447 The Pathways page shows the KEGG pathways for the gene sets that are presented in the Stats Table of a KEGG gene set collection. The gene fold changes are overlaid on the pathway maps and coloured based on the gene regulation direction: blue for down-regulation and red for up-regulation. Note that this page only appears if a KEGG gene set collection is used in the EGSEA analysis.
448
449 **GO Graphs page**
450
451 The GO Graphs page shows the Gene Ontology graphs for top 5 GO terms in each of three GO categories: Biological Processes (BP), Molecular Functions (MF), and Cellular Components (CC). Nodes are coloured based on the default sort.by score where red indicates high significance and yellow indicates low significance. Note that this page only appears if a Gene Ontology gene set collection is used, i.e., for the c5 collection from MSigDB or the gsdbgo collection from GeneSetDB.
452
453 **Interpret Results link**
454
455 The Interpret Results hyperlink in the EGSEA report allows the user to download the fold changes and limma analysis results and thus improve the interpretation of the results.
456
457 .. class:: warningmark
458
459 Note that the running time of this tool depends on a number of things, including the number of samples and contrasts provided as input, and also the number of gene set testing methods and gene set collections chosen. For example, the `egsea.cnt example`_ in the EGSEA vignette was conducted with 8 samples and 2 contrasts, using the KEGG Signaling and Disease pathways, and 7 of the 12 gene set testing methods, on a MacBook Pro machine that had a 2.8 GHz Intel Core i7 CPU and 16 GB of RAM. The execution time took 145.5 seconds using 16 threads.
460
461 .. _egsea.cnt example: https://bioconductor.org/packages/release/bioc/vignettes/EGSEA/inst/doc/EGSEA.pdf
462
463 -----
464
465 **More Information**
466
467 **MSigDB Gene Set Collections**
468
469 The MSigDB_ gene sets are divided into 8 major collections:
470
471 * **H: hallmark gene sets** are coherently expressed signatures derived by aggregating many MSigDB gene sets to represent well-defined biological states or processes.
472 * **C1: positional gene sets** for each human chromosome and cytogenetic band.
473 * **C2: curated gene sets** are from online pathway databases, publications in PubMed, and knowledge of domain experts.
474 * **C3: motif gene sets** are based on conserved cis-regulatory motifs from a comparative analysis of the human, mouse, rat, and dog genomes.
475 * **C4: computational gene sets** are defined by mining large collections of cancer-oriented microarray data.
476 * **C5: GO gene sets** consist of genes annotated by the same GO terms.
477 * **C6: oncogenic gene sets** are defined directly from microarray gene expression data from cancer gene perturbations.
478 * **C7: immunologic gene sets** are defined directly from microarray gene expression data from immunologic studies.
479
480 -----
481
482 **GeneSetDB Gene Set Collections**
483
484 GeneSetDB_ gene sets were obtained from `multiple source databases`_ (shown below) and were classified into five subclasses based on the database content: Pathway, Disease/Phenotype, Drug/Chemical, Gene Regulation and Gene Ontology.
485
486 **Pathway**
487
488 * Biocarta
489 * EHMN (Edinburgh Human Metabolic Network)
490 * HumanCyc
491 * INOH (Integrating Network Objects with Hierarchies)
492 * NetPath
493 * PID (Pathway Interaction Database)
494 * Reactome
495 * Wikipathways
496
497 **Disease/Phenotype**
498
499 * CancerGenes
500 * KEGG Disease
501 * HPO (Human Phenotype Ontology)
502 * MethCancerDB
503 * MethyCancer
504 * MPO (Mammalian Phenotype Ontology)
505 * SIDER (SIDe Effect Resource)
506
507 **Drug/Chemical**
508
509 * CTD (Comparative Toxicogenomics Database)
510 * DrugBank
511 * MATADOR (Manually Annotated Targets and Drugs Online Resource)
512 * SMPDB (Small Molecular Pathway DataBase)
513 * STITCH (Search Tool for Interactions of Chemicals)
514 * T3DB (Toxin and Toxin Target Database)
515
516 **Gene Regulation**
517
518 * MicroCosm Targets
519 * miRTarBase
520 * TFactS
521 * Rel/NF-kappaB target genes
522
523 **Gene Ontology**
524
525 * Gene Ontology
526
527 -----
528
529 **KEGG Pathways**
530
531 Obtained by EGSEAdata from the GAGE_ Bioconductor package using the gage function kegg.gsets(). The Pathview_ Bioconductor package is used to visualize the expression data mapped onto the KEGG pathway graphs. Pathview has a GPLv3 licence which means users are required to formally cite the original `Pathview paper`_ (not just mention it) in publications or products. GAGE/Pathview divide the KEGG pathways into 3 categories: Signaling, Metabolism and Disease, listed in this file at the `Pathview website here`_.
532
533 **Signaling**
534
535 * Genetic Information Processing
536 * Environmental Information Processing
537 * Cellular Processes
538 * Organismal Systems
539
540 **Metabolism**
541
542 * Metabolism
543
544 **Disease**
545
546 * Human Diseases
547
548 -----
549
550 Please cite EGSEA_, MSigDB_, KEGG_ and GeneSetDB_ appropriately if you use them.
551
552 .. _EGSEA: https://www.ncbi.nlm.nih.gov/pubmed/27694195
553 .. _Bioconductor package: https://bioconductor.org/packages/release/bioc/html/EGSEA.html
554 .. _MSigDB: http://software.broadinstitute.org/gsea/msigdb
555 .. _KEGG: http://www.genome.jp/kegg/
556 .. _GeneSetDB: http://genesetdb.auckland.ac.nz/haeremai.html
557 .. _EGSEA workflow: https://www.bioconductor.org/help/workflows/EGSEA123/
558 .. _multiple source databases: http://genesetdb.auckland.ac.nz/sourcedb.html
559 .. _GAGE: https://bioconductor.org/packages/release/bioc/html/gage.html
560 .. _Pathview: https://bioconductor.org/packages/release/bioc/html/pathview.html
561 .. _Pathview paper: https://www.ncbi.nlm.nih.gov/pubmed/23740750
562 .. _Pathview website here: https://pathview.uncc.edu/data/khier.tsv
563
564 ]]></help>
565 <citations>
566 <citation type="doi">10.1093/bioinformatics/btw623</citation>
567 </citations>
568 </tool>