comparison oncoenrichr_wrapper.xml @ 2:3c61ef74a176 draft

Uploaded
author sigven
date Tue, 27 Sep 2022 15:31:59 +0000
parents
children 2f22b3924572
comparison
equal deleted inserted replaced
1:28d4e824d3d3 2:3c61ef74a176
1 <tool id="oncoenrichr_wrapper" name="oncoEnrichR" version="1.3.2">
2 <description>Cancer-dedicated gene set interpretation</description>
<!-- Runtime environment: the tool is declared to run in the sigven/oncoenrichr:1.3.2 Docker container. -->
3 <requirements>
4 <container type="docker">sigven/oncoenrichr:1.3.2</container>
5 </requirements>
6 <command detect_errors="aggressive"><![CDATA[
## Stage the query gene set in the job directory: pasted text goes to ./query_text.csv,
## an uploaded dataset is linked as ./query_file.csv. Pasted values are single-quoted so the
## shell cannot glob-expand or word-split them; sed turns the __cn__ markers (encoded line
## breaks) back into newlines. The dataset is linked under a fixed name so that characters
## in its display name can never break the shell or R quoting further down.
7 #if $query_set.query_choice.query_input == "text"
8 echo '$query_set.query_choice.query_text' | sed 's/__cn__/\n/g' > query_text.csv &&
9 #set input_file = './query_text.csv'
10 #else if $query_set.query_choice.query_input == "file"
11 ln -s '$query_set.query_choice.query_file' query_file.csv &&
12 #set input_file = './query_file.csv'
13 #end if
14
## Optional custom background set. Both the text and the file branch materialise
## ./custom_bgset.csv, which is the path handed to read.csv below; background_file
## stays empty when no background set is defined or no file was supplied.
15 #set background_file = ''
16 #if $fun_enrich.custom_bgset.def_background
17 #if $fun_enrich.custom_bgset.bg_choice.bg_source == "text"
18 echo '$fun_enrich.custom_bgset.bg_choice.bg_enrich_text' | sed 's/__cn__/\n/g' > custom_bgset.csv &&
19 #set background_file = './custom_bgset.csv'
20 #else if $fun_enrich.custom_bgset.bg_choice.bg_source == "file" and $fun_enrich.custom_bgset.bg_choice.bg_enrich_file
21 ln -s '$fun_enrich.custom_bgset.bg_choice.bg_enrich_file' custom_bgset.csv &&
22 #set background_file = './custom_bgset.csv'
23 #else
24 #set background_file = ''
25 #end if
26 #end if
27
## Run the analysis in a single R -e call: load the bundled database, read the query
## identifiers from the first column of the staged file, and call onco_enrich with the
## options chosen in the form.
28 R -e 'suppressPackageStartupMessages(library(oncoEnrichR));
29 suppressWarnings(load(system.file("internal_db", "oedb.rda", package = "oncoEnrichR")));
30 gene_data <- read.csv("$input_file", stringsAsFactors = FALSE, header = FALSE);
31 oe_report <- oncoEnrichR::onco_enrich(
32 query = gene_data[[1]],
33 oeDB = oedb,
34 #if $query_set.query_id_type
35 query_id_type = "$query_set.query_id_type",
36 #end if
37 ignore_id_err = $query_set.ignore_id_err,
38
39 #if $report_metadata.project_title
40 project_title = "$report_metadata.project_title",
41 #end if
42 #if $report_metadata.project_owner
43 project_owner = "$report_metadata.project_owner",
44 #end if
45 #if $report_metadata.project_description
46 project_description = "$report_metadata.project_description",
47 #end if
48
49 show_enrichment = $modules.show_enrichment,
50 show_ppi = $modules.show_ppi,
51 show_disease = $modules.show_disease,
52 show_cancer_hallmarks = $modules.show_cancer_hallmarks,
53 show_drug = $modules.show_drug,
54 show_aberration = $modules.show_aberration,
55 show_coexpression = $modules.show_coexpression,
56 show_subcell_comp = $modules.show_subcell_comp,
57 show_complex = $modules.show_complex,
58 show_domain = $modules.show_domain,
59 show_fitness = $modules.show_fitness,
60 show_cell_tissue = $modules.show_cell_tissue,
61 show_ligand_receptor = $modules.show_ligand_receptor,
62 show_regulatory = $modules.show_regulatory,
63 show_prognostic = $modules.show_prognostic,
64 show_unknown_function = $modules.show_unknown_function,
65 show_synleth = $modules.show_synleth,
66
67 #if $background_file
68 bgset = read.csv("$background_file", stringsAsFactors = FALSE, header = FALSE)[[1]],
69 #if $fun_enrich.custom_bgset.bg_enrich_id_type
70 bgset_id_type = "$fun_enrich.custom_bgset.bg_enrich_id_type",
71 #end if
72 #if $fun_enrich.custom_bgset.bg_enrich_description
73 bgset_description = "$fun_enrich.custom_bgset.bg_enrich_description",
74 #end if
75 #else
76 bgset = NULL,
77 #end if
78
79 #if $fun_enrich.p_value_cutoff_enrichment
80 p_value_cutoff_enrichment = $fun_enrich.p_value_cutoff_enrichment,
81 #end if
82 #if $fun_enrich.p_value_adjustment_method
83 p_value_adjustment_method = "$fun_enrich.p_value_adjustment_method",
84 #end if
85 #if $fun_enrich.q_value_cutoff_enrichment
86 q_value_cutoff_enrichment = $fun_enrich.q_value_cutoff_enrichment,
87 #end if
88 #if $fun_enrich.min_geneset_size
89 min_geneset_size = $fun_enrich.min_geneset_size,
90 #end if
91 #if $fun_enrich.max_geneset_size
92 max_geneset_size = $fun_enrich.max_geneset_size,
93 #end if
94
## ppi_add_nodes and max_fitness_score both accept 0 in the form. A plain truthiness
## test would treat 0 as unset and silently fall back to the R default, so these two
## are tested against the empty string instead.
95 #if str($protein_interactions.ppi_add_nodes) != ''
96 ppi_add_nodes = $protein_interactions.ppi_add_nodes,
97 #end if
98 #if $protein_interactions.ppi_score_threshold
99 ppi_score_threshold = $protein_interactions.ppi_score_threshold,
100 #end if
101 show_drugs_in_ppi = $protein_interactions.show_drugs_in_ppi,
102 ppi_node_shadow = $protein_interactions.ppi_node_shadow,
103
104 #if $subcellular_compartments.min_subcellcomp_confidence
105 min_subcellcomp_confidence = $subcellular_compartments.min_subcellcomp_confidence,
106 #end if
107 #if str($fitness.max_fitness_score) != ''
108 max_fitness_score = $fitness.max_fitness_score,
109 #end if
110 subcellcomp_show_cytosol = $subcellular_compartments.show_cytosol,
## Always passed, like the other boolean switches, so that unchecking the box takes effect.
112 show_top_diseases_only = $disease.show_top_diseases_only,
114
115 min_confidence_reg_interaction = "$regulatory.min_confidence_reg_interaction",
116 num_terms_enrichment_plot = $fun_enrich.num_terms_enrichment_plot,
117 simplify_go = $fun_enrich.simplify_go,
118 html_floating_toc = $report_metadata.html_floating_toc,
119 html_report_theme = "$report_metadata.html_report_theme",
120 galaxy = TRUE
121 );
122
## Write both outputs: the HTML report (with its extra files directory) and the Excel workbook.
123 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report1", format = "html", selfcontained_html = FALSE, extra_files_path = "$report1.extra_files_path", overwrite = TRUE, ignore_file_extension = TRUE);
124 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report2", format = "excel", overwrite = TRUE, ignore_file_extension = TRUE)' 2>&1
125
126 ]]></command>
127 <inputs>
128 <section title="" name=""/>
<!-- Query gene set: identifiers are either pasted into a text area or read from a dataset;
     the section also sets the identifier type and whether erroneous identifiers are ignored. -->
129 <section name="query_set" title="Query gene set" expanded="true">
130 <conditional name="query_choice">
131 <param name="query_input" type="select" multiple="false" display="radio"
132 label="Query gene set: do you want to upload a file OR paste into a text box?">
133 <option value="text">Text field</option>
134 <option value="file">From file</option>
135 </param>
136 <when value="text">
137 <param type="text" name="query_text" label="Query gene set identifiers (one per line)" area="true"/>
138 </when>
139 <when value="file">
140 <param name="query_file" type="data" format="txt" label="Query gene set identifiers" multiple="false"/>
141 </when>
142 </conditional>
143 <param name="query_id_type" type="select" label="Query identifier type" display="radio" multiple="false">
144 <option value="symbol">Primary gene symbol (HGNC) - e.g. KRAS</option>
145 <option value="uniprot_acc">UniProt accession - e.g. P01116</option>
146 <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option>
147 <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option>
148 <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option>
149 <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option>
150 <option value="refseq_mrna">RefSeq mRNA identifier - e.g. NM_004985</option>
151 <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option>
152 </param>
153 <param name="ignore_id_err" type="boolean" label="Ignore erroneous identifiers" truevalue="T" falsevalue="F" checked="true"/>
154 </section>
155
156 <section title="" name=""/>
<!-- Project metadata and report appearance. report_name is used in the output dataset labels;
     the title, owner, description, floating-TOC flag and theme are inserted into the onco_enrich call. -->
157 <section name="report_metadata" title="Project metadata and output settings" expanded="true">
158 <param type="text" name="report_name" label="Output filename (prefix)" value="Report"/>
159 <param type="text" name="project_title" label="Project title" />
160 <param type="text" name="project_owner" label="Project owner" />
161 <param type="text" name="project_description" label="Project description" area="true"/>
162 <param name="html_floating_toc" type="boolean" label="HTML report - float the table of contents to the left of the main document content (always visible during scrolling)" truevalue="T" falsevalue="F" checked="true"/>
163 <param name="html_report_theme" type="select" label="HTML report - bootswatch theme" expanded="true">
164 <option value="default">default</option>
165 <option value="cerulean">cerulean</option>
166 <option value="cosmo">cosmo</option>
167 <option value="journal">journal</option>
168 <option value="lumen">lumen</option>
169 <option value="paper">paper</option>
170 <option value="sandstone">sandstone</option>
171 <option value="simplex">simplex</option>
172 <option value="spacelab">spacelab</option>
173 <option value="united">united</option>
174 <option value="yeti">yeti</option>
175 </param>
176 </section>
177
178 <section title="" name=""/>
<!-- One boolean switch per report module. Each value (T or F) is inserted verbatim as the
     show_* argument of the same name in the onco_enrich call; show_cell_tissue and
     show_domain are the only switches that are off by default. -->
179 <section name="modules" title="Analysis modules included in the report" expanded="true">
180 <param name="show_disease" type="boolean" label="Gene-cancer associations" truevalue="T" falsevalue="F" checked="true"/>
181 <param name="show_enrichment" type="boolean" label="Gene functional enrichment" truevalue="T" falsevalue="F" checked="true"/>
182 <param name="show_cell_tissue" type="boolean" label="Tissue/cell-type enrichment" truevalue="T" falsevalue="F" checked="false"/>
183 <param name="show_ppi" type="boolean" label="Protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
184 <param name="show_regulatory" type="boolean" label="Regulatory (TF-target) interactions" truevalue="T" falsevalue="F" checked="true"/>
185 <param name="show_ligand_receptor" type="boolean" label="Ligand-receptor interactions" truevalue="T" falsevalue="F" checked="true"/>
186 <param name="show_cancer_hallmarks" type="boolean" label="Cancer hallmark associations" truevalue="T" falsevalue="F" checked="true"/>
187 <param name="show_drug" type="boolean" label="Drug-target associations" truevalue="T" falsevalue="F" checked="true"/>
188 <param name="show_aberration" type="boolean" label="Tumor aberration frequencies" truevalue="T" falsevalue="F" checked="true"/>
189 <param name="show_coexpression" type="boolean" label="Tumor co-expression patterns" truevalue="T" falsevalue="F" checked="true"/>
190 <param name="show_subcell_comp" type="boolean" label="Subcellular localizations" truevalue="T" falsevalue="F" checked="true"/>
191 <param name="show_complex" type="boolean" label="Protein complex memberships" truevalue="T" falsevalue="F" checked="true"/>
192 <param name="show_domain" type="boolean" label="Protein domain frequencies" truevalue="T" falsevalue="F" checked="false"/>
193 <param name="show_fitness" type="boolean" label="Gene fitness effects" truevalue="T" falsevalue="F" checked="true"/>
194 <param name="show_synleth" type="boolean" label="Predicted synthetic lethality interactions" truevalue="T" falsevalue="F" checked="true"/>
195 <param name="show_unknown_function" type="boolean" label="Genes of poorly defined function" truevalue="T" falsevalue="F" checked="true"/>
196 <param name="show_prognostic" type="boolean" label="Prognostic cancer associations" truevalue="T" falsevalue="F" checked="true"/>
197 </section>
198
199 <section title="" name=""/>
<!-- Functional enrichment options. The custom_bgset conditional only exposes its inputs when
     def_background is checked (value T): a background set given as text or as an optional
     dataset, plus its identifier type and a description. The remaining parameters are the
     enrichment cutoffs, gene set size limits, GO simplification flag and plot size. -->
200 <section name="fun_enrich" title="Options - gene functional enrichment" expanded="true">
201 <conditional name="custom_bgset">
202 <param name="def_background" type="boolean" label="Define custom background set (all annotated protein-coding genes by default)" truevalue="T" falsevalue="F" checked="false"/>
203 <when value="T">
204 <conditional name="bg_choice">
205 <param name="bg_source" type="select" display="radio"
206 label="Custom background gene set: do you want to upload a file OR paste into a text box?">
207 <option value="text">Text field</option>
208 <option value="file">From file</option>
209
210 </param>
211 <when value="file">
212 <param type="data" format="txt" name="bg_enrich_file" label="Custom background gene set" optional="true" multiple="false"/>
213 </when>
214 <when value="text">
215 <param type="text" name="bg_enrich_text" label="Custom background gene set identifiers (one per line):" area="true"/>
216 </when>
217 </conditional>
218
219 <param type="select" name="bg_enrich_id_type" label="Custom background identifier type" display="radio" multiple="false">
220 <option value="symbol">Primary gene symbol (HGNC) - e.g. KRAS</option>
221 <option value="uniprot_acc">UniProt accession - e.g. P01116</option>
222 <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option>
223 <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option>
224 <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option>
225 <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option>
226 <option value="refseq_mrna">RefSeq mRNA identifier - e.g. NM_004985</option>
227 <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option>
228 </param>
229 <param type="text" name="bg_enrich_description" label="Custom background gene set description" value="Custom background description"/>
230 </when>
231 </conditional>
232
233 <param type="float" name="p_value_cutoff_enrichment" label="P-value cutoff for enrichment tests (clusterProfiler)" value="0.05"/>
234 <param type="select" name="p_value_adjustment_method" label="P-value adjustment method (clusterProfiler)">
235 <option value="holm">holm</option>
236 <option value="hochberg">hochberg</option>
237 <option value="hommel">hommel</option>
238 <option value="bonferroni">bonferroni</option>
239 <option value="BH">BH</option>
240 <option value="BY">BY</option>
241 <option value="fdr">fdr</option>
242 <option value="none">none</option>
243 </param>
244 <param type="float" name="q_value_cutoff_enrichment" label="Q-value cutoff for enrichment tests to report as significant (clusterProfiler)" value="0.2"/>
245 <param type="integer" name="min_geneset_size" label="Minimum number of genes annotated by ontology term for testing (clusterProfiler)" value="10"/>
246 <param type="integer" name="max_geneset_size" label="Maximum number of genes annotated by ontology term for testing (clusterProfiler)" value="500"/>
247 <param name="simplify_go" type="boolean" label="Simplify GO enrichment results by removal of redundant terms (recommended)" truevalue="T" falsevalue="F" checked="true"/>
248 <param type="integer" name="num_terms_enrichment_plot" label="Number of top enriched Gene Ontology terms (max) to show in enrichment barplot" min="10" max="30" value="20"/>
249 </section>
250
251 <section title="" name=""/>
<!-- Gene fitness option: upper bound on the loss-of-fitness score, restricted to [-5, 0] with default -2. -->
252 <section name="fitness" title="Options - gene fitness scores" expanded="true">
253 <param type="float" name="max_fitness_score" label="Maximum loss-of-fitness score (Bayes Factor from BAGEL) for genes retrieved from Project Score" value="-2" min="-5" max="0"/>
254 </section>
255 <section title="" name=""/>
<!-- Protein-protein interaction network options: number of added non-query nodes (0-50),
     minimum interaction score (400-1000), and two display switches (drugs, node shadow). -->
256 <section name="protein_interactions" title="Options - protein-protein interaction network" expanded="true">
257 <param type="integer" name="ppi_add_nodes" label="Addition of interacting non-queryset proteins to the protein-protein interaction network (maximum number)" value="50" min="0" max="50"/>
258 <param type="integer" name="ppi_score_threshold" label="Minimum confidence score for interactions to be included in the network (STRING confidence: 0-1000)" value="900" min="400" max="1000"/>
259 <param name="show_drugs_in_ppi" type="boolean" label="Show anti-cancer drugs in protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
260 <param name="ppi_node_shadow" type="boolean" label="Add shadow to nodes in protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
261 </section>
262 <section title="" name=""/>
<!-- Regulatory interaction option: minimum confidence level, one of D, C, B or A;
     D is listed first and no option is explicitly marked as selected. -->
263 <section name="regulatory" title="Options - regulatory interactions" expanded="true">
264 <param type="select" name="min_confidence_reg_interaction" label = "Minimum confidence level of regulatory interactions included (DoRothEA - A:highest, D:lowest)">
265 <option value="D">D</option>
266 <option value="C">C</option>
267 <option value="B">B</option>
268 <option value="A">A</option>
269 </param>
270 </section>
271 <section title="" name=""/>
272
<!-- Subcellular localization options: minimum annotation confidence (1-6, default 1) and
     whether cytosol annotations are shown in the heatmap (off by default). -->
273 <section name="subcellular_compartments" title="Options - Subcellular localizations" expanded="true">
274 <param type="integer" name="min_subcellcomp_confidence" label="Minimum confidence level for subcellular localization annotations" value="1" min="1" max="6"/>
275 <param name="show_cytosol" type="boolean" label="Show cytosol annotations (very common localization) in subcellular heatmap " truevalue="T" falsevalue="F" checked="false"/>
276 </section>
277 <section title="" name=""/>
278
<!-- Disease association option: restrict the report to top disease associations (on by default). -->
279 <section name="disease" title="Options - Disease associations" expanded="true">
280 <param type="boolean" name="show_top_diseases_only" label="Show top disease associations only" truevalue="T" falsevalue="F" checked="true"/>
281 </section>
282
283 </inputs>
<!-- Two output datasets, both labelled with the user-supplied report name:
     report2 is the Excel workbook and report1 is the HTML report. -->
284 <outputs>
285 <data format="xlsx" name="report2" label="$report_metadata.report_name - xlsx"/>
286 <data format="html" name="report1" label="$report_metadata.report_name - html"/>
287 </outputs>
288
289
290 <help><![CDATA[
291 .. class:: infomark
292
293 The query gene set is limited to n = 500 identifiers. A limited query gene set (e.g. n < 5) will in general reduce the relevance and significance of many oncoEnrichR report modules.
294
295 -----
296
297 **Dataset formats**
298
299 The input dataset is in tabular_ format. The two output datasets are html_ and xlsx.
300
301 .. _tabular: ${static_path}/formatHelp.html#tab
302 .. _html: ${static_path}/formatHelp.html#html
303
304 -----
305
306 **What it does**
307
308 *OncoEnrichR* is intended for exploratory analysis and prioritization of candidate hits (referred to as *query set* below) from high-throughput cancer biology experiments. The tool queries a number of high-quality data resources in order to interpret the query gene set along various dimensions, examples being cancer aberration frequencies, protein-protein interactions, pathway enrichment, subcellular compartment localization, target druggability, gene fitness scores, and tissue/cell-type specificity.
309
310 The results from the various analysis modules are provided in an interactive HTML report where the user can interrogate the results further. A multisheet Excel workbook is also provided for convenience. The following resources are currently utilized for annotation and analysis:
311
312 - `Open Targets Platform <https://targetvalidation.org/>`_ - disease associations, drug-target associations, cancer hallmarks, and druggability/tractability rankings
313
314 - `The Cancer Genome Atlas <https://portal.gdc.cancer.gov/>`_ - gene aberration frequencies and co-expression patterns in approximately 10,000 primary tumor samples
315
316 - `The Human Protein Atlas <https://www.proteinatlas.org/>`_ - expression data for healthy human tissues (`GTex <https://gtexportal.org/home/>`_)/cell types, and prognostic gene expression associations in cancer (`The Pathology Atlas <https://www.proteinatlas.org/humanproteome/pathology/>`_)
317
318 - `Molecular Signatures Database (MSigDB) <http://software.broadinstitute.org/gsea/msigdb/index.jsp/>`_ - collection of annotated (e.g. towards pathways) gene sets for enrichment/overrepresentation analysis. This includes gene sets from `Gene Ontology <http://geneontology.org/>`_, `Reactome <https://reactome.org/>`_, `KEGG <https://www.genome.jp/kegg/pathway.html/>`_, `WikiPathways <https://www.wikipathways.org/index.php/WikiPathways/>`_, `BIOCARTA <https://maayanlab.cloud/Harmonizome/dataset/Biocarta+Pathways/>`_, as well as curated `immunologic <https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp#C7/>`_ and `cancer-specific <https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp#C6/>`_ signatures.
319
320 - `NetPath <http://www.netpath.org/>`_ - manually curated resource of signal transduction pathways in humans
321
322 - `STRING <https://string-db.org/>`_ - protein-protein interaction database
323
324 - `CellChatDB <http://www.cellchat.org/>`_ - database on ligand-receptor interactions
325
326 - `DoRothEA <https://saezlab.github.io/dorothea/>`_ - gene set resource containing signed transcription factor (TF) - target interactions
327
328 - `CORUM <https://mips.helmholtz-muenchen.de/corum/>`_ - protein complex database
329
330 - `Compleat <https://fgr.hms.harvard.edu/compleat>`_ - protein complex resource
331
332 - `ComplexPortal <https://www.ebi.ac.uk/complexportal/home/>`_ - manually curated, encyclopaedic resource of macromolecular complexes
333
334 - `hu.MAP2 <http://humap2.proteincomplexes.org/>`_ - human protein complex map
335
336 - `ComPPI <http://comppi.linkgroup.hu/>`_ - subcellular compartment database
337
338 - `CancerMine <http://bionlp.bcgsc.ca/cancermine/>`_ - literature-mined resource on cancer drivers, oncogenes and tumor suppressor genes
339
340 - `Network of Cancer Genes <http://ncg.kcl.ac.uk/>`_ - manually curated collection of cancer genes, healthy drivers and their properties
341
342 - `Project Score <https://score.depmap.sanger.ac.uk/>`_ - database on the effects on cancer cell line viability elicited by CRISPR-Cas9 mediated gene inactivation
343
344 - `Genetic determinants of survival in cancer <http://survival.cshl.edu/>`_ - resource on the prognostic impact of genetic aberrations (methylation, CNA, mutation, expression) in human cancers (TCGA)
345
346 - `Predicted synthetic lethality interactions <https://pubmed.ncbi.nlm.nih.gov/34529928/>`_ - comprehensive prediction of synthetic lethality interactions in human cancer cell lines
347
348 The contents of the gene set analysis report attempt to answer the following questions related to the query set:
349
350 - Which diseases/tumor types are known to be associated with genes in the query set, and to what extent? Which genes are classified as proto-oncogenes, tumor suppressors or cancer driver genes?
351
352 - Which query genes have been linked (through literature) to the various hallmarks of cancer?
353
354 - Which genes in the query set are poorly characterized or have an unknown function?
355
356 - Which proteins in the query set can be targeted by inhibitors for different cancer conditions (early and late clinical development phases)? What is the tractability/druggability status for other targets in the query set?
357
358 - Which cancer-relevant protein complexes are involved for proteins in the query set?
359
360 - Are there known cancer-relevant regulatory interactions (transcription factor (TF) - target) found in the query set?
361
362 - Are there known ligand-receptor interactions in the query set?
363
364 - Which subcellular compartments (nucleus, cytosol, plasma membrane etc.) are dominant localizations for members of the query set?
365
366 - Are specific tissues or cell types enriched in the query set, considering healthy tissue/cell-type specific expression patterns (GTex/Human Protein Atlas) of query genes?
367
368 - Which protein-protein interactions are known within the query set? Are there interactions between members of the query set and other cancer-relevant proteins (e.g. proto-oncogenes, tumor-suppressors or predicted cancer drivers)? Which proteins constitute hubs in the protein-protein interaction network?
369
370 - Are there specific pathways, biological processes or molecular functions that are enriched within the query set, as compared to a reference/background set?
371
372 - Which members of the query set are frequently mutated in tumor sample cohorts (TCGA - SNVs/InDels / homozygous deletions / copy number amplifications)? What are the most frequent recurrent somatic variants (SNVs/InDels) in the query set genes?
373
374 - Which members of the query set are co-expressed (strong negative or positive correlations) with cancer-relevant genes (i.e. proto-oncogenes or tumor suppressors) in tumor sample cohorts (TCGA)?
375
376 - Which members of the query set are associated with better/worse survival in different cancers, considering mutation, expression, methylation or copy number levels in tumors?
377
378 - Which members of the query set are predicted as partners of synthetic lethality interactions?
379
380 - Which members of the query set are associated with cellular loss-of-fitness in CRISPR/Cas9 whole-genome drop out screens of cancer cell lines (i.e. reduction of cell viability elicited by a gene inactivation)? Which genes should be prioritized considering genomic biomarkers and fitness scores in combination?
381
382
383 ]]>
384 </help>
385
386 <citations>
387 <!-- Tool citation, given as a DOI that points to an arXiv preprint (2107.13247). -->
388 <citation type="doi">10.48550/arXiv.2107.13247</citation>
389 <!-- No BibTeX citation entries are defined for this tool. -->
390 </citations>
391 </tool>