comparison oncoenrichr_wrapper.xml @ 8:dc08c98bb28f draft

Uploaded
author sigven
date Fri, 02 Aug 2024 18:36:59 +0000
parents 023155e2e66c
children e69867fb65fe
comparison
equal deleted inserted replaced
7:023155e2e66c 8:dc08c98bb28f
1 <tool id="oncoenrichr_wrapper" name="oncoEnrichR" version="1.4.2.1"> 1 <tool id="oncoenrichr_wrapper" name="oncoEnrichR" version="1.5.0">
2 <description>Cancer-dedicated gene set interpretation</description> 2 <description>Cancer-dedicated gene set interpretation</description>
3 <requirements> 3 <requirements>
4 <container type="docker">sigven/oncoenrichr:1.4.2</container> 4 <container type="docker">sigven/oncoenrichr:1.5.0</container>
5 </requirements> 5 </requirements>
6 <command detect_errors="aggressive"><![CDATA[ 6 <command detect_errors="aggressive"><![CDATA[
7 #if $query_set.query_choice.query_input == "text" 7 #if $query_set.query_choice.query_input == "text"
8 echo $query_set.query_choice.query_text | sed 's/__cn__/\n/g' > query_text.csv && 8 echo $query_set.query_choice.query_text | sed 's/__cn__/\n/g' > query_text.csv &&
9 #set input_file = './query_text.csv' 9 #set input_file = './query_text.csv'
25 #end if 25 #end if
26 #end if 26 #end if
27 27
28 R -e 'suppressPackageStartupMessages(library(oncoEnrichR)); 28 R -e 'suppressPackageStartupMessages(library(oncoEnrichR));
29 suppressWarnings(load(system.file("internal_db", "oedb.rda", package = "oncoEnrichR"))); 29 suppressWarnings(load(system.file("internal_db", "oedb.rda", package = "oncoEnrichR")));
30 gene_data <- read.csv("$input_file", strip.white = TRUE, header = F); 30 gene_data <- read.csv("$input_file", strip.white = TRUE);
31 oe_report <- oncoEnrichR::onco_enrich( 31 oe_report <- oncoEnrichR::onco_enrich(
32 query = gene_data[[1]], 32 query = gene_data[[1]],
33 oeDB = oedb, 33 oeDB = oedb,
34 #if $query_set.query_id_type 34 #if $query_set.query_id_type
35 query_id_type = "$query_set.query_id_type", 35 query_id_type = "$query_set.query_id_type",
55 show_coexpression = $modules.show_coexpression, 55 show_coexpression = $modules.show_coexpression,
56 show_subcell_comp = $modules.show_subcell_comp, 56 show_subcell_comp = $modules.show_subcell_comp,
57 show_complex = $modules.show_complex, 57 show_complex = $modules.show_complex,
58 show_domain = $modules.show_domain, 58 show_domain = $modules.show_domain,
59 show_fitness = $modules.show_fitness, 59 show_fitness = $modules.show_fitness,
60 show_cell_tissue = $modules.show_cell_tissue,
61 show_ligand_receptor = $modules.show_ligand_receptor, 60 show_ligand_receptor = $modules.show_ligand_receptor,
62 show_regulatory = $modules.show_regulatory, 61 show_regulatory = $modules.show_regulatory,
63 show_prognostic = $modules.show_prognostic, 62 show_prognostic = $modules.show_prognostic,
64 show_unknown_function = $modules.show_unknown_function, 63 show_unknown_function = $modules.show_unknown_function,
65 show_synleth = $modules.show_synleth, 64 show_synleth = $modules.show_synleth,
66 65
67 #if $background_file 66 #if $background_file
68 bgset = read.csv("$background_file", strip.white = TRUE, header = F)[[1]], 67 bgset = read.csv("$background_file", strip.white = TRUE)[[1]],
69 #if $fun_enrich.custom_bgset.bg_enrich_id_type 68 #if $fun_enrich.custom_bgset.bg_enrich_id_type
70 bgset_id_type = "$fun_enrich.custom_bgset.bg_enrich_id_type", 69 bgset_id_type = "$fun_enrich.custom_bgset.bg_enrich_id_type",
71 #end if 70 #end if
72 #if $fun_enrich.custom_bgset.bg_enrich_description 71 #if $fun_enrich.custom_bgset.bg_enrich_description
73 bgset_description = "$fun_enrich.custom_bgset.bg_enrich_description", 72 bgset_description = "$fun_enrich.custom_bgset.bg_enrich_description",
121 #if $disease.show_top_diseases_only 120 #if $disease.show_top_diseases_only
122 show_top_diseases_only = $disease.show_top_diseases_only, 121 show_top_diseases_only = $disease.show_top_diseases_only,
123 #end if 122 #end if
124 123
125 regulatory_min_confidence = "$regulatory.regulatory_min_confidence", 124 regulatory_min_confidence = "$regulatory.regulatory_min_confidence",
126
127 html_floating_toc = $report_metadata.html_floating_toc,
128 html_report_theme = "$report_metadata.html_report_theme",
129 galaxy = TRUE 125 galaxy = TRUE
130 ); 126 );
131 127
132 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report1", format = "html", selfcontained_html = F, extra_files_path = "$report1.extra_files_path", overwrite = T, ignore_file_extension = T); 128 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report1", format = "html", embed_resources = F, extra_files_path = "$report1.extra_files_path", overwrite = T, ignore_file_extension = T);
133 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report2", format = "excel", overwrite = T, ignore_file_extension = T)' 2>&1 129 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report2", format = "excel", overwrite = T, ignore_file_extension = T)' 2>&1
134 130
135 ]]></command> 131 ]]></command>
136 <inputs> 132 <inputs>
137 <section title="" name=""/> 133 <section title="" name=""/>
138 <section name="query_set" title="Query gene set" expanded="true"> 134 <section name="query_set" title="Query gene set" expanded="true">
154 <option value="uniprot_acc">UniProt accession - e.g. P01116</option> 150 <option value="uniprot_acc">UniProt accession - e.g. P01116</option>
155 <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option> 151 <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option>
156 <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option> 152 <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option>
157 <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option> 153 <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option>
158 <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option> 154 <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option>
159 <option value="refseq_mrna">RefSeq mRNA identifier - e.g. NM_004985</option> 155 <option value="refseq_transcript_id">RefSeq mRNA identifier - e.g. NM_004985</option>
160 <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option> 156 <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option>
161 </param> 157 </param>
162 <param name="ignore_id_err" type="boolean" label="Ignore erroneous idenfiers" truevalue="T" falsevalue="F" checked="true"/> 158 <param name="ignore_id_err" type="boolean" label="Ignore erroneous idenfiers" truevalue="T" falsevalue="F" checked="true"/>
163 </section> 159 </section>
164 160
166 <section name="report_metadata" title="Project metadata and output settings" expanded="true"> 162 <section name="report_metadata" title="Project metadata and output settings" expanded="true">
167 <param type="text" name="report_name" label="Output filename (prefix)" value="Report"/> 163 <param type="text" name="report_name" label="Output filename (prefix)" value="Report"/>
168 <param type="text" name="project_title" label="Project title" /> 164 <param type="text" name="project_title" label="Project title" />
169 <param type="text" name="project_owner" label="Project owner" /> 165 <param type="text" name="project_owner" label="Project owner" />
170 <param type="text" name="project_description" label="Project description" area="true"/> 166 <param type="text" name="project_description" label="Project description" area="true"/>
171 <param name="html_floating_toc" type="boolean" label="HTML report - float the table of contents to the left of the main document content (always visible during scrolling)" truevalue="T" falsevalue="F" checked="true"/>
172 <param name="html_report_theme" type="select" label="HTML report - bootswatch theme">
173 <option value="default">default</option>
174 <option value="cerulean">cerulean</option>
175 <option value="cosmo">cosmo</option>
176 <option value="journal">journal</option>
177 <option value="lumen">lumen</option>
178 <option value="paper">paper</option>
179 <option value="sandstone">sandstone</option>
180 <option value="simplex">simplex</option>
181 <option value="spacelab">spacelab</option>
182 <option value="united">united</option>
183 <option value="yeti">yeti</option>
184 </param>
185 </section> 167 </section>
186 168
187 <section title="" name=""/> 169 <section title="" name=""/>
188 <section name="modules" title="Analysis modules included in the report" expanded="true"> 170 <section name="modules" title="Analysis modules included in the report" expanded="true">
189 <param name="show_disease" type="boolean" label="Gene-cancer associations" truevalue="T" falsevalue="F" checked="true"/> 171 <param name="show_disease" type="boolean" label="Gene-cancer associations" truevalue="T" falsevalue="F" checked="true"/>
190 <param name="show_enrichment" type="boolean" label="Gene functional enrichment" truevalue="T" falsevalue="F" checked="true"/> 172 <param name="show_enrichment" type="boolean" label="Gene functional enrichment" truevalue="T" falsevalue="F" checked="true"/>
191 <param name="show_cell_tissue" type="boolean" label="Tissue/cell-type enrichment" truevalue="T" falsevalue="F" checked="false"/>
192 <param name="show_ppi" type="boolean" label="Protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/> 173 <param name="show_ppi" type="boolean" label="Protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
193 <param name="show_regulatory" type="boolean" label="Regulatory (TF-target) interactions" truevalue="T" falsevalue="F" checked="true"/> 174 <param name="show_regulatory" type="boolean" label="Regulatory (TF-target) interactions" truevalue="T" falsevalue="F" checked="false"/>
194 <param name="show_ligand_receptor" type="boolean" label="Ligand-receptor interactions" truevalue="T" falsevalue="F" checked="true"/> 175 <param name="show_ligand_receptor" type="boolean" label="Ligand-receptor interactions" truevalue="T" falsevalue="F" checked="false"/>
195 <param name="show_cancer_hallmarks" type="boolean" label="Cancer hallmark associations" truevalue="T" falsevalue="F" checked="true"/> 176 <param name="show_cancer_hallmarks" type="boolean" label="Cancer hallmark associations" truevalue="T" falsevalue="F" checked="true"/>
196 <param name="show_drug" type="boolean" label="Drug-target associations" truevalue="T" falsevalue="F" checked="true"/> 177 <param name="show_drug" type="boolean" label="Drug-target associations" truevalue="T" falsevalue="F" checked="true"/>
197 <param name="show_aberration" type="boolean" label="Tumor aberration frequencies" truevalue="T" falsevalue="F" checked="true"/> 178 <param name="show_aberration" type="boolean" label="Tumor aberration frequencies" truevalue="T" falsevalue="F" checked="false"/>
198 <param name="show_coexpression" type="boolean" label="Tumor co-expression patterns" truevalue="T" falsevalue="F" checked="true"/> 179 <param name="show_coexpression" type="boolean" label="Tumor co-expression patterns" truevalue="T" falsevalue="F" checked="false"/>
199 <param name="show_subcell_comp" type="boolean" label="Subcellular localizations" truevalue="T" falsevalue="F" checked="true"/> 180 <param name="show_subcell_comp" type="boolean" label="Subcellular localizations" truevalue="T" falsevalue="F" checked="true"/>
200 <param name="show_complex" type="boolean" label="Protein complex memberships" truevalue="T" falsevalue="F" checked="true"/> 181 <param name="show_complex" type="boolean" label="Protein complex memberships" truevalue="T" falsevalue="F" checked="true"/>
201 <param name="show_domain" type="boolean" label="Protein domain frequencies" truevalue="T" falsevalue="F" checked="false"/> 182 <param name="show_domain" type="boolean" label="Protein domain frequencies" truevalue="T" falsevalue="F" checked="false"/>
202 <param name="show_fitness" type="boolean" label="Gene fitness effects" truevalue="T" falsevalue="F" checked="true"/> 183 <param name="show_fitness" type="boolean" label="Gene fitness effects" truevalue="T" falsevalue="F" checked="true"/>
203 <param name="show_synleth" type="boolean" label="Predicted synthetic lethality interactions" truevalue="T" falsevalue="F" checked="true"/> 184 <param name="show_synleth" type="boolean" label="Predicted synthetic lethality interactions" truevalue="T" falsevalue="F" checked="true"/>
204 <param name="show_unknown_function" type="boolean" label="Genes of poorly defined function" truevalue="T" falsevalue="F" checked="true"/> 185 <param name="show_unknown_function" type="boolean" label="Genes of poorly defined function" truevalue="T" falsevalue="F" checked="false"/>
205 <param name="show_prognostic" type="boolean" label="Prognostic cancer associations" truevalue="T" falsevalue="F" checked="true"/> 186 <param name="show_prognostic" type="boolean" label="Prognostic cancer associations" truevalue="T" falsevalue="F" checked="true"/>
206 </section> 187 </section>
207 188
208 <section title="" name=""/> 189 <section title="" name=""/>
209 <section name="fun_enrich" title="Options - gene functional enrichment"> 190 <section name="fun_enrich" title="Options - gene functional enrichment">
230 <option value="uniprot_acc">UniProt accession - e.g. P01116</option> 211 <option value="uniprot_acc">UniProt accession - e.g. P01116</option>
231 <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option> 212 <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option>
232 <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option> 213 <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option>
233 <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option> 214 <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option>
234 <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option> 215 <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option>
235 <option value="refseq_mrna">RefSeq mRNA identifier - e.g. NM_004985</option> 216 <option value="refseq_transcript_id">RefSeq mRNA identifier - e.g. NM_004985</option>
236 <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option> 217 <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option>
237 </param> 218 </param>
238 <param type="text" name="bg_enrich_description" label="Custom background gene set description" value="Custom background description"/> 219 <param type="text" name="bg_enrich_description" label="Custom background gene set description" value="Custom background description"/>
239 </when> 220 </when>
240 </conditional> 221 </conditional>
304 285
305 286
306 <help><![CDATA[ 287 <help><![CDATA[
307 .. class:: infomark 288 .. class:: infomark
308 289
309 The query gene set is limited to n = 1000 identifiers. **NOTE**: A minimum of two gene entries are required to run the tool. Note also that a limited query gene set (e.g. n < 5) in general reduces the relevance and significance of many oncoEnrichR report modules. 290 The query gene set is limited to n = 200 identifiers. Running with more identifiers can be done through the stand-alone R package. A very limited query gene set (e.g. n < 5) will in general reduce the relevance and significance of many oncoEnrichR report modules (e.g. protein-protein interaction networks and functional enrichment).
310 291
311 ----- 292 -----
312 293
313 **Dataset formats** 294 **Dataset formats**
314 295
319 300
320 ----- 301 -----
321 302
322 **What it does** 303 **What it does**
323 304
324 *OncoEnrichR* is intended for exploratory analysis and prioritization of candidate hits (referred to as *query set* below) from high-throughput cancer biology experiments. The tool queries a number of high-quality data resources in order to interpret the query gene set along various dimensions, examples being cancer aberration frequencies, protein-protein interactions, pathway enrichment, subcellular compartment localization, target druggability, gene fitness scores, and tissue/cell-type specificity. 305 *OncoEnrichR* is intended for exploratory analysis and prioritization of candidate hits (referred to as *query set* below) from high-throughput cancer biology experiments. The tool queries a number of high-quality data resources in order to interpret the query gene set along various dimensions, examples being cancer aberration frequencies, protein-protein interactions, pathway enrichment, subcellular compartment localization, target druggability, gene fitness scores, and regulatory interactions.
325 306
326 The results from the various analysis modules are provided in an interactive HTML report where the user can interrogate the results further. A multisheet Excel workbook is also provided for convenience. The following resources are currently utilized for annotation and analysis: 307 The results from the various analysis modules are provided in an interactive HTML report where the user can interrogate the results further. A multisheet Excel workbook is also provided for convenience. The following resources are currently utilized for annotation and analysis:
327 308
328 - `Open Targets Platform <https://targetvalidation.org/>`_ - disease associations, drug-target associations, cancer hallmarks, and druggability/tractability rankings 309 - `Open Targets Platform <https://targetvalidation.org/>`_ - disease associations, drug-target associations, cancer hallmarks, and druggability/tractability rankings
329 310
384 - Are there known cancer-relevant regulatory interactions (transcription factor (TF) - target) found in the query set? 365 - Are there known cancer-relevant regulatory interactions (transcription factor (TF) - target) found in the query set?
385 366
386 - Are there known ligand-receptor interactions in the query set? 367 - Are there known ligand-receptor interactions in the query set?
387 368
388 - Which subcellular compartments (nucleus, cytosol, plasma membrane etc.) are dominant localizations for members of the query set? 369 - Which subcellular compartments (nucleus, cytosol, plasma membrane etc.) are dominant localizations for members of the query set?
389
390 - Are specific tissues or cell types enriched in the query set, considering healthy tissue/cell-type specific expression patterns (GTex/Human Protein Atlas) of query genes?
391 370
392 - Which protein-protein interactions are known within the query set? Are there interactions between members of the query set and other cancer-relevant proteins (e.g. proto-oncogenes, tumor-suppressors or predicted cancer drivers)? Which proteins constitute hubs in the protein-protein interaction network? 371 - Which protein-protein interactions are known within the query set? Are there interactions between members of the query set and other cancer-relevant proteins (e.g. proto-oncogenes, tumor-suppressors or predicted cancer drivers)? Which proteins constitute hubs in the protein-protein interaction network?
393 372
394 - Are there specific pathways, biological processes or molecular functions that are enriched within the query set, as compared to a reference/background set? 373 - Are there specific pathways, biological processes or molecular functions that are enriched within the query set, as compared to a reference/background set?
395 374