Mercurial > repos > sigven > oncoenrichr

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/oncoenrichr_wrapper.xml	Thu Sep 22 11:35:13 2022 +0000
@@ -0,0 +1,391 @@
+<tool id="oncoenrichr_wrapper" name="oncoEnrichR" version="1.3.1">
+  <description>Cancer-dedicated gene set interpretation</description>
+    <requirements>
+        <container type="docker">sigven/oncoenrichr:1.3.1</container>
+  </requirements>
+  <command detect_errors="aggressive"><![CDATA[
+      #if $query_set.query_choice.query_input == "text"
+        echo $query_set.query_choice.query_text | sed 's/__cn__/\n/g' > query_text.csv &&
+        #set input_file = './query_text.csv'
+      #else if $query_set.query_choice.query_input == "file"
+        ln -s $query_set.query_choice.query_file "$query_set.query_choice.query_file.element_identifier" &&
+        #set input_file = './' + str($query_set.query_choice.query_file.element_identifier)
+      #end if
+
+      #set background_file = ''
+      #if $fun_enrich.custom_bgset.def_background
+	      #if $fun_enrich.custom_bgset.bg_choice.bg_source == "text"
+	        echo $fun_enrich.custom_bgset.bg_choice.bg_enrich_text | sed 's/__cn__/\n/g' > custom_bgset.csv &&
+	        #set background_file = './custom_bgset.csv'
+	      #else if $fun_enrich.custom_bgset.bg_choice.bg_source == "file" and $fun_enrich.custom_bgset.bg_choice.bg_enrich_file
+	        ln -s $fun_enrich.custom_bgset.bg_choice.bg_enrich_file background_text.csv &&
+	        #set background_file = './custom_bgset.csv'
+	      #else
+	        #set background_file = ''
+	      #end if
+	 #end if
+
+      R -e 'suppressPackageStartupMessages(library(oncoEnrichR));
+      suppressWarnings(load(system.file("internal_db", "oedb.rda", package = "oncoEnrichR")));
+      gene_data <- read.csv("$input_file", stringsAsFactors = F, header = F);
+      oe_report <- oncoEnrichR::onco_enrich(
+        query = gene_data[[1]],
+        oeDB = oedb,
+      #if $query_set.query_id_type
+        query_id_type = "$query_set.query_id_type",
+      #end if
+      ignore_id_err = $query_set.ignore_id_err,
+
+      #if $report_metadata.project_title
+          project_title = "$report_metadata.project_title",
+      #end if
+      #if $report_metadata.project_owner
+          project_owner = "$report_metadata.project_owner",
+      #end if
+      #if $report_metadata.project_description
+          project_description = "$report_metadata.project_description",
+      #end if
+
+      show_enrichment = $modules.show_enrichment,
+      show_ppi = $modules.show_ppi,
+      show_disease = $modules.show_disease,
+      show_cancer_hallmarks = $modules.show_cancer_hallmarks,
+      show_drug = $modules.show_drug,
+      show_aberration = $modules.show_aberration,
+      show_coexpression = $modules.show_coexpression,
+      show_subcell_comp = $modules.show_subcell_comp,
+      show_complex = $modules.show_complex,
+	 show_domain = $modules.show_domain,
+      show_fitness = $modules.show_fitness,
+	 show_cell_tissue = $modules.show_cell_tissue,
+      show_ligand_receptor = $modules.show_ligand_receptor,
+      show_regulatory = $modules.show_regulatory,
+	 show_prognostic = $modules.show_prognostic,
+	 show_unknown_function = $modules.show_unknown_function,
+      show_synleth = $modules.show_synleth,
+
+      #if $background_file
+          bgset = read.csv("$background_file", stringsAsFactors = F, header = F)[[1]],
+		#if $fun_enrich.custom_bgset.bg_enrich_id_type
+            bgset_id_type = "$fun_enrich.custom_bgset.bg_enrich_id_type",
+          #end if
+          #if $fun_enrich.custom_bgset.bg_enrich_description
+            bgset_description = "$fun_enrich.custom_bgset.bg_enrich_description",
+          #end if
+      #else
+          bgset = NULL,
+      #end if
+
+      #if $fun_enrich.p_value_cutoff_enrichment
+          p_value_cutoff_enrichment = $fun_enrich.p_value_cutoff_enrichment,
+      #end if
+      #if $fun_enrich.p_value_adjustment_method
+          p_value_adjustment_method = "$fun_enrich.p_value_adjustment_method",
+      #end if
+      #if $fun_enrich.q_value_cutoff_enrichment
+          q_value_cutoff_enrichment = $fun_enrich.q_value_cutoff_enrichment,
+      #end if
+      #if $fun_enrich.min_geneset_size
+          min_geneset_size = $fun_enrich.min_geneset_size,
+      #end if
+      #if $fun_enrich.max_geneset_size
+          max_geneset_size = $fun_enrich.max_geneset_size,
+      #end if
+
+      #if $protein_interactions.ppi_add_nodes
+          ppi_add_nodes = $protein_interactions.ppi_add_nodes,
+      #end if
+      #if $protein_interactions.ppi_score_threshold
+          ppi_score_threshold = $protein_interactions.ppi_score_threshold,
+      #end if
+      show_drugs_in_ppi = $protein_interactions.show_drugs_in_ppi,
+	 ppi_node_shadow = $protein_interactions.ppi_node_shadow,
+
+	 #if $subcellular_compartments.min_subcellcomp_confidence
+          min_subcellcomp_confidence = $subcellular_compartments.min_subcellcomp_confidence,
+      #end if
+      #if $fitness.max_fitness_score
+          max_fitness_score = $fitness.max_fitness_score,
+      #end if
+      subcellcomp_show_cytosol = $subcellular_compartments.show_cytosol,
+      #if $disease.show_top_diseases_only
+          show_top_diseases_only = $disease.show_top_diseases_only,
+      #end if
+
+      min_confidence_reg_interaction = "$regulatory.min_confidence_reg_interaction",
+      num_terms_enrichment_plot = $fun_enrich.num_terms_enrichment_plot,
+      simplify_go = $fun_enrich.simplify_go,
+      html_floating_toc = $report_metadata.html_floating_toc,
+      html_report_theme = "$report_metadata.html_report_theme",
+      galaxy = TRUE
+      );
+
+      oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report1", format = "html", selfcontained_html = F, extra_files_path = "$report1.extra_files_path", overwrite = T, ignore_file_extension = T);
+	 oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report2", format = "excel", overwrite = T, ignore_file_extension = T)' 2>&1
+
+  ]]></command>
+  <inputs>
+      <section title="" name=""/>
+      <section name="query_set" title="Query gene set" expanded="true">
+          <conditional name="query_choice">
+                <param name="query_input" type="select" multiple="false" display="radio"
+                       label="Query gene set: do you want to upload a file OR paste into a text box?">
+                    <option value="text">Text field</option>
+				<option value="file">From file</option>
+                </param>
+                <when value="text">
+                    <param type="text" name="query_text" label="Query gene set identifiers (one per line)" area="true"/>
+                </when>
+			 <when value="file">
+                   <param name="query_file" type="data" format="txt" label="Query gene set identifiers" multiple="false"/>
+                </when>
+          </conditional>
+          <param name="query_id_type" type="select" label="Query identifier type" display="radio" multiple="false">
+              <option value="symbol">Primary gene symbol (HGNC) - e.g. KRAS</option>
+              <option value="uniprot_acc">UniProt accession - e.g. P01116</option>
+              <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option>
+              <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option>
+              <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option>
+              <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option>
+              <option value="refseq_mrna">RefSeq mRNA identifier - e.g. NM_004985</option>
+              <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option>
+          </param>
+		<param name="ignore_id_err" type="boolean" label="Ignore erroneous idenfiers" truevalue="T" falsevalue="F" checked="true"/>
+      </section>
+
+      <section title="" name=""/>
+      <section name="report_metadata" title="Project metadata and output settings" expanded="true">
+          <param type="text" name="report_name" label="Output filename (prefix)" value="Report"/>
+          <param type="text" name="project_title" label="Project title" />
+          <param type="text" name="project_owner" label="Project owner" />
+          <param type="text" name="project_description" label="Project description" area="true"/>
+          <param name="html_floating_toc" type="boolean" label="HTML report - float the table of contents to the left of the main document content (always visible during scrolling)" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="html_report_theme" type="select" label="HTML report - bootswatch theme" expanded="true">
+              <option value="default">default</option>
+              <option value="cerulean">cerulean</option>
+              <option value="cosmo">cosmo</option>
+              <option value="journal">journal</option>
+              <option value="lumen">lumen</option>
+              <option value="paper">paper</option>
+              <option value="sandstone">sandstone</option>
+              <option value="simplex">simplex</option>
+              <option value="spacelab">spacelab</option>
+              <option value="united">united</option>
+              <option value="yeti">yeti</option>
+          </param>
+      </section>
+
+      <section title="" name=""/>
+      <section name="modules" title="Analysis modules included in the report" expanded="true">
+		<param name="show_disease" type="boolean" label="Gene-cancer associations" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_enrichment" type="boolean" label="Gene functional enrichment" truevalue="T" falsevalue="F" checked="true"/>
+		<param name="show_cell_tissue" type="boolean" label="Tissue/cell-type enrichment" truevalue="T" falsevalue="F" checked="false"/>
+          <param name="show_ppi" type="boolean" label="Protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_regulatory" type="boolean" label="Regulatory (TF-target) interactions" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_ligand_receptor" type="boolean" label="Ligand-receptor interactions" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_cancer_hallmarks" type="boolean" label="Cancer hallmark associations" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_drug" type="boolean" label="Drug-target associations" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_aberration" type="boolean" label="Tumor aberration frequencies" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_coexpression" type="boolean" label="Tumor co-expression patterns" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_subcell_comp" type="boolean" label="Subcellular localizations" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_complex" type="boolean" label="Protein complex memberships" truevalue="T" falsevalue="F" checked="true"/>
+		<param name="show_domain" type="boolean" label="Protein domain frequencies" truevalue="T" falsevalue="F" checked="false"/>
+          <param name="show_fitness" type="boolean" label="Gene fitness effects" truevalue="T" falsevalue="F" checked="true"/>
+          <param name="show_synleth" type="boolean" label="Predicted synthetic lethality interactions" truevalue="T" falsevalue="F" checked="true"/>
+		<param name="show_unknown_function" type="boolean" label="Genes of poorly defined function" truevalue="T" falsevalue="F" checked="true"/>
+		<param name="show_prognostic" type="boolean" label="Prognostic cancer associations" truevalue="T" falsevalue="F" checked="true"/>
+      </section>
+
+      <section title="" name=""/>
+      <section name="fun_enrich" title="Options - gene functional enrichment" expanded="true">
+		 <conditional name="custom_bgset">
+			 <param name="def_background" type="boolean" label="Define custom background set (all annotated protein-coding genes by default)" truevalue="T" falsevalue="F" checked="false"/>
+			 <when value="T">
+		            <conditional name="bg_choice">
+		                <param name="bg_source" type="select" display="radio"
+		                       label="Custom background gene set: do you want to upload a file OR paste into a text box?">
+						<option value="text">Text field</option>
+		                    <option value="file">From file</option>
+
+		                </param>
+		                <when value="file">
+		                   <param type="data" format="txt" name="bg_enrich_file" label="Custom background gene set" optional="true" multiple="false"/>
+		                </when>
+		                <when value="text">
+		                    <param type="text" name="bg_enrich_text" label="Custom background gene set identifiers (one per line):" area="true"/>
+		                </when>
+		          </conditional>
+
+		          <param type="select" name="bg_enrich_id_type" label="Custom background identifier type" display="radio" multiple="false">
+		              <option value="symbol">Primary gene symbol (HGNC) - e.g. KRAS</option>
+                      <option value="uniprot_acc">UniProt accession - e.g. P01116</option>
+                      <option value="entrezgene">NCBI Entrez gene identifier - e.g. 3845</option>
+                      <option value="ensembl_gene">Ensembl gene identifier - e.g. ENSG00000133703</option>
+                      <option value="ensembl_mrna">Ensembl transcript identifier - e.g. ENST00000311936</option>
+                      <option value="ensembl_protein">Ensembl protein identifier - e.g. ENSP00000308495</option>
+                      <option value="refseq_mrna">RefSeq mRNA identifier - e.g. NM_004985</option>
+                      <option value="refseq_protein">RefSeq protein identifier - e.g. NP_004976</option>
+		          </param>
+		          <param type="text" name="bg_enrich_description" label="Custom background gene set description" value="Custom background description"/>
+			</when>
+		</conditional>
+
+          <param type="float" name="p_value_cutoff_enrichment" label="P-value cutoff for enrichment tests (clusterProfiler)" value="0.05"/>
+          <param type="select" name="p_value_adjustment_method" label="P-value adjustment method (clusterProfiler)">
+              <option value="holm">holm</option>
+              <option value="hochberg">hochberg</option>
+              <option value="hommel">hommel</option>
+              <option value="bonferroni">bonferroni</option>
+              <option value="BH">BH</option>
+              <option value="BY">BY</option>
+              <option value="fdr">fdr</option>
+              <option value="none">none</option>
+          </param>
+          <param type="float" name="q_value_cutoff_enrichment" label="Q-value cutoff for enrichment tests to report as significant (clusterProfiler)" value="0.2"/>
+          <param type="integer" name="min_geneset_size" label="Minimum number of genes annotated by ontology term for testing (clusterProfiler)" value="10"/>
+          <param type="integer" name="max_geneset_size" label="Maximum number of genes annotated by ontology term for testing (clusterProfiler)" value="500"/>
+          <param name="simplify_go" type="boolean" label="Simplify GO enrichment results by removal of redundant terms (recommended)" truevalue="T" falsevalue="F" checked="true"/>
+          <param type="integer" name="num_terms_enrichment_plot" label="Number of top enriched Gene Ontology terms (max) to show in enrichment barplot" min="10" max="30" value="20"/>
+      </section>
+
+      <section title="" name=""/>
+      <section name="fitness" title="Options - gene fitness scores" expanded="true">
+          <param type="float" name="max_fitness_score" label="Maximum loss-of-fitness score (Bayes Factor from BAGEL) for genes retrieved from Project Score" value="-2" min="-5" max="0"/>
+      </section>
+      <section title="" name=""/>
+      <section name="protein_interactions" title="Options - protein-protein interaction network" expanded="true">
+          <param type="integer" name="ppi_add_nodes" label="Addition of interacting non-queryset proteins to the protein-protein interaction network (maximum number)" value="50" min="0" max="50"/>
+          <param type="integer" name="ppi_score_threshold" label="Minimum confidence score for interactions to be included in the network (STRING confidence: 0-1000)" value="900" min="400" max="1000"/>
+          <param name="show_drugs_in_ppi" type="boolean" label="Show anti-cancer drugs in protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
+		<param name="ppi_node_shadow" type="boolean" label="Add shadow to nodes in protein-protein interaction network" truevalue="T" falsevalue="F" checked="true"/>
+      </section>
+      <section title="" name=""/>
+      <section name="regulatory" title="Options - regulatory interactions" expanded="true">
+         <param type="select" name="min_confidence_reg_interaction" label = "Minimum confidence level of regulatory interactions included (DoRothEA - A:highest, D:lowest)">
+            <option value="D">D</option>
+            <option value="C">C</option>
+            <option value="B">B</option>
+            <option value="A">A</option>
+        </param>
+      </section>
+     <section title="" name=""/>
+
+	 <section name="subcellular_compartments" title="Options - Subcellular localizations" expanded="true">
+           <param type="integer" name="min_subcellcomp_confidence" label="Minimum confidence level for subcellular localization annotations" value="1" min="1" max="6"/>
+          <param name="show_cytosol" type="boolean" label="Show cytosol annotations (very common localization) in subcellular heatmap " truevalue="T" falsevalue="F" checked="false"/>
+      </section>
+      <section title="" name=""/>
+
+      <section name="disease" title="Options - Disease associations" expanded="true">
+          <param type="boolean" name="show_top_diseases_only" label="Show top disease assocations only" truevalue="T" falsevalue="F" checked="true"/>
+      </section>
+
+  </inputs>
+    <outputs>
+        <data format="xlsx" name="report2" label="$report_metadata.report_name - xlsx"/>
+        <data format="html" name="report1" label="$report_metadata.report_name - html"/>
+    </outputs>
+
+
+  <help><![CDATA[
+.. class:: infomark
+
+The query gene set is limited to n = 500 identifiers. A limited query gene set (e.g. n < 5) will in general reduce the relevance and significance of many oncoEnrichR report modules.
+
+-----
+
+**Dataset formats**
+
+The input dataset is in tabular_ format. The two output datasets are html_ and xlsx.
+
+.. _tabular: ${static_path}/formatHelp.html#tab
+.. _html: ${static_path}/formatHelp.html#html
+
+-----
+
+**What it does**
+
+*OncoEnrichR* is intended for exploratory analysis and prioritization of a candidate hits (referred to as *query set* below) from high-throughput cancer biology experiments. The tool queries a number of high-quality data resources in order to interpret the query gene set along various dimensions, examples being cancer aberration frequencies, protein-protein interactions, pathway enrichment, subcellular compartment localization, target druggability, gene fitness scores, and tissue/cell-type specificity.
+
+The results from the various analysis modules are provided in an interactive HTML report where the user can interrogate the results further. A multisheet Excel workbook is also provided for convience. The following resources are currently utilized for annotation and analysis:
+
+-  `Open Targets Platform <https://targetvalidation.org/>`_ - disease associations, drug-target associations, cancer hallmarks, and druggability/tractability rankings
+
+-  `The Cancer Genome Atlas <https://portal.gdc.cancer.gov/>`_ - gene aberration frequencies and co-expression patterns in approximately 10,000 primary tumor samples
+
+-  `The Human Protein Atlas <https://www.proteinatlas.org/>`_ - expression data for healthy human tissues (`GTex <https://gtexportal.org/home/>`_)/cell types, and prognostic gene expression associations in cancer (`The Pathology Atlas <https://www.proteinatlas.org/humanproteome/pathology/>`_)
+
+-  `Molecular Signatures Database (MSigDB) <http://software.broadinstitute.org/gsea/msigdb/index.jsp/>`_ - collection of annotated (e.g. towards pathways) gene sets for enrichment/overrepresentation analysis. This includes gene sets from `Gene Ontology <http://geneontology.org/>`_, `Reactome <https://reactome.org/>`_, `KEGG <https://www.genome.jp/kegg/pathway.html/>`_, `WikiPathways <https://www.wikipathways.org/index.php/WikiPathways/>`_, `BIOCARTA <https://maayanlab.cloud/Harmonizome/dataset/Biocarta+Pathways/>`_, as well as curated `immunologic <https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp#C7/>`_ and `cancer-specific <https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp#C6/>`_ signatures.
+
+-  `NetPath <http://www.netpath.org/>`_ - manually curated resource of signal transduction pathways in humans
+
+-  `STRING <https://string-db.org/>`_ - protein-protein interaction database
+
+-  `CellChatDB <http://www.cellchat.org/>`_ - database on ligand-receptor interactions
+
+-  `DoRothEA <https://saezlab.github.io/dorothea/>`_ - gene set resource containing signed transcription factor (TF) - target interactions
+
+-  `CORUM <https://mips.helmholtz-muenchen.de/corum/>`_ - protein complex database
+
+-  `Compleat <https://fgr.hms.harvard.edu/compleat>`_ - protein complex resource
+
+-  `ComplexPortal <https://www.ebi.ac.uk/complexportal/home/>`_ - manually curated, encyclopaedic resource of macromolecular complexes
+
+-  `hu.MAP2 <http://humap2.proteincomplexes.org/>`_ - human protein complex map
+
+-  `ComPPI <http://comppi.linkgroup.hu/>`_ - subcellular compartment database
+
+-  `CancerMine <http://bionlp.bcgsc.ca/cancermine/>`_ - literature-mined resource on cancer drivers, oncogenes and tumor suppressor genes
+
+-  `Network of Cancer Genes <http://ncg.kcl.ac.uk/>`_ - manually curated collection of cancer genes, healthy drivers and their properties
+
+-  `Project Score <https://score.depmap.sanger.ac.uk/>`_ - database on the effects on cancer cell line viability elicited by CRISPR-Cas9 mediated gene activation
+
+-  `Genetic determinants of survival in cancer <http://survival.cshl.edu/>`_ - resource on the prognostic impact of genetic aberrations (methylation, CNA, mutation, expression) in human cancers (TCGA)
+
+-  `Predicted synthetic lethality interactions <https://pubmed.ncbi.nlm.nih.gov/34529928/>`_ - comprehensive prediction of synthetic lethality interactions in human cancer cell lines
+
+The contents of the gene set analysis report attempt to answer the following questions related to the query set:
+
+-  Which diseases/tumor types are known to be associated with genes in the query set, and to what extent? Which genes are a classified as proto-oncogenes, tumor suppressors or cancer driver genes?
+
+-  Which query genes have been linked (through literature) to the various hallmarks of cancer?
+
+-  Which genes in the query set are poorly characterized or have an unknown function?
+
+-  Which proteins in the query set can be targeted by inhibitors for diffferent cancer conditions (early and late clinical development phases)? What is the tractability/druggability status for other targets in the query set?
+
+-  Which cancer-relevant protein complexes are involved for proteins in the query set?
+
+-  Are there known cancer-relevant regulatory interactions (transcription factor (TF) - target) found in the query set?
+
+-  Are there known ligand-receptor interactions in the query set?
+
+-  Which subcellular compartments (nucleus, cytosol, plasma membrane etc.) are dominant localizations for members of the query set?
+
+-  Are specific tissues or cell types enriched in the query set, considering healthy tissue/cell-type specific expression patterns (GTex/Human Protein Atlas) of query genes?
+
+-  Which protein-protein interactions are known within the query set? Are there interactions between members of the query set and other cancer-relevant proteins (e.g. proto-oncogenes, tumor-suppressors or predicted cancer drivers)? Which proteins constitute hubs in the protein-protein interaction network?
+
+-  Are there specific pathways, biological processes or molecular functions that are enriched within the query set, as compared to a reference/background set?
+
+-  Which members of the query set are frequently mutated in tumor sample cohorts (TCGA - SNVs/InDels / homozygous deletions / copy number amplifications)? What are the most frequent recurrent somatic variants (SNVs/InDels) in the query set genes?
+
+-  Which members of the query set are co-expressed (strong negative or positive correlations) with cancer-relevant genes (i.e. proto-oncogenes or tumor suppressors) in tumor sample cohorts (TCGA)?
+
+-  Which members of the query set are associated with better/worse survival in different cancers, considering mutation, expression, methylation or copy number levels in tumors?
+
+-  Which members of the query set are predicted as partners of synthetic lethality interactions?
+
+-  Which members of the query set are associated with cellular loss-of-fitness in CRISPR/Cas9 whole-genome drop out screens of cancer cell lines (i.e. reduction of cell viability elicited by a gene inactivation)? Which genes should be prioritized considering genomic biomarkers and fitness scores in combination?
+
+
+]]>
+  </help>
+
+ <citations>
+     <!-- Example of annotating a citation using a DOI. -->
+     <citation type="doi">10.48550/arXiv.2107.13247</citation>
+     <!-- Example of annotating a citation using a BibTex entry. -->
+  </citations>
+</tool>