# HG changeset patch # User sigven # Date 1664292719 0 # Node ID 3c61ef74a17635c03f89c02b889a53224096a037 # Parent 28d4e824d3d33902012a963f3e225d0879b3359b Uploaded diff -r 28d4e824d3d3 -r 3c61ef74a176 oncoenrichr_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/oncoenrichr_wrapper.xml Tue Sep 27 15:31:59 2022 +0000 @@ -0,0 +1,391 @@ + + Cancer-dedicated gene set interpretation + + sigven/oncoenrichr:1.3.2 + + query_text.csv && + #set input_file = './query_text.csv' + #else if $query_set.query_choice.query_input == "file" + ln -s '$query_set.query_choice.query_file' "$query_set.query_choice.query_file.element_identifier" && + #set input_file = './' + str($query_set.query_choice.query_file.element_identifier) + #end if + + #set background_file = '' + #if $fun_enrich.custom_bgset.def_background + #if $fun_enrich.custom_bgset.bg_choice.bg_source == "text" + echo $fun_enrich.custom_bgset.bg_choice.bg_enrich_text | sed 's/__cn__/\n/g' > custom_bgset.csv && + #set background_file = './custom_bgset.csv' + #else if $fun_enrich.custom_bgset.bg_choice.bg_source == "file" and $fun_enrich.custom_bgset.bg_choice.bg_enrich_file + ln -s '$fun_enrich.custom_bgset.bg_choice.bg_enrich_file' background_text.csv && #* background_file (set on the next line) must name this symlink; custom_bgset.csv is only written by the "text" branch *# + #set background_file = './background_text.csv' + #else + #set background_file = '' + #end if + #end if + + R -e 'suppressPackageStartupMessages(library(oncoEnrichR)); + suppressWarnings(load(system.file("internal_db", "oedb.rda", package = "oncoEnrichR"))); + gene_data <- read.csv("$input_file", stringsAsFactors = FALSE, header = FALSE); + oe_report <- oncoEnrichR::onco_enrich( + query = gene_data[[1]], + oeDB = oedb, + #if $query_set.query_id_type + query_id_type = "$query_set.query_id_type", + #end if + ignore_id_err = $query_set.ignore_id_err, + + #if $report_metadata.project_title + project_title = "$report_metadata.project_title", + #end if + #if $report_metadata.project_owner + project_owner = "$report_metadata.project_owner", + #end if + #if 
$report_metadata.project_description + project_description = "$report_metadata.project_description", + #end if + + show_enrichment = $modules.show_enrichment, + show_ppi = $modules.show_ppi, + show_disease = $modules.show_disease, + show_cancer_hallmarks = $modules.show_cancer_hallmarks, + show_drug = $modules.show_drug, + show_aberration = $modules.show_aberration, + show_coexpression = $modules.show_coexpression, + show_subcell_comp = $modules.show_subcell_comp, + show_complex = $modules.show_complex, + show_domain = $modules.show_domain, + show_fitness = $modules.show_fitness, + show_cell_tissue = $modules.show_cell_tissue, + show_ligand_receptor = $modules.show_ligand_receptor, + show_regulatory = $modules.show_regulatory, + show_prognostic = $modules.show_prognostic, + show_unknown_function = $modules.show_unknown_function, + show_synleth = $modules.show_synleth, + + #if $background_file + bgset = read.csv("$background_file", stringsAsFactors = FALSE, header = FALSE)[[1]], + #if $fun_enrich.custom_bgset.bg_enrich_id_type + bgset_id_type = "$fun_enrich.custom_bgset.bg_enrich_id_type", + #end if + #if $fun_enrich.custom_bgset.bg_enrich_description + bgset_description = "$fun_enrich.custom_bgset.bg_enrich_description", + #end if + #else + bgset = NULL, + #end if + + #if $fun_enrich.p_value_cutoff_enrichment + p_value_cutoff_enrichment = $fun_enrich.p_value_cutoff_enrichment, + #end if + #if $fun_enrich.p_value_adjustment_method + p_value_adjustment_method = "$fun_enrich.p_value_adjustment_method", + #end if + #if $fun_enrich.q_value_cutoff_enrichment + q_value_cutoff_enrichment = $fun_enrich.q_value_cutoff_enrichment, + #end if + #if $fun_enrich.min_geneset_size + min_geneset_size = $fun_enrich.min_geneset_size, + #end if + #if $fun_enrich.max_geneset_size + max_geneset_size = $fun_enrich.max_geneset_size, + #end if + + #if $protein_interactions.ppi_add_nodes + ppi_add_nodes = $protein_interactions.ppi_add_nodes, + #end if + #if $protein_interactions.ppi_score_threshold 
+ ppi_score_threshold = $protein_interactions.ppi_score_threshold, + #end if + show_drugs_in_ppi = $protein_interactions.show_drugs_in_ppi, + ppi_node_shadow = $protein_interactions.ppi_node_shadow, + + #if $subcellular_compartments.min_subcellcomp_confidence + min_subcellcomp_confidence = $subcellular_compartments.min_subcellcomp_confidence, + #end if + #if $fitness.max_fitness_score + max_fitness_score = $fitness.max_fitness_score, + #end if + subcellcomp_show_cytosol = $subcellular_compartments.show_cytosol, + #if $disease.show_top_diseases_only + show_top_diseases_only = $disease.show_top_diseases_only, + #end if + + min_confidence_reg_interaction = "$regulatory.min_confidence_reg_interaction", + num_terms_enrichment_plot = $fun_enrich.num_terms_enrichment_plot, + simplify_go = $fun_enrich.simplify_go, + html_floating_toc = $report_metadata.html_floating_toc, + html_report_theme = "$report_metadata.html_report_theme", + galaxy = TRUE + ); + + oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report1", format = "html", selfcontained_html = FALSE, extra_files_path = "$report1.extra_files_path", overwrite = TRUE, ignore_file_extension = TRUE); + oncoEnrichR::write(report = oe_report, oeDB = oedb, file = "$report2", format = "excel", overwrite = TRUE, ignore_file_extension = TRUE)' 2>&1 + + ]]> + +
+
+ + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + + + + + + + + + + + + + + + + +
+ +
+
+ + + + + + + + + + + + + + + + + +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+
+
+ + + + +
+
+
+ + + + + + +
+
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + `_ - disease associations, drug-target associations, cancer hallmarks, and druggability/tractability rankings + +- `The Cancer Genome Atlas `_ - gene aberration frequencies and co-expression patterns in approximately 10,000 primary tumor samples + +- `The Human Protein Atlas `_ - expression data for healthy human tissues (`GTEx `_)/cell types, and prognostic gene expression associations in cancer (`The Pathology Atlas `_) + +- `Molecular Signatures Database (MSigDB) `_ - collection of annotated (e.g. towards pathways) gene sets for enrichment/overrepresentation analysis. This includes gene sets from `Gene Ontology `_, `Reactome `_, `KEGG `_, `WikiPathways `_, `BIOCARTA `_, as well as curated `immunologic `_ and `cancer-specific `_ signatures. + +- `NetPath `_ - manually curated resource of signal transduction pathways in humans + +- `STRING `_ - protein-protein interaction database + +- `CellChatDB `_ - database on ligand-receptor interactions + +- `DoRothEA `_ - gene set resource containing signed transcription factor (TF) - target interactions + +- `CORUM `_ - protein complex database + +- `Compleat `_ - protein complex resource + +- `ComplexPortal `_ - manually curated, encyclopaedic resource of macromolecular complexes + +- `hu.MAP2 `_ - human protein complex map + +- `ComPPI `_ - subcellular compartment database + +- `CancerMine `_ - literature-mined resource on cancer drivers, oncogenes and tumor suppressor genes + +- `Network of Cancer Genes `_ - manually curated collection of cancer genes, healthy drivers and their properties + +- `Project Score `_ - database on the effects on cancer cell line viability elicited by CRISPR-Cas9 mediated gene inactivation + +- `Genetic determinants of survival in cancer `_ - resource on the prognostic impact of genetic aberrations (methylation, CNA, mutation, expression) in human cancers (TCGA) + +- `Predicted synthetic lethality interactions `_ - comprehensive prediction of synthetic lethality interactions in 
human cancer cell lines + +The contents of the gene set analysis report attempt to answer the following questions related to the query set: + +- Which diseases/tumor types are known to be associated with genes in the query set, and to what extent? Which genes are classified as proto-oncogenes, tumor suppressors or cancer driver genes? + +- Which query genes have been linked (through literature) to the various hallmarks of cancer? + +- Which genes in the query set are poorly characterized or have an unknown function? + +- Which proteins in the query set can be targeted by inhibitors for different cancer conditions (early and late clinical development phases)? What is the tractability/druggability status for other targets in the query set? + +- Which cancer-relevant protein complexes involve proteins in the query set? + +- Are there known cancer-relevant regulatory interactions (transcription factor (TF) - target) found in the query set? + +- Are there known ligand-receptor interactions in the query set? + +- Which subcellular compartments (nucleus, cytosol, plasma membrane, etc.) are dominant localizations for members of the query set? + +- Are specific tissues or cell types enriched in the query set, considering healthy tissue/cell-type specific expression patterns (GTEx/Human Protein Atlas) of query genes? + +- Which protein-protein interactions are known within the query set? Are there interactions between members of the query set and other cancer-relevant proteins (e.g. proto-oncogenes, tumor suppressors or predicted cancer drivers)? Which proteins constitute hubs in the protein-protein interaction network? + +- Are there specific pathways, biological processes or molecular functions that are enriched within the query set, as compared to a reference/background set? + +- Which members of the query set are frequently mutated in tumor sample cohorts (TCGA - SNVs/InDels / homozygous deletions / copy number amplifications)? 
What are the most frequent recurrent somatic variants (SNVs/InDels) in the query set genes? + +- Which members of the query set are co-expressed (strong negative or positive correlations) with cancer-relevant genes (i.e. proto-oncogenes or tumor suppressors) in tumor sample cohorts (TCGA)? + +- Which members of the query set are associated with better/worse survival in different cancers, considering mutation, expression, methylation or copy number levels in tumors? + +- Which members of the query set are predicted as partners of synthetic lethality interactions? + +- Which members of the query set are associated with cellular loss-of-fitness in CRISPR/Cas9 whole-genome dropout screens of cancer cell lines (i.e. reduction of cell viability elicited by gene inactivation)? Which genes should be prioritized considering genomic biomarkers and fitness scores in combination? + + +]]> + + + + + 10.48550/arXiv.2107.13247 + + +