# HG changeset patch
# User iuc
# Date 1642014465 0
# Node ID d0cbe6cc1f044e620db6c2095a3bec33c31a28ab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genomic_super_signature commit 1aadd5dce3b254e7714c2fdd39413029fd4b9b7a"
diff -r 000000000000 -r d0cbe6cc1f04 genomic_super_signature.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genomic_super_signature.xml Wed Jan 12 19:07:45 2022 +0000
@@ -0,0 +1,213 @@
+
+ interpretation of RNAseq experiments
+
+ 1.2.0
+ 0
+
+
+ bioconductor-genomicsupersignature
+ r-optparse
+ r-wordcloud
+ bioconductor-biocstyle
+ r-magick
+ tzdata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `__ -
+ bloodCellMarkersIRISDMAP, svmMarkers, and canonicalPathways
+
+Outputs
+-------
+
+There are four categories of outputs from this tool, which is one html
+file and three csv tabular files. The actual number of csv files will
+vary depending on the parameter, *–numOut*, and the validated RAVs.
+
+validate.csv
+~~~~~~~~~~~~
+
++--------------------------+--------------------------------------------+
+| Column | Description |
++==========================+============================================+
+| score | the maximum pearson correlation |
+| | coefficient between the top 8 PCs of the |
+| | input and RAVs |
++--------------------------+--------------------------------------------+
+| PC | one of the top 8 PCs of the input, which |
+| | gives the highest *score* |
++--------------------------+--------------------------------------------+
+| sw | the average silhouette width of the RAV |
++--------------------------+--------------------------------------------+
+| cl_size | the size of each RAV |
++--------------------------+--------------------------------------------+
+| cl_num | the RAV number |
++--------------------------+--------------------------------------------+
+
+Genesets
+~~~~~~~~
+
+This is the enriched gene sets for the target RAV, calculated from the
+ranked gene list. Gene sets with the adjusted p-value < 0.05 are
+included.
+
+=========== ================================
+Column Description
+=========== ================================
+Description name of the gene sets
+NES normalized enrichment score (ES)
+pvalue statistical significance
+qvalues p-value adjusted for the FDR
+=========== ================================
+
+Literatures
+~~~~~~~~~~~
+
+========= ======================
+Column Description
+========= ======================
+studyName study accession
+title the title of the study
+========= ======================
+
+report.html
+~~~~~~~~~~~
+
+A html file with the summary of the main analyses by
+GenomicSuperSignature. It includes MeSH terms in word cloud and an
+interactive plot overviewing the validated RAVs, in addition to the
+previews of the tabular output files.
+
+Citations
+---------
+
+Oh, S., Geistlinger, L., Ramos, M., Taroni, J.N., Carey, V.J., Greene,
+C.S., Waldron, L., & Davis, S.R. (2021). GenomicSuperSignature:
+interpretation of RNA-seq experiments through robust, efficient
+comparison to public databases. bioRxiv.
+
+References
+----------
+
+| GenomicSuperSignature package:
+ `webpage `__
+| GenomicSuperSignature usecases:
+ `webpage `__
+ ]]>
+
+ 10.1101/2021.05.26.445900
+
+
diff -r 000000000000 -r d0cbe6cc1f04 gss.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gss.R Wed Jan 12 19:07:45 2022 +0000
@@ -0,0 +1,114 @@
+suppressPackageStartupMessages(library(optparse))
+
+### Parsing command line -------------------------------------------------------
+option_list <- list(
+ make_option(c("--input"), type = "character",
+ default = NULL, help = "Count matrix in tsv format"),
+ make_option(c("--model"), type = "character",
+ default = NULL, help = "RAVmodel to apply.
+ Currently 'C2' and 'PLIERpriors' are available"),
+ make_option(c("--method"), type = "character",
+ default = formals(GenomicSuperSignature::validate)$method),
+ make_option(c("--maxFrom"), type = "character",
+ default = formals(GenomicSuperSignature::validate)$maxFrom),
+ make_option(c("--level"), type = "character",
+ default = formals(GenomicSuperSignature::validate)$level),
+ make_option(c("--scale"), type = "character",
+ default = formals(GenomicSuperSignature::validate)$scale),
+ make_option(c("--outDir"), type = "character",
+ default = NULL, help = "Output file name"),
+ make_option(c("--validate"), type = "character",
+ default = NULL, help = "Path to save validate.csv"),
+ make_option(c("--html"), type = "character",
+ default = NULL, help = "Path to save HTML report"),
+ make_option(c("--numOut"), type = "integer",
+ default = 3, help = "The number of top validated RAVs to check"),
+ make_option(c("--toolDir"), type = "character",
+ default = ".", help = "Directory containing the tool scripts (e.g. gss.Rmd")
+)
+
+opt <- parse_args(OptionParser(option_list = option_list),
+ args = commandArgs(trailingOnly = TRUE))
+input <- opt$input
+model <- opt$model
+out_dir <- opt$outDir
+num_out <- opt$numOut
+
+if (is.null(input)) stop("Need --input.")
+if (is.null(model)) stop("Need --model.")
+if (is.null(out_dir)) stop("Need --outDir.")
+
+input_name <- basename(tools::file_path_sans_ext(input))
+out_dir <- normalizePath(out_dir)
+
+suppressPackageStartupMessages(library(GenomicSuperSignature))
+dat <- as.matrix(read.table(file = input, header = TRUE, sep = "\t",
+ row.names = 1))
+if (model %in% c("C2", "PLIERpriors")) {
+ rav_model <- getModel(model)
+} else {
+ rav_model <- readRDS(model)
+}
+
+
+
+### validate -------------------------------------------------------------------
+val_all <- validate(dat, rav_model)
+validated_ind <- validatedSignatures(val_all, num.out = num_out,
+ swCutoff = 0, indexOnly = TRUE)
+n <- min(num_out, length(validated_ind), na.rm = TRUE)
+
+### Save tables in csv ---------------------------------------------------------
+# Validation
+if (is.null(opt$validate)) {
+ output_fname <- file.path(out_dir, paste0(input_name, "_validate.csv"))
+} else {
+ output_fname <- opt$validate
+}
+write.csv(val_all,
+ file = output_fname,
+ row.names = TRUE)
+
+# GSEA
+for (i in seq_len(n)) {
+ rav_num <- validated_ind[i]
+ rav_name <- paste0("RAV", rav_num)
+ res <- gsea(rav_model)[[rav_name]]
+
+ output_fname <- paste0(input_name, "_genesets_RAV", rav_num, ".csv")
+ write.csv(res,
+ file = file.path(out_dir, output_fname),
+ row.names = TRUE)
+}
+
+# Related prior studies
+for (i in seq_len(n)) {
+ rav_num <- validated_ind[i]
+ res <- findStudiesInCluster(rav_model, rav_num)
+
+ output_fname <- paste0(input_name, "_literatures_RAV", rav_num, ".csv")
+ write.csv(res,
+ file = file.path(out_dir, output_fname),
+ row.names = TRUE)
+}
+
+### Create a report ------------------------------------------------------------
+if (is.null(opt$html)) {
+ output_fname <- file.path(out_dir, paste0("GSS-", input_name, "-",
+ format(Sys.Date(), format = "%Y%m%d"), ".html"))
+} else {
+ output_fname <- opt$html
+
+}
+rmarkdown::render(
+ file.path(opt$toolDir, "gss.Rmd"), params = list(
+ val_all = val_all,
+ dat = dat,
+ RAVmodel = rav_model,
+ inputName = input_name,
+ numOut = num_out
+ ),
+ output_file = output_fname,
+ intermediates_dir = ".",
+ knit_root_dir = "."
+)
diff -r 000000000000 -r d0cbe6cc1f04 gss.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gss.Rmd Wed Jan 12 19:07:45 2022 +0000
@@ -0,0 +1,122 @@
+---
+title: "Analysis by GenomicSuperSignature"
+date: "`r Sys.Date()`"
+output:
+ BiocStyle::html_document:
+ toc: true
+ toc_float: false
+ toc_depth: 3
+params:
+ val_all: val_all
+ dat: dat
+ RAVmodel: RAVmodel
+ inputName: inputName
+ numOut: numOut
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE)
+```
+
+# RAVs best represents your dataset
+The *validation* provides a quantitative representation of the relevance
+between your dataset and RAVs. Below shows the top 6 validated RAVs and
+the complete result is saved as `{input_name}_validate.csv`.
+
+```{r}
+head(params$val_all)
+```
+
+## Heatmap Table
+`heatmapTable` takes validation results as its input and displays them into
+a two panel table: the top panel shows the average silhouette width (avg.sw)
+and the bottom panel displays the validation score.
+
+`heatmapTable` can display different subsets of the validation output. For
+example, if you specify `scoreCutoff`, any validation result above that score
+will be shown. If you specify the number (n) of top validation results through
+`num.out`, the output will be a n-columned heatmap table. You can also use the
+average silhouette width (`swCutoff`), the size of cluster (`clsizecutoff`),
+one of the top 8 PCs from the dataset (`whichPC`).
+
+Here, we print out top `r params$numOut` validated RAVs with average silhouette
+width above 0.
+
+```{r out.height="45%", out.width="45%", message=FALSE, warning=FALSE}
+heatmapTable(params$val_all, num.out = params$numOut, swCutoff = 0)
+```
+
+## Interactive Graph
+Under the default condition, `plotValidate` plots validation results of all non
+single-element RAVs in one graph, where x-axis represents average silhouette
+width of the RAVs (a quality control measure of RAVs) and y-axis represents
+validation score. We recommend users to focus on RAVs with higher validation
+score and use average silhouette width as a secondary criteria.
+
+```{r out.height="80%", out.width="80%"}
+plotValidate(params$val_all, interactive = TRUE)
+```
+
+Note that `interactive = TRUE` will result in a zoomable, interactive plot that
+included tooltips, which is saved as `{input_name}_validate_plot.html` file.
+
+You can hover each data point for more information:
+
+- **sw** : the average silhouette width of the cluster
+- **score** : the top validation score between 8 PCs of the dataset and RAVs
+- **cl_size** : the size of RAVs, represented by the dot size
+- **cl_num** : the RAV number. You need this index to find more information
+about the RAV.
+- **PC** : test dataset's PC number that validates the given RAV. Because we
+used top 8 PCs of the test dataset for validation, there are 8 categories.
+
+If you double-click the PC legend on the right, you will enter an
+individual display mode where you can add an additional group of data
+point by single-click.
+
+
+# Prior information associated to your dataset
+```{r echo=FALSE}
+validated_ind <- validatedSignatures(params$val_all, num.out = params$numOut,
+ swCutoff = 0, indexOnly = TRUE)
+
+# In case, there are fewer validated_ind than the number of outputs user set
+n <- min(params$numOut, length(validated_ind), na.rm = TRUE)
+```
+
+## MeSH terms in wordcloud
+```{r out.height="60%", out.width="60%", fig.width=8, fig.height=8}
+for (i in seq_len(n)) {
+ set.seed(1)
+ print(paste0("MeSH terms related to RAV", validated_ind[i]))
+ drawWordcloud(params$RAVmodel, validated_ind[i])
+}
+```
+
+## GSEA
+The complete result is saved as `{input_name}_genesets_RAV*.csv`.
+```{r}
+res_all <- vector(mode = "list", length = n)
+for (i in seq_len(n)) {
+ RAVnum <- validated_ind[i]
+ RAVname <- paste0("RAV", RAVnum)
+ res <- gsea(params$RAVmodel)[[RAVname]]
+ res_all[[i]] <- head(res)
+ names(res_all)[i] <- paste0("Enriched gene sets for RAV", validated_ind[i])
+}
+res_all
+```
+
+## Publication
+The complete result is saved as `{input_name}_literatures_RAV*.csv`.
+```{r}
+res_all <- vector(mode = "list", length = n)
+for (i in seq_len(n)) {
+ RAVnum <- validated_ind[i]
+ res <- findStudiesInCluster(params$RAVmodel, RAVnum, studyTitle = TRUE)
+ res_all[[i]] <- head(res)
+ names(res_all)[i] <- paste0("Studies related to RAV", validated_ind[i])
+}
+res_all
+```
+
diff -r 000000000000 -r d0cbe6cc1f04 test-data/bcellViperExpr_10C.tsv.gz
Binary file test-data/bcellViperExpr_10C.tsv.gz has changed
diff -r 000000000000 -r d0cbe6cc1f04 test-data/genomic_super_signature_ravmodels.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genomic_super_signature_ravmodels.loc Wed Jan 12 19:07:45 2022 +0000
@@ -0,0 +1,2 @@
+#
+microRAVmodel hg38 0 microRAVmodel (test data only) ${__HERE__}/microRAVmodel.rds
diff -r 000000000000 -r d0cbe6cc1f04 test-data/microRAVmodel.rds
Binary file test-data/microRAVmodel.rds has changed
diff -r 000000000000 -r d0cbe6cc1f04 tool-data/genomic_super_signature_ravmodels.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/genomic_super_signature_ravmodels.loc.sample Wed Jan 12 19:07:45 2022 +0000
@@ -0,0 +1,1 @@
+#
diff -r 000000000000 -r d0cbe6cc1f04 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jan 12 19:07:45 2022 +0000
@@ -0,0 +1,7 @@
+
+
+