# HG changeset patch # User iuc # Date 1642014465 0 # Node ID d0cbe6cc1f044e620db6c2095a3bec33c31a28ab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genomic_super_signature commit 1aadd5dce3b254e7714c2fdd39413029fd4b9b7a" diff -r 000000000000 -r d0cbe6cc1f04 genomic_super_signature.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genomic_super_signature.xml Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,213 @@ + + interpretation of RNAseq experiments + + 1.2.0 + 0 + + + bioconductor-genomicsupersignature + r-optparse + r-wordcloud + bioconductor-biocstyle + r-magick + tzdata + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `__ - + bloodCellMarkersIRISDMAP, svmMarkers, and canonicalPathways + +Outputs +------- + +There are four categories of outputs from this tool, which is one html +file and three csv tabular files. The actual number of csv files will +vary depending on the parameter, *–numOut*, and the validated RAVs. 
+ +validate.csv +~~~~~~~~~~~~ + ++--------------------------+--------------------------------------------+ +| Column | Description | ++==========================+============================================+ +| score | the maximum Pearson correlation | +| | coefficient between the top 8 PCs of the | +| | input and RAVs | ++--------------------------+--------------------------------------------+ +| PC | one of the top 8 PCs of the input, which | +| | gives the highest *score* | ++--------------------------+--------------------------------------------+ +| sw | the average silhouette width of the RAV | ++--------------------------+--------------------------------------------+ +| cl_size | the size of each RAV | ++--------------------------+--------------------------------------------+ +| cl_num | the RAV number | ++--------------------------+--------------------------------------------+ + +Genesets +~~~~~~~~ + +These are the enriched gene sets for the target RAV, calculated from the +ranked gene list. Gene sets with an adjusted p-value < 0.05 are +included. + +=========== ================================ +Column Description +=========== ================================ +Description name of the gene sets +NES normalized enrichment score (ES) +pvalue statistical significance +qvalues p-value adjusted for the FDR +=========== ================================ + +Literatures +~~~~~~~~~~~ + +========= ====================== +Column Description +========= ====================== +studyName study accession +title the title of the study +========= ====================== + +report.html +~~~~~~~~~~~ + +An HTML file with a summary of the main analyses by +GenomicSuperSignature. It includes MeSH terms in a word cloud and an +interactive plot overviewing the validated RAVs, in addition to the +previews of the tabular output files. + +Citations +--------- + +Oh, S., Geistlinger, L., Ramos, M., Taroni, J.N., Carey, V.J., Greene, +C.S., Waldron, L., & Davis, S.R. (2021). 
GenomicSuperSignature: +interpretation of RNA-seq experiments through robust, efficient +comparison to public databases. bioRxiv. + +References +---------- + +| GenomicSuperSignature package: + `webpage `__ +| GenomicSuperSignature usecases: + `webpage `__ + ]]> + + 10.1101/2021.05.26.445900 + + diff -r 000000000000 -r d0cbe6cc1f04 gss.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gss.R Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,126 @@ +# Command-line driver for GenomicSuperSignature: validates an expression +# matrix against a RAVmodel, writes the validation, gene-set and study tables +# as csv files, and renders an HTML report from gss.Rmd. +suppressPackageStartupMessages(library(optparse)) + +### Parsing command line ------------------------------------------------------- +option_list <- list( + make_option(c("--input"), type = "character", + default = NULL, help = "Count matrix in tsv format"), + make_option(c("--model"), type = "character", + default = NULL, help = "RAVmodel to apply. + Currently 'C2' and 'PLIERpriors' are available"), + make_option(c("--method"), type = "character", + default = formals(GenomicSuperSignature::validate)$method), + make_option(c("--maxFrom"), type = "character", + default = formals(GenomicSuperSignature::validate)$maxFrom), + make_option(c("--level"), type = "character", + default = formals(GenomicSuperSignature::validate)$level), + make_option(c("--scale"), type = "character", + default = formals(GenomicSuperSignature::validate)$scale), + make_option(c("--outDir"), type = "character", + default = NULL, help = "Output directory"), + make_option(c("--validate"), type = "character", + default = NULL, help = "Path to save validate.csv"), + make_option(c("--html"), type = "character", + default = NULL, help = "Path to save HTML report"), + make_option(c("--numOut"), type = "integer", + default = 3, help = "The number of top validated RAVs to check"), + make_option(c("--toolDir"), type = "character", + default = ".", help = "Directory containing the tool scripts (e.g. 
gss.Rmd)") +) + +opt <- parse_args(OptionParser(option_list = option_list), + args = commandArgs(trailingOnly = TRUE)) +input <- opt$input +model <- opt$model +out_dir <- opt$outDir +num_out <- opt$numOut + +if (is.null(input)) stop("Need --input.") +if (is.null(model)) stop("Need --model.") +if (is.null(out_dir)) stop("Need --outDir.") + +input_name <- basename(tools::file_path_sans_ext(input)) +out_dir <- normalizePath(out_dir) + +suppressPackageStartupMessages(library(GenomicSuperSignature)) +dat <- as.matrix(read.table(file = input, header = TRUE, sep = "\t", + row.names = 1)) +if (model %in% c("C2", "PLIERpriors")) { + rav_model <- getModel(model) +} else { + rav_model <- readRDS(model) +} + + + +### validate ------------------------------------------------------------------- +# Forward the validate() options parsed above instead of silently ignoring +# them. --scale is declared as character, so coerce it to one logical value. +scale_opt <- as.logical(opt$scale) +if (length(scale_opt) != 1 || is.na(scale_opt)) { + stop("--scale must be TRUE or FALSE.") +} +val_all <- validate(dat, rav_model, method = opt$method, + maxFrom = opt$maxFrom, level = opt$level, + scale = scale_opt) +validated_ind <- validatedSignatures(val_all, num.out = num_out, + swCutoff = 0, indexOnly = TRUE) +n <- min(num_out, length(validated_ind), na.rm = TRUE) + +### Save tables in csv --------------------------------------------------------- +# Validation +if (is.null(opt$validate)) { + output_fname <- file.path(out_dir, paste0(input_name, "_validate.csv")) +} else { + output_fname <- opt$validate +} +write.csv(val_all, + file = output_fname, + row.names = TRUE) + +# GSEA +for (i in seq_len(n)) { + rav_num <- validated_ind[i] + rav_name <- paste0("RAV", rav_num) + res <- gsea(rav_model)[[rav_name]] + + output_fname <- paste0(input_name, "_genesets_RAV", rav_num, ".csv") + write.csv(res, + file = file.path(out_dir, output_fname), + row.names = TRUE) +} + +# Related prior studies +for (i in seq_len(n)) { + rav_num <- validated_ind[i] + # Also request study titles, as the report in gss.Rmd does. + res <- findStudiesInCluster(rav_model, rav_num, studyTitle = TRUE) + + output_fname <- paste0(input_name, "_literatures_RAV", rav_num, ".csv") + write.csv(res, + file = file.path(out_dir, output_fname), + row.names = TRUE) +} + +### Create a report ------------------------------------------------------------ +if (is.null(opt$html)) { + 
output_fname <- file.path(out_dir, paste0("GSS-", input_name, "-", + format(Sys.Date(), format = "%Y%m%d"), ".html")) +} else { + output_fname <- opt$html + +} +rmarkdown::render( + file.path(opt$toolDir, "gss.Rmd"), params = list( + val_all = val_all, + dat = dat, + RAVmodel = rav_model, + inputName = input_name, + numOut = num_out + ), + output_file = output_fname, + intermediates_dir = ".", + knit_root_dir = "." +) diff -r 000000000000 -r d0cbe6cc1f04 gss.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gss.Rmd Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,122 @@ +--- +title: "Analysis by GenomicSuperSignature" +date: "`r Sys.Date()`" +output: + BiocStyle::html_document: + toc: true + toc_float: false + toc_depth: 3 +params: + val_all: val_all + dat: dat + RAVmodel: RAVmodel + inputName: inputName + numOut: numOut +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = FALSE) +``` + +# RAVs best represents your dataset +The *validation* provides a quantitative representation of the relevance +between your dataset and RAVs. Below shows the top 6 validated RAVs and +the complete result is saved as `{input_name}_validate.csv`. + +```{r} +head(params$val_all) +``` + +## Heatmap Table +`heatmapTable` takes validation results as its input and displays them into +a two panel table: the top panel shows the average silhouette width (avg.sw) +and the bottom panel displays the validation score. + +`heatmapTable` can display different subsets of the validation output. For +example, if you specify `scoreCutoff`, any validation result above that score +will be shown. If you specify the number (n) of top validation results through +`num.out`, the output will be a n-columned heatmap table. You can also use the +average silhouette width (`swCutoff`), the size of cluster (`clsizecutoff`), +one of the top 8 PCs from the dataset (`whichPC`). + +Here, we print out top `r params$numOut` validated RAVs with average silhouette +width above 0. 
+ +```{r out.height="45%", out.width="45%", message=FALSE, warning=FALSE} +heatmapTable(params$val_all, num.out = params$numOut, swCutoff = 0) +``` + +## Interactive Graph +Under the default condition, `plotValidate` plots validation results of all +non-single-element RAVs in one graph, where the x-axis represents average silhouette +width of the RAVs (a quality control measure of RAVs) and the y-axis represents +validation score. We recommend that users focus on RAVs with higher validation +score and use average silhouette width as a secondary criterion. + +```{r out.height="80%", out.width="80%"} +plotValidate(params$val_all, interactive = TRUE) +``` + +Note that `interactive = TRUE` will result in a zoomable, interactive plot that +includes tooltips, which is saved as the `{input_name}_validate_plot.html` file. + +You can hover over each data point for more information: + +- **sw** : the average silhouette width of the cluster +- **score** : the top validation score between 8 PCs of the dataset and RAVs +- **cl_size** : the size of RAVs, represented by the dot size +- **cl_num** : the RAV number. You need this index to find more information +about the RAV. +- **PC** : test dataset's PC number that validates the given RAV. Because we +used the top 8 PCs of the test dataset for validation, there are 8 categories. + +If you double-click the PC legend on the right, you will enter an +individual display mode where you can add an additional group of data +points with a single click. 
+ + +# Prior information associated with your dataset +```{r echo=FALSE} +validated_ind <- validatedSignatures(params$val_all, num.out = params$numOut, + swCutoff = 0, indexOnly = TRUE) + +# In case there are fewer validated indices than the number of outputs the user set +n <- min(params$numOut, length(validated_ind), na.rm = TRUE) +``` + +## MeSH terms in a word cloud +```{r out.height="60%", out.width="60%", fig.width=8, fig.height=8} +for (i in seq_len(n)) { + set.seed(1) + print(paste0("MeSH terms related to RAV", validated_ind[i])) + drawWordcloud(params$RAVmodel, validated_ind[i]) +} +``` + +## GSEA +The complete result is saved as `{input_name}_genesets_RAV*.csv`. +```{r} +res_all <- vector(mode = "list", length = n) +for (i in seq_len(n)) { + RAVnum <- validated_ind[i] + RAVname <- paste0("RAV", RAVnum) + res <- gsea(params$RAVmodel)[[RAVname]] + res_all[[i]] <- head(res) + names(res_all)[i] <- paste0("Enriched gene sets for RAV", validated_ind[i]) +} +res_all +``` + +## Publication +The complete result is saved as `{input_name}_literatures_RAV*.csv`. 
+```{r} +res_all <- vector(mode = "list", length = n) +for (i in seq_len(n)) { + RAVnum <- validated_ind[i] + res <- findStudiesInCluster(params$RAVmodel, RAVnum, studyTitle = TRUE) + res_all[[i]] <- head(res) + names(res_all)[i] <- paste0("Studies related to RAV", validated_ind[i]) +} +res_all +``` + diff -r 000000000000 -r d0cbe6cc1f04 test-data/bcellViperExpr_10C.tsv.gz Binary file test-data/bcellViperExpr_10C.tsv.gz has changed diff -r 000000000000 -r d0cbe6cc1f04 test-data/genomic_super_signature_ravmodels.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genomic_super_signature_ravmodels.loc Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,2 @@ +# +microRAVmodel hg38 0 microRAVmodel (test data only) ${__HERE__}/microRAVmodel.rds diff -r 000000000000 -r d0cbe6cc1f04 test-data/microRAVmodel.rds Binary file test-data/microRAVmodel.rds has changed diff -r 000000000000 -r d0cbe6cc1f04 tool-data/genomic_super_signature_ravmodels.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/genomic_super_signature_ravmodels.loc.sample Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,1 @@ +# diff -r 000000000000 -r d0cbe6cc1f04 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, version, name, path + +
+
diff -r 000000000000 -r d0cbe6cc1f04 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Jan 12 19:07:45 2022 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, version, name, path + +
+