Mercurial > repos > iuc > multigsea
diff multiGSEA.R @ 0:28e29a3d0eda draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
author | iuc |
---|---|
date | Wed, 07 Jun 2023 19:48:50 +0000 |
parents | |
children | e48b10ce08b8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multiGSEA.R Wed Jun 07 19:48:50 2023 +0000 @@ -0,0 +1,172 @@ +library(multiGSEA, + quietly = TRUE, + warn.conflicts = FALSE) +library(argparse, quietly = TRUE, warn.conflicts = FALSE) + +################################################################################ +### Input Processing +################################################################################ + + +# Collect arguments from command line +parser <- ArgumentParser(description = "multiGSEA R script") + +parser$add_argument("--transcriptomics", required = FALSE, + help = "Transcriptomics data") +parser$add_argument( + "--transcriptome_ids", + required = FALSE, + help = "Transcriptomics ids", + default = "SYMBOL" +) +parser$add_argument("--proteomics", required = FALSE, + help = "Proteomics data") +parser$add_argument( + "--proteome_ids", + required = FALSE, + help = "Proteomics ids", + default = "SYMBOL" +) +parser$add_argument("--metabolomics", required = FALSE, + help = "Metabolomics data") +parser$add_argument( + "--metabolome_ids", + required = FALSE, + help = "Metabolomics ids", + default = "HMDB" +) +parser$add_argument("--organism", required = TRUE, + help = "Organism") +parser$add_argument("--combine_pvalues", required = TRUE, + help = "Combine p-values method") +parser$add_argument("--padj_method", required = TRUE, + help = "P-adjustment method") +parser$add_argument("--databases", + required = TRUE, + help = "Pathway databases") + +args <- parser$parse_args() + +## ----Load library------------------------------------------------------------- + +organism_mapping <- c( + "hsapiens" = "org.Hs.eg.db", + "mmusculus" = "org.Mm.eg.db", + "rnorvegicus" = "org.Rn.eg.db", + "cfamiliaris" = "org.Cf.eg.db", + "btaurus" = "org.Bt.eg.db", + "sscrofa" = "org.Ss.eg.db", + "ggallus" = "org.Gg.eg.db", + "drerio" = "org.Xl.eg.db", + "xlaevis" = "org.Dr.eg.db", + "dmelanogaster" = "org.Dm.eg.db", + "celegans" = "org.Ce.eg.db" +) + +library(organism_mapping[args$organism], character.only = TRUE) + + +## ----Load omics data---------------------------------------------------------- + +layer <- c() + +if (!is.null(args$transcriptomics)) { + transcriptome <- read.csv( + args$transcriptomics, + header = TRUE, + sep = "\t", + dec = "." + ) + layer <- append(layer, "transcriptome") +} + +if (!is.null(args$proteomics)) { + proteome <- read.csv(args$proteomics, + header = TRUE, + sep = "\t", + dec = ".") + layer <- append(layer, "proteome") +} + +if (!is.null(args$metabolomics)) { + metabolome <- read.csv(args$metabolomics, + header = TRUE, + sep = "\t", + dec = ".") + layer <- append(layer, "metabolome") +} + +## ----rank_features------------------------------------------------------------ + +# create data structure +omics_data <- initOmicsDataStructure(layer) + +## add transcriptome layer +if (!is.null(args$transcriptomics)) { + omics_data$transcriptome <- rankFeatures(transcriptome$logFC, + transcriptome$pValue) + names(omics_data$transcriptome) <- transcriptome$Symbol +} + +## add proteome layer +if (!is.null(args$proteomics)) { + omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) + names(omics_data$proteome) <- proteome$Symbol +} + +## add metabolome layer +## HMDB features have to be updated to the new HMDB format +if (!is.null(args$metabolomics)) { + omics_data$metabolome <- + rankFeatures(metabolome$logFC, metabolome$pValue) + names(omics_data$metabolome) <- metabolome$HMDB + names(omics_data$metabolome) <- gsub("HMDB", "HMDB00", + names(omics_data$metabolome)) +} + + +## remove NA's and sort feature ranks +omics_data <- lapply(omics_data, function(vec) { + sort(vec[!is.na(vec)]) +}) + +## ----Pathway definitions------------------------------------------------------ + +pathways <- + getMultiOmicsFeatures( + dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), + layer = layer, + returnTranscriptome = args$transcriptome_ids, + returnProteome = args$proteome_ids, + returnMetabolome = args$metabolome_ids, + organism = args$organism, + useLocal = FALSE + ) + +## ----calculate enrichment----------------------------------------------------- + +enrichment_scores <- + multiGSEA(pathways, omics_data) + +## ----combine_pvalues---------------------------------------------------------- + +df <- extractPvalues(enrichmentScores = enrichment_scores, + pathwayNames = names(pathways[[1]])) + +df$combined_pval <- + combinePvalues(df, method = args$combine_pvalues) +df$combined_padj <- + p.adjust(df$combined_pval, method = args$padj_method) + +df <- cbind(data.frame(pathway = names(pathways[[1]])), df) + +## ----Write output------------------------------------------------------------- + +write.table( + df, + file = "results.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE +)