view multiGSEA.R @ 0:28e29a3d0eda draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
author iuc
date Wed, 07 Jun 2023 19:48:50 +0000
parents
children e48b10ce08b8
line wrap: on
line source

library(multiGSEA,
        quietly = TRUE,
        warn.conflicts = FALSE)
library(argparse, quietly = TRUE, warn.conflicts = FALSE)

################################################################################
### Input Processing
################################################################################


# Collect arguments from command line
parser <- ArgumentParser(description = "multiGSEA R script")

parser$add_argument("--transcriptomics",  required = FALSE,
                    help = "Transcriptomics data")
parser$add_argument(
  "--transcriptome_ids",
  required = FALSE,
  help = "Transcriptomics ids",
  default = "SYMBOL"
)
parser$add_argument("--proteomics",  required = FALSE,
                    help = "Proteomics data")
parser$add_argument(
  "--proteome_ids",
  required = FALSE,
  help = "Proteomics ids",
  default = "SYMBOL"
)
parser$add_argument("--metabolomics",  required = FALSE,
                    help = "Metabolomics data")
parser$add_argument(
  "--metabolome_ids",
  required = FALSE,
  help = "Metabolomics ids",
  default = "HMDB"
)
parser$add_argument("--organism",  required = TRUE,
                    help = "Organism")
parser$add_argument("--combine_pvalues",  required = TRUE,
                    help = "Combine p-values method")
parser$add_argument("--padj_method",  required = TRUE,
                    help = "P-adjustment method")
parser$add_argument("--databases",
                    required = TRUE,
                    help = "Pathway databases")

args <- parser$parse_args()

## ----Load library-------------------------------------------------------------

organism_mapping <- c(
  "hsapiens" = "org.Hs.eg.db",
  "mmusculus" = "org.Mm.eg.db",
  "rnorvegicus" = "org.Rn.eg.db",
  "cfamiliaris" = "org.Cf.eg.db",
  "btaurus" = "org.Bt.eg.db",
  "sscrofa" = "org.Ss.eg.db",
  "ggallus" = "org.Gg.eg.db",
  "drerio" = "org.Xl.eg.db",
  "xlaevis" = "org.Dr.eg.db",
  "dmelanogaster" = "org.Dm.eg.db",
  "celegans" = "org.Ce.eg.db"
)

library(organism_mapping[args$organism], character.only = TRUE)


## ----Load omics data----------------------------------------------------------

layer <- c()

if (!is.null(args$transcriptomics)) {
  transcriptome <- read.csv(
    args$transcriptomics,
    header = TRUE,
    sep = "\t",
    dec = "."
  )
  layer <- append(layer, "transcriptome")
}

if (!is.null(args$proteomics)) {
  proteome <- read.csv(args$proteomics,
                       header = TRUE,
                       sep = "\t",
                       dec = ".")
  layer <- append(layer, "proteome")
}

if (!is.null(args$metabolomics)) {
  metabolome <- read.csv(args$metabolomics,
                         header = TRUE,
                         sep = "\t",
                         dec = ".")
  layer <- append(layer, "metabolome")
}

## ----rank_features------------------------------------------------------------

# create data structure
omics_data <- initOmicsDataStructure(layer)

## add transcriptome layer
if (!is.null(args$transcriptomics)) {
  omics_data$transcriptome <- rankFeatures(transcriptome$logFC,
                                           transcriptome$pValue)
  names(omics_data$transcriptome) <- transcriptome$Symbol
}

## add proteome layer
if (!is.null(args$proteomics)) {
  omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
  names(omics_data$proteome) <- proteome$Symbol
}

## add metabolome layer
## HMDB features have to be updated to the new HMDB format
if (!is.null(args$metabolomics)) {
  omics_data$metabolome <-
    rankFeatures(metabolome$logFC, metabolome$pValue)
  names(omics_data$metabolome) <- metabolome$HMDB
  names(omics_data$metabolome) <- gsub("HMDB", "HMDB00",
                                       names(omics_data$metabolome))
}


## remove NA's and sort feature ranks
omics_data <- lapply(omics_data, function(vec) {
  sort(vec[!is.na(vec)])
})

## ----Pathway definitions------------------------------------------------------

pathways <-
  getMultiOmicsFeatures(
    dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)),
    layer = layer,
    returnTranscriptome = args$transcriptome_ids,
    returnProteome = args$proteome_ids,
    returnMetabolome = args$metabolome_ids,
    organism = args$organism,
    useLocal = FALSE
  )

## ----calculate enrichment-----------------------------------------------------

enrichment_scores <-
  multiGSEA(pathways, omics_data)

## ----combine_pvalues----------------------------------------------------------

df <- extractPvalues(enrichmentScores = enrichment_scores,
                     pathwayNames = names(pathways[[1]]))

df$combined_pval <-
  combinePvalues(df, method = args$combine_pvalues)
df$combined_padj <-
  p.adjust(df$combined_pval, method = args$padj_method)

df <- cbind(data.frame(pathway = names(pathways[[1]])), df)

## ----Write output-------------------------------------------------------------

write.table(
  df,
  file = "results.tsv",
  quote = FALSE,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)