Mercurial > repos > artbio > gsc_mannwhitney_de
diff MannWhitney_DE.R @ 4:6916ac5a9ef0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de commit c394391dcf541d91ee1dfdc0c3d80cd7a21942ff
author | artbio |
---|---|
date | Thu, 30 Nov 2023 02:03:53 +0000 |
parents | 3d86c89f15bf |
children | a7eb04240b8b |
line wrap: on
line diff
--- a/MannWhitney_DE.R Thu Aug 29 05:35:04 2019 -0400 +++ b/MannWhitney_DE.R Thu Nov 30 02:03:53 2023 +0000 @@ -1,83 +1,88 @@ -#################### -# Differential # -# analysis # -#################### - -# Perform a differential analysis between 2 -# groups of cells. +# Perform a differential analysis between 2 groups of cells. # Example of command # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> -# load packages that are provided in the conda env -options( show.error.messages=F, - error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) + loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") -warnings() -library(optparse) -#Arguments -option_list = list( +suppressPackageStartupMessages({ + library(optparse) +}) + +sessionInfo() + +option_list <- list( make_option( "--input", default = NA, - type = 'character', + type = "character", help = "Input file that contains log2(CPM +1) values" ), make_option( "--sep", - default = '\t', - type = 'character', + default = "\t", + type = "character", help = "File separator [default : '%default' ]" ), make_option( "--colnames", default = TRUE, - type = 'logical', + type = "logical", help = "Consider first line as header ? [default : '%default' ]" - ), + ), make_option( "--comparison_factor_file", default = NA, - type = 'character', + type = "character", help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" ), make_option( "--factor1", - type = 'character', + type = "character", help = "level associated to the control condition in the factor file" - ), + ), make_option( "--factor2", - type = 'character', + type = "character", help = "level associated to the test condition in the factor file" ), make_option( "--fdr", default = 0.01, - type = 'numeric', + type = "numeric", help = "FDR threshold [default : '%default' ]" ), make_option( "--log", - default=FALSE, - action="store_true", - type = 'logical', + default = FALSE, + action = "store_true", + type = "logical", help = "Expression data are log-transformed [default : '%default' ]" ), make_option( "--output", default = "results.tsv", - type = 'character', + type = "character", help = "Output name [default : '%default' ]" ) ) -opt = parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) +opt <- parse_args(OptionParser(option_list = option_list), + args = commandArgs(trailingOnly = TRUE)) -if (opt$sep == "tab") {opt$sep = "\t"} -if (opt$sep == "comma") {opt$sep = ","} +if (opt$sep == "tab") { + opt$sep <- "\t" +} +if (opt$sep == "comma") { + opt$sep <- "," +} #Open files data.counts <- read.table( @@ -85,13 +90,13 @@ h = opt$colnames, row.names = 1, sep = opt$sep, - check.names = F + check.names = FALSE ) metadata <- read.table( opt$comparison_factor_file, header = TRUE, - stringsAsFactors = F, + stringsAsFactors = FALSE, sep = "\t", check.names = FALSE, row.names = 1 @@ -100,35 +105,34 @@ metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) # Create two logical named vectors for each factor level of cell signature -factor1_cells <- setNames(metadata[,1] == opt$factor1, rownames(metadata)) -factor2_cells <- setNames(metadata[,1] == opt$factor2, rownames(metadata)) +factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata)) +factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata)) ## Mann-Whitney test (Two-sample Wilcoxon test) MW_test <- data.frame(t(apply(data.counts, 1, function(x) { do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] -})), stringsAsFactors = F) +})), stringsAsFactors = FALSE) # Benjamini-Hochberg correction and significativity -MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH" , n = nrow(MW_test)) -# MW_test$Critical.value <- (rank(MW_test$p.value) / nrow(MW_test)) * opt$fdr +MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test)) MW_test$Significant <- MW_test$p.adjust < opt$fdr ## Descriptive Statistics Function descriptive_stats <- function(InputData) { - SummaryData = data.frame( + SummaryData <- data.frame( mean = rowMeans(InputData), SD = apply(InputData, 1, sd), Variance = apply(InputData, 1, var), Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { (sum(x != 0) / ncol(y)) * 100 }), - mean_condition2 = rowMeans(InputData[,factor2_cells]), + mean_condition2 = rowMeans(InputData[, factor2_cells]), mean_condition1 = rowMeans(InputData[, factor1_cells]) ) - if(opt$log) { - SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 + if (opt$log) { + SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 } else { - SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) + SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) } return(SummaryData) } @@ -139,16 +143,16 @@ colnames(results)[1] <- "genes" ## Annotate Significant column -results$Significant[results$Significant == T & !is.na(results$Significant)] <- ifelse(subset(results, Significant == T)$log2FC > 0, "UP", "DOWN") -results$Significant[results$Significant == F & !is.na(results$Significant)] <- "NS" +results$Significant[results$Significant == TRUE & !is.na(results$Significant)] <- ifelse(subset(results, Significant == TRUE)$log2FC > 0, "UP", "DOWN") +results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS" # Save files write.table( - results[order(results$p.adjust),], + results[order(results$p.adjust), ], opt$output, sep = "\t", - quote = F, - col.names = T, - row.names = F + quote = FALSE, + col.names = TRUE, + row.names = FALSE )