Mercurial > repos > artbio > gsc_mannwhitney_de
diff MannWhitney_DE.R @ 5:a7eb04240b8b draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de commit 9286176209be97cd4b972c034345cf060bd7783e
author | artbio |
---|---|
date | Thu, 07 Nov 2024 21:37:11 +0000 |
parents | 6916ac5a9ef0 |
children |
line wrap: on
line diff
--- a/MannWhitney_DE.R Thu Nov 30 02:03:53 2023 +0000 +++ b/MannWhitney_DE.R Thu Nov 07 21:37:11 2024 +0000 @@ -3,103 +3,105 @@ # Example of command # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library(optparse) + library(optparse) }) sessionInfo() option_list <- list( - make_option( - "--input", - default = NA, - type = "character", - help = "Input file that contains log2(CPM +1) values" - ), - make_option( - "--sep", - default = "\t", - type = "character", - help = "File separator [default : '%default' ]" - ), - make_option( - "--colnames", - default = TRUE, - type = "logical", - help = "Consider first line as header ? [default : '%default' ]" - ), - make_option( - "--comparison_factor_file", - default = NA, - type = "character", - help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" - ), - make_option( - "--factor1", - type = "character", - help = "level associated to the control condition in the factor file" - ), - make_option( - "--factor2", - type = "character", - help = "level associated to the test condition in the factor file" - ), - make_option( - "--fdr", - default = 0.01, - type = "numeric", - help = "FDR threshold [default : '%default' ]" - ), - make_option( - "--log", - default = FALSE, - action = "store_true", - type = "logical", - help = "Expression data are log-transformed [default : '%default' ]" - ), - make_option( - "--output", - default = "results.tsv", - type = "character", - help = "Output name [default : '%default' ]" - ) + make_option( + "--input", + default = NA, + type = "character", + help = "Input file that contains log2(CPM +1) values" + ), + make_option( + "--sep", + default = "\t", + type = "character", + help = "File separator [default : '%default' ]" + ), + make_option( + "--colnames", + default = TRUE, + type = "logical", + help = "Consider first line as header ? [default : '%default' ]" + ), + make_option( + "--comparison_factor_file", + default = NA, + type = "character", + help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" + ), + make_option( + "--factor1", + type = "character", + help = "level associated to the control condition in the factor file" + ), + make_option( + "--factor2", + type = "character", + help = "level associated to the test condition in the factor file" + ), + make_option( + "--fdr", + default = 0.01, + type = "numeric", + help = "FDR threshold [default : '%default' ]" + ), + make_option( + "--log", + default = FALSE, + action = "store_true", + type = "logical", + help = "Expression data are log-transformed [default : '%default' ]" + ), + make_option( + "--output", + default = "results.tsv", + type = "character", + help = "Output name [default : '%default' ]" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) if (opt$sep == "tab") { - opt$sep <- "\t" + opt$sep <- "\t" } if (opt$sep == "comma") { - opt$sep <- "," + opt$sep <- "," } -#Open files +# Open files data.counts <- read.table( - opt$input, - h = opt$colnames, - row.names = 1, - sep = opt$sep, - check.names = FALSE + opt$input, + h = opt$colnames, + row.names = 1, + sep = opt$sep, + check.names = FALSE ) metadata <- read.table( - opt$comparison_factor_file, - header = TRUE, - stringsAsFactors = FALSE, - sep = "\t", - check.names = FALSE, - row.names = 1 + opt$comparison_factor_file, + header = TRUE, + stringsAsFactors = FALSE, + sep = "\t", + check.names = FALSE, + row.names = 1 ) metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) @@ -110,7 +112,7 @@ ## Mann-Whitney test (Two-sample Wilcoxon test) MW_test <- data.frame(t(apply(data.counts, 1, function(x) { - do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] + do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] })), stringsAsFactors = FALSE) # Benjamini-Hochberg correction and significativity @@ -119,22 +121,22 @@ ## Descriptive Statistics Function descriptive_stats <- function(InputData) { - SummaryData <- data.frame( - mean = rowMeans(InputData), - SD = apply(InputData, 1, sd), - Variance = apply(InputData, 1, var), - Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { - (sum(x != 0) / ncol(y)) * 100 - }), - mean_condition2 = rowMeans(InputData[, factor2_cells]), - mean_condition1 = rowMeans(InputData[, factor1_cells]) - ) - if (opt$log) { - SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 - } else { - SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) - } - return(SummaryData) + SummaryData <- data.frame( + mean = rowMeans(InputData), + SD = apply(InputData, 1, sd), + Variance = apply(InputData, 1, var), + Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { + (sum(x != 0) / ncol(y)) * 100 + }), + mean_condition2 = rowMeans(InputData[, factor2_cells]), + mean_condition1 = rowMeans(InputData[, factor1_cells]) + ) + if (opt$log) { + SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 + } else { + SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) + } + return(SummaryData) } gene_stats <- descriptive_stats(data.counts) @@ -149,10 +151,10 @@ # Save files write.table( - results[order(results$p.adjust), ], - opt$output, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + results[order(results$p.adjust), ], + opt$output, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE )