comparison MannWhitney_DE.R @ 5:a7eb04240b8b draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de commit 9286176209be97cd4b972c034345cf060bd7783e
author artbio
date Thu, 07 Nov 2024 21:37:11 +0000
parents 6916ac5a9ef0
children
comparison
equal deleted inserted replaced
4:6916ac5a9ef0 5:a7eb04240b8b
1 # Perform a differential analysis between 2 groups of cells. 1 # Perform a differential analysis between 2 groups of cells.
2 2
3 # Example of command 3 # Example of command
4 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> 4 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv>
5 5
6 options(show.error.messages = FALSE, 6 options(
7 error = function() { 7 show.error.messages = FALSE,
8 cat(geterrmessage(), file = stderr()) 8 error = function() {
9 q("no", 1, FALSE) 9 cat(geterrmessage(), file = stderr())
10 } 10 q("no", 1, FALSE)
11 }
11 ) 12 )
12 13
13 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 14 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
14 15
15 suppressPackageStartupMessages({ 16 suppressPackageStartupMessages({
16 library(optparse) 17 library(optparse)
17 }) 18 })
18 19
19 sessionInfo() 20 sessionInfo()
20 21
21 option_list <- list( 22 option_list <- list(
22 make_option( 23 make_option(
23 "--input", 24 "--input",
24 default = NA, 25 default = NA,
25 type = "character", 26 type = "character",
26 help = "Input file that contains log2(CPM +1) values" 27 help = "Input file that contains log2(CPM +1) values"
27 ), 28 ),
28 make_option( 29 make_option(
29 "--sep", 30 "--sep",
30 default = "\t", 31 default = "\t",
31 type = "character", 32 type = "character",
32 help = "File separator [default : '%default' ]" 33 help = "File separator [default : '%default' ]"
33 ), 34 ),
34 make_option( 35 make_option(
35 "--colnames", 36 "--colnames",
36 default = TRUE, 37 default = TRUE,
37 type = "logical", 38 type = "logical",
38 help = "Consider first line as header ? [default : '%default' ]" 39 help = "Consider first line as header ? [default : '%default' ]"
39 ), 40 ),
40 make_option( 41 make_option(
41 "--comparison_factor_file", 42 "--comparison_factor_file",
42 default = NA, 43 default = NA,
43 type = "character", 44 type = "character",
44 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" 45 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)"
45 ), 46 ),
46 make_option( 47 make_option(
47 "--factor1", 48 "--factor1",
48 type = "character", 49 type = "character",
49 help = "level associated to the control condition in the factor file" 50 help = "level associated to the control condition in the factor file"
50 ), 51 ),
51 make_option( 52 make_option(
52 "--factor2", 53 "--factor2",
53 type = "character", 54 type = "character",
54 help = "level associated to the test condition in the factor file" 55 help = "level associated to the test condition in the factor file"
55 ), 56 ),
56 make_option( 57 make_option(
57 "--fdr", 58 "--fdr",
58 default = 0.01, 59 default = 0.01,
59 type = "numeric", 60 type = "numeric",
60 help = "FDR threshold [default : '%default' ]" 61 help = "FDR threshold [default : '%default' ]"
61 ), 62 ),
62 make_option( 63 make_option(
63 "--log", 64 "--log",
64 default = FALSE, 65 default = FALSE,
65 action = "store_true", 66 action = "store_true",
66 type = "logical", 67 type = "logical",
67 help = "Expression data are log-transformed [default : '%default' ]" 68 help = "Expression data are log-transformed [default : '%default' ]"
68 ), 69 ),
69 make_option( 70 make_option(
70 "--output", 71 "--output",
71 default = "results.tsv", 72 default = "results.tsv",
72 type = "character", 73 type = "character",
73 help = "Output name [default : '%default' ]" 74 help = "Output name [default : '%default' ]"
74 ) 75 )
75 ) 76 )
76 77
77 opt <- parse_args(OptionParser(option_list = option_list), 78 opt <- parse_args(OptionParser(option_list = option_list),
78 args = commandArgs(trailingOnly = TRUE)) 79 args = commandArgs(trailingOnly = TRUE)
80 )
79 81
80 if (opt$sep == "tab") { 82 if (opt$sep == "tab") {
81 opt$sep <- "\t" 83 opt$sep <- "\t"
82 } 84 }
83 if (opt$sep == "comma") { 85 if (opt$sep == "comma") {
84 opt$sep <- "," 86 opt$sep <- ","
85 } 87 }
86 88
87 #Open files 89 # Open files
88 data.counts <- read.table( 90 data.counts <- read.table(
89 opt$input, 91 opt$input,
90 h = opt$colnames, 92 h = opt$colnames,
91 row.names = 1, 93 row.names = 1,
92 sep = opt$sep, 94 sep = opt$sep,
93 check.names = FALSE 95 check.names = FALSE
94 ) 96 )
95 97
96 metadata <- read.table( 98 metadata <- read.table(
97 opt$comparison_factor_file, 99 opt$comparison_factor_file,
98 header = TRUE, 100 header = TRUE,
99 stringsAsFactors = FALSE, 101 stringsAsFactors = FALSE,
100 sep = "\t", 102 sep = "\t",
101 check.names = FALSE, 103 check.names = FALSE,
102 row.names = 1 104 row.names = 1
103 ) 105 )
104 106
105 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) 107 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts))
106 108
107 # Create two logical named vectors for each factor level of cell signature 109 # Create two logical named vectors for each factor level of cell signature
108 factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata)) 110 factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata))
109 factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata)) 111 factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata))
110 112
111 ## Mann-Whitney test (Two-sample Wilcoxon test) 113 ## Mann-Whitney test (Two-sample Wilcoxon test)
112 MW_test <- data.frame(t(apply(data.counts, 1, function(x) { 114 MW_test <- data.frame(t(apply(data.counts, 1, function(x) {
113 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] 115 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2]
114 })), stringsAsFactors = FALSE) 116 })), stringsAsFactors = FALSE)
115 117
116 # Benjamini-Hochberg correction and significativity 118 # Benjamini-Hochberg correction and significativity
117 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test)) 119 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test))
118 MW_test$Significant <- MW_test$p.adjust < opt$fdr 120 MW_test$Significant <- MW_test$p.adjust < opt$fdr
119 121
120 ## Descriptive Statistics Function 122 ## Descriptive Statistics Function
121 descriptive_stats <- function(InputData) { 123 descriptive_stats <- function(InputData) {
122 SummaryData <- data.frame( 124 SummaryData <- data.frame(
123 mean = rowMeans(InputData), 125 mean = rowMeans(InputData),
124 SD = apply(InputData, 1, sd), 126 SD = apply(InputData, 1, sd),
125 Variance = apply(InputData, 1, var), 127 Variance = apply(InputData, 1, var),
126 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { 128 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) {
127 (sum(x != 0) / ncol(y)) * 100 129 (sum(x != 0) / ncol(y)) * 100
128 }), 130 }),
129 mean_condition2 = rowMeans(InputData[, factor2_cells]), 131 mean_condition2 = rowMeans(InputData[, factor2_cells]),
130 mean_condition1 = rowMeans(InputData[, factor1_cells]) 132 mean_condition1 = rowMeans(InputData[, factor1_cells])
131 ) 133 )
132 if (opt$log) { 134 if (opt$log) {
133 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 135 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1
134 } else { 136 } else {
135 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) 137 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1)
136 } 138 }
137 return(SummaryData) 139 return(SummaryData)
138 } 140 }
139 141
140 gene_stats <- descriptive_stats(data.counts) 142 gene_stats <- descriptive_stats(data.counts)
141 143
142 results <- merge(gene_stats, MW_test, by = "row.names") 144 results <- merge(gene_stats, MW_test, by = "row.names")
147 results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS" 149 results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS"
148 150
149 151
150 # Save files 152 # Save files
151 write.table( 153 write.table(
152 results[order(results$p.adjust), ], 154 results[order(results$p.adjust), ],
153 opt$output, 155 opt$output,
154 sep = "\t", 156 sep = "\t",
155 quote = FALSE, 157 quote = FALSE,
156 col.names = TRUE, 158 col.names = TRUE,
157 row.names = FALSE 159 row.names = FALSE
158 ) 160 )