Mercurial > repos > artbio > gsc_mannwhitney_de
comparison MannWhitney_DE.R @ 4:6916ac5a9ef0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de commit c394391dcf541d91ee1dfdc0c3d80cd7a21942ff
author | artbio |
---|---|
date | Thu, 30 Nov 2023 02:03:53 +0000 |
parents | 3d86c89f15bf |
children | a7eb04240b8b |
comparison
equal
deleted
inserted
replaced
3:3d86c89f15bf | 4:6916ac5a9ef0 |
---|---|
1 #################### | 1 # Perform a differential analysis between 2 groups of cells. |
2 # Differential # | |
3 # analysis # | |
4 #################### | |
5 | |
6 # Perform a differential analysis between 2 | |
7 # groups of cells. | |
8 | 2 |
9 # Example of command | 3 # Example of command |
10 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> | 4 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> |
11 | 5 |
12 # load packages that are provided in the conda env | 6 options(show.error.messages = FALSE, |
13 options( show.error.messages=F, | 7 error = function() { |
14 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 8 cat(geterrmessage(), file = stderr()) |
9 q("no", 1, FALSE) | |
10 } | |
11 ) | |
12 | |
15 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 13 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
16 warnings() | |
17 library(optparse) | |
18 | 14 |
19 #Arguments | 15 suppressPackageStartupMessages({ |
20 option_list = list( | 16 library(optparse) |
17 }) | |
18 | |
19 sessionInfo() | |
20 | |
21 option_list <- list( | |
21 make_option( | 22 make_option( |
22 "--input", | 23 "--input", |
23 default = NA, | 24 default = NA, |
24 type = 'character', | 25 type = "character", |
25 help = "Input file that contains log2(CPM +1) values" | 26 help = "Input file that contains log2(CPM +1) values" |
26 ), | 27 ), |
27 make_option( | 28 make_option( |
28 "--sep", | 29 "--sep", |
29 default = '\t', | 30 default = "\t", |
30 type = 'character', | 31 type = "character", |
31 help = "File separator [default : '%default' ]" | 32 help = "File separator [default : '%default' ]" |
32 ), | 33 ), |
33 make_option( | 34 make_option( |
34 "--colnames", | 35 "--colnames", |
35 default = TRUE, | 36 default = TRUE, |
36 type = 'logical', | 37 type = "logical", |
37 help = "Consider first line as header ? [default : '%default' ]" | 38 help = "Consider first line as header ? [default : '%default' ]" |
38 ), | 39 ), |
39 make_option( | 40 make_option( |
40 "--comparison_factor_file", | 41 "--comparison_factor_file", |
41 default = NA, | 42 default = NA, |
42 type = 'character', | 43 type = "character", |
43 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" | 44 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" |
44 ), | 45 ), |
45 make_option( | 46 make_option( |
46 "--factor1", | 47 "--factor1", |
47 type = 'character', | 48 type = "character", |
48 help = "level associated to the control condition in the factor file" | 49 help = "level associated to the control condition in the factor file" |
49 ), | 50 ), |
50 make_option( | 51 make_option( |
51 "--factor2", | 52 "--factor2", |
52 type = 'character', | 53 type = "character", |
53 help = "level associated to the test condition in the factor file" | 54 help = "level associated to the test condition in the factor file" |
54 ), | 55 ), |
55 make_option( | 56 make_option( |
56 "--fdr", | 57 "--fdr", |
57 default = 0.01, | 58 default = 0.01, |
58 type = 'numeric', | 59 type = "numeric", |
59 help = "FDR threshold [default : '%default' ]" | 60 help = "FDR threshold [default : '%default' ]" |
60 ), | 61 ), |
61 make_option( | 62 make_option( |
62 "--log", | 63 "--log", |
63 default=FALSE, | 64 default = FALSE, |
64 action="store_true", | 65 action = "store_true", |
65 type = 'logical', | 66 type = "logical", |
66 help = "Expression data are log-transformed [default : '%default' ]" | 67 help = "Expression data are log-transformed [default : '%default' ]" |
67 ), | 68 ), |
68 make_option( | 69 make_option( |
69 "--output", | 70 "--output", |
70 default = "results.tsv", | 71 default = "results.tsv", |
71 type = 'character', | 72 type = "character", |
72 help = "Output name [default : '%default' ]" | 73 help = "Output name [default : '%default' ]" |
73 ) | 74 ) |
74 ) | 75 ) |
75 | 76 |
76 opt = parse_args(OptionParser(option_list = option_list), | 77 opt <- parse_args(OptionParser(option_list = option_list), |
77 args = commandArgs(trailingOnly = TRUE)) | 78 args = commandArgs(trailingOnly = TRUE)) |
78 | 79 |
79 if (opt$sep == "tab") {opt$sep = "\t"} | 80 if (opt$sep == "tab") { |
80 if (opt$sep == "comma") {opt$sep = ","} | 81 opt$sep <- "\t" |
82 } | |
83 if (opt$sep == "comma") { | |
84 opt$sep <- "," | |
85 } | |
81 | 86 |
82 #Open files | 87 #Open files |
83 data.counts <- read.table( | 88 data.counts <- read.table( |
84 opt$input, | 89 opt$input, |
85 h = opt$colnames, | 90 h = opt$colnames, |
86 row.names = 1, | 91 row.names = 1, |
87 sep = opt$sep, | 92 sep = opt$sep, |
88 check.names = F | 93 check.names = FALSE |
89 ) | 94 ) |
90 | 95 |
91 metadata <- read.table( | 96 metadata <- read.table( |
92 opt$comparison_factor_file, | 97 opt$comparison_factor_file, |
93 header = TRUE, | 98 header = TRUE, |
94 stringsAsFactors = F, | 99 stringsAsFactors = FALSE, |
95 sep = "\t", | 100 sep = "\t", |
96 check.names = FALSE, | 101 check.names = FALSE, |
97 row.names = 1 | 102 row.names = 1 |
98 ) | 103 ) |
99 | 104 |
100 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) | 105 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) |
101 | 106 |
102 # Create two logical named vectors for each factor level of cell signature | 107 # Create two logical named vectors for each factor level of cell signature |
103 factor1_cells <- setNames(metadata[,1] == opt$factor1, rownames(metadata)) | 108 factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata)) |
104 factor2_cells <- setNames(metadata[,1] == opt$factor2, rownames(metadata)) | 109 factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata)) |
105 | 110 |
106 ## Mann-Whitney test (Two-sample Wilcoxon test) | 111 ## Mann-Whitney test (Two-sample Wilcoxon test) |
107 MW_test <- data.frame(t(apply(data.counts, 1, function(x) { | 112 MW_test <- data.frame(t(apply(data.counts, 1, function(x) { |
108 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] | 113 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] |
109 })), stringsAsFactors = F) | 114 })), stringsAsFactors = FALSE) |
110 | 115 |
111 # Benjamini-Hochberg correction and significativity | 116 # Benjamini-Hochberg correction and significativity |
112 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH" , n = nrow(MW_test)) | 117 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test)) |
113 # MW_test$Critical.value <- (rank(MW_test$p.value) / nrow(MW_test)) * opt$fdr | |
114 MW_test$Significant <- MW_test$p.adjust < opt$fdr | 118 MW_test$Significant <- MW_test$p.adjust < opt$fdr |
115 | 119 |
116 ## Descriptive Statistics Function | 120 ## Descriptive Statistics Function |
117 descriptive_stats <- function(InputData) { | 121 descriptive_stats <- function(InputData) { |
118 SummaryData = data.frame( | 122 SummaryData <- data.frame( |
119 mean = rowMeans(InputData), | 123 mean = rowMeans(InputData), |
120 SD = apply(InputData, 1, sd), | 124 SD = apply(InputData, 1, sd), |
121 Variance = apply(InputData, 1, var), | 125 Variance = apply(InputData, 1, var), |
122 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { | 126 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { |
123 (sum(x != 0) / ncol(y)) * 100 | 127 (sum(x != 0) / ncol(y)) * 100 |
124 }), | 128 }), |
125 mean_condition2 = rowMeans(InputData[,factor2_cells]), | 129 mean_condition2 = rowMeans(InputData[, factor2_cells]), |
126 mean_condition1 = rowMeans(InputData[, factor1_cells]) | 130 mean_condition1 = rowMeans(InputData[, factor1_cells]) |
127 ) | 131 ) |
128 if(opt$log) { | 132 if (opt$log) { |
129 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 | 133 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 |
130 } else { | 134 } else { |
131 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) | 135 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) |
132 } | 136 } |
133 return(SummaryData) | 137 return(SummaryData) |
134 } | 138 } |
135 | 139 |
136 gene_stats <- descriptive_stats(data.counts) | 140 gene_stats <- descriptive_stats(data.counts) |
137 | 141 |
138 results <- merge(gene_stats, MW_test, by = "row.names") | 142 results <- merge(gene_stats, MW_test, by = "row.names") |
139 colnames(results)[1] <- "genes" | 143 colnames(results)[1] <- "genes" |
140 | 144 |
141 ## Annotate Significant column | 145 ## Annotate Significant column |
142 results$Significant[results$Significant == T & !is.na(results$Significant)] <- ifelse(subset(results, Significant == T)$log2FC > 0, "UP", "DOWN") | 146 results$Significant[results$Significant == TRUE & !is.na(results$Significant)] <- ifelse(subset(results, Significant == TRUE)$log2FC > 0, "UP", "DOWN") |
143 results$Significant[results$Significant == F & !is.na(results$Significant)] <- "NS" | 147 results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS" |
144 | 148 |
145 | 149 |
146 # Save files | 150 # Save files |
147 write.table( | 151 write.table( |
148 results[order(results$p.adjust),], | 152 results[order(results$p.adjust), ], |
149 opt$output, | 153 opt$output, |
150 sep = "\t", | 154 sep = "\t", |
151 quote = F, | 155 quote = FALSE, |
152 col.names = T, | 156 col.names = TRUE, |
153 row.names = F | 157 row.names = FALSE |
154 ) | 158 ) |