Mercurial > repos > artbio > gsc_mannwhitney_de
comparison MannWhitney_DE.R @ 5:a7eb04240b8b draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de commit 9286176209be97cd4b972c034345cf060bd7783e
author | artbio |
---|---|
date | Thu, 07 Nov 2024 21:37:11 +0000 |
parents | 6916ac5a9ef0 |
children |
comparison
equal
deleted
inserted
replaced
4:6916ac5a9ef0 | 5:a7eb04240b8b |
---|---|
1 # Perform a differential analysis between 2 groups of cells. | 1 # Perform a differential analysis between 2 groups of cells. |
2 | 2 |
3 # Example of command | 3 # Example of command |
4 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> | 4 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> |
5 | 5 |
6 options(show.error.messages = FALSE, | 6 options( |
7 error = function() { | 7 show.error.messages = FALSE, |
8 cat(geterrmessage(), file = stderr()) | 8 error = function() { |
9 q("no", 1, FALSE) | 9 cat(geterrmessage(), file = stderr()) |
10 } | 10 q("no", 1, FALSE) |
11 } | |
11 ) | 12 ) |
12 | 13 |
13 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 14 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
14 | 15 |
15 suppressPackageStartupMessages({ | 16 suppressPackageStartupMessages({ |
16 library(optparse) | 17 library(optparse) |
17 }) | 18 }) |
18 | 19 |
19 sessionInfo() | 20 sessionInfo() |
20 | 21 |
21 option_list <- list( | 22 option_list <- list( |
22 make_option( | 23 make_option( |
23 "--input", | 24 "--input", |
24 default = NA, | 25 default = NA, |
25 type = "character", | 26 type = "character", |
26 help = "Input file that contains log2(CPM +1) values" | 27 help = "Input file that contains log2(CPM +1) values" |
27 ), | 28 ), |
28 make_option( | 29 make_option( |
29 "--sep", | 30 "--sep", |
30 default = "\t", | 31 default = "\t", |
31 type = "character", | 32 type = "character", |
32 help = "File separator [default : '%default' ]" | 33 help = "File separator [default : '%default' ]" |
33 ), | 34 ), |
34 make_option( | 35 make_option( |
35 "--colnames", | 36 "--colnames", |
36 default = TRUE, | 37 default = TRUE, |
37 type = "logical", | 38 type = "logical", |
38 help = "Consider first line as header ? [default : '%default' ]" | 39 help = "Consider first line as header ? [default : '%default' ]" |
39 ), | 40 ), |
40 make_option( | 41 make_option( |
41 "--comparison_factor_file", | 42 "--comparison_factor_file", |
42 default = NA, | 43 default = NA, |
43 type = "character", | 44 type = "character", |
44 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" | 45 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" |
45 ), | 46 ), |
46 make_option( | 47 make_option( |
47 "--factor1", | 48 "--factor1", |
48 type = "character", | 49 type = "character", |
49 help = "level associated to the control condition in the factor file" | 50 help = "level associated to the control condition in the factor file" |
50 ), | 51 ), |
51 make_option( | 52 make_option( |
52 "--factor2", | 53 "--factor2", |
53 type = "character", | 54 type = "character", |
54 help = "level associated to the test condition in the factor file" | 55 help = "level associated to the test condition in the factor file" |
55 ), | 56 ), |
56 make_option( | 57 make_option( |
57 "--fdr", | 58 "--fdr", |
58 default = 0.01, | 59 default = 0.01, |
59 type = "numeric", | 60 type = "numeric", |
60 help = "FDR threshold [default : '%default' ]" | 61 help = "FDR threshold [default : '%default' ]" |
61 ), | 62 ), |
62 make_option( | 63 make_option( |
63 "--log", | 64 "--log", |
64 default = FALSE, | 65 default = FALSE, |
65 action = "store_true", | 66 action = "store_true", |
66 type = "logical", | 67 type = "logical", |
67 help = "Expression data are log-transformed [default : '%default' ]" | 68 help = "Expression data are log-transformed [default : '%default' ]" |
68 ), | 69 ), |
69 make_option( | 70 make_option( |
70 "--output", | 71 "--output", |
71 default = "results.tsv", | 72 default = "results.tsv", |
72 type = "character", | 73 type = "character", |
73 help = "Output name [default : '%default' ]" | 74 help = "Output name [default : '%default' ]" |
74 ) | 75 ) |
75 ) | 76 ) |
76 | 77 |
77 opt <- parse_args(OptionParser(option_list = option_list), | 78 opt <- parse_args(OptionParser(option_list = option_list), |
78 args = commandArgs(trailingOnly = TRUE)) | 79 args = commandArgs(trailingOnly = TRUE) |
80 ) | |
79 | 81 |
80 if (opt$sep == "tab") { | 82 if (opt$sep == "tab") { |
81 opt$sep <- "\t" | 83 opt$sep <- "\t" |
82 } | 84 } |
83 if (opt$sep == "comma") { | 85 if (opt$sep == "comma") { |
84 opt$sep <- "," | 86 opt$sep <- "," |
85 } | 87 } |
86 | 88 |
87 #Open files | 89 # Open files |
88 data.counts <- read.table( | 90 data.counts <- read.table( |
89 opt$input, | 91 opt$input, |
90 h = opt$colnames, | 92 h = opt$colnames, |
91 row.names = 1, | 93 row.names = 1, |
92 sep = opt$sep, | 94 sep = opt$sep, |
93 check.names = FALSE | 95 check.names = FALSE |
94 ) | 96 ) |
95 | 97 |
96 metadata <- read.table( | 98 metadata <- read.table( |
97 opt$comparison_factor_file, | 99 opt$comparison_factor_file, |
98 header = TRUE, | 100 header = TRUE, |
99 stringsAsFactors = FALSE, | 101 stringsAsFactors = FALSE, |
100 sep = "\t", | 102 sep = "\t", |
101 check.names = FALSE, | 103 check.names = FALSE, |
102 row.names = 1 | 104 row.names = 1 |
103 ) | 105 ) |
104 | 106 |
105 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) | 107 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) |
106 | 108 |
107 # Create two logical named vectors for each factor level of cell signature | 109 # Create two logical named vectors for each factor level of cell signature |
108 factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata)) | 110 factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata)) |
109 factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata)) | 111 factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata)) |
110 | 112 |
111 ## Mann-Whitney test (Two-sample Wilcoxon test) | 113 ## Mann-Whitney test (Two-sample Wilcoxon test) |
112 MW_test <- data.frame(t(apply(data.counts, 1, function(x) { | 114 MW_test <- data.frame(t(apply(data.counts, 1, function(x) { |
113 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] | 115 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] |
114 })), stringsAsFactors = FALSE) | 116 })), stringsAsFactors = FALSE) |
115 | 117 |
116 # Benjamini-Hochberg correction and significativity | 118 # Benjamini-Hochberg correction and significativity |
117 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test)) | 119 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test)) |
118 MW_test$Significant <- MW_test$p.adjust < opt$fdr | 120 MW_test$Significant <- MW_test$p.adjust < opt$fdr |
119 | 121 |
120 ## Descriptive Statistics Function | 122 ## Descriptive Statistics Function |
121 descriptive_stats <- function(InputData) { | 123 descriptive_stats <- function(InputData) { |
122 SummaryData <- data.frame( | 124 SummaryData <- data.frame( |
123 mean = rowMeans(InputData), | 125 mean = rowMeans(InputData), |
124 SD = apply(InputData, 1, sd), | 126 SD = apply(InputData, 1, sd), |
125 Variance = apply(InputData, 1, var), | 127 Variance = apply(InputData, 1, var), |
126 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { | 128 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { |
127 (sum(x != 0) / ncol(y)) * 100 | 129 (sum(x != 0) / ncol(y)) * 100 |
128 }), | 130 }), |
129 mean_condition2 = rowMeans(InputData[, factor2_cells]), | 131 mean_condition2 = rowMeans(InputData[, factor2_cells]), |
130 mean_condition1 = rowMeans(InputData[, factor1_cells]) | 132 mean_condition1 = rowMeans(InputData[, factor1_cells]) |
131 ) | 133 ) |
132 if (opt$log) { | 134 if (opt$log) { |
133 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 | 135 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 |
134 } else { | 136 } else { |
135 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) | 137 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) |
136 } | 138 } |
137 return(SummaryData) | 139 return(SummaryData) |
138 } | 140 } |
139 | 141 |
140 gene_stats <- descriptive_stats(data.counts) | 142 gene_stats <- descriptive_stats(data.counts) |
141 | 143 |
142 results <- merge(gene_stats, MW_test, by = "row.names") | 144 results <- merge(gene_stats, MW_test, by = "row.names") |
147 results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS" | 149 results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS" |
148 | 150 |
149 | 151 |
150 # Save files | 152 # Save files |
151 write.table( | 153 write.table( |
152 results[order(results$p.adjust), ], | 154 results[order(results$p.adjust), ], |
153 opt$output, | 155 opt$output, |
154 sep = "\t", | 156 sep = "\t", |
155 quote = FALSE, | 157 quote = FALSE, |
156 col.names = TRUE, | 158 col.names = TRUE, |
157 row.names = FALSE | 159 row.names = FALSE |
158 ) | 160 ) |