Mercurial > repos > artbio > gsc_mannwhitney_de
comparison MannWhitney_DE.R @ 0:c67dba545a37 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_mannwhitney_de commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:39:39 -0400 |
parents | |
children | 3d86c89f15bf |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c67dba545a37 |
---|---|
1 #################### | |
2 # Differential # | |
3 # analysis # | |
4 #################### | |
5 | |
6 # Perform a differential analysis between 2 | |
7 # groups of cells. | |
8 | |
9 # Example of command | |
10 # Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv> | |
11 | |
12 # load packages that are provided in the conda env | |
13 options( show.error.messages=F, | |
14 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
15 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
16 warnings() | |
17 library(optparse) | |
18 | |
19 #Arguments | |
20 option_list = list( | |
21 make_option( | |
22 "--input", | |
23 default = NA, | |
24 type = 'character', | |
25 help = "Input file that contains log2(CPM +1) values" | |
26 ), | |
27 make_option( | |
28 "--sep", | |
29 default = '\t', | |
30 type = 'character', | |
31 help = "File separator [default : '%default' ]" | |
32 ), | |
33 make_option( | |
34 "--colnames", | |
35 default = TRUE, | |
36 type = 'logical', | |
37 help = "Consider first line as header ? [default : '%default' ]" | |
38 ), | |
39 make_option( | |
40 "--comparison_factor_file", | |
41 default = NA, | |
42 type = 'character', | |
43 help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)" | |
44 ), | |
45 make_option( | |
46 "--factor1", | |
47 type = 'character', | |
48 help = "level associated to the control condition in the factor file" | |
49 ), | |
50 make_option( | |
51 "--factor2", | |
52 type = 'character', | |
53 help = "level associated to the test condition in the factor file" | |
54 ), | |
55 make_option( | |
56 "--fdr", | |
57 default = 0.01, | |
58 type = 'numeric', | |
59 help = "FDR threshold [default : '%default' ]" | |
60 ), | |
61 make_option( | |
62 "--log", | |
63 default=FALSE, | |
64 action="store_true", | |
65 type = 'logical', | |
66 help = "Expression data are log-transformed [default : '%default' ]" | |
67 ), | |
68 make_option( | |
69 "--output", | |
70 default = "results.tsv", | |
71 type = 'character', | |
72 help = "Output name [default : '%default' ]" | |
73 ) | |
74 ) | |
75 | |
76 opt = parse_args(OptionParser(option_list = option_list), | |
77 args = commandArgs(trailingOnly = TRUE)) | |
78 | |
79 if (opt$sep == "tab") {opt$sep = "\t"} | |
80 if (opt$sep == "comma") {opt$sep = ","} | |
81 | |
82 #Open files | |
83 data.counts <- read.table( | |
84 opt$input, | |
85 h = opt$colnames, | |
86 row.names = 1, | |
87 sep = opt$sep, | |
88 check.names = F | |
89 ) | |
90 | |
91 metadata <- read.table( | |
92 opt$comparison_factor_file, | |
93 header = TRUE, | |
94 stringsAsFactors = F, | |
95 sep = "\t", | |
96 check.names = FALSE, | |
97 row.names = 1 | |
98 ) | |
99 | |
100 metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts)) | |
101 | |
102 # Create two logical named vectors for each factor level of cell signature | |
103 factor1_cells <- setNames(metadata[,1] == opt$factor1, rownames(metadata)) | |
104 factor2_cells <- setNames(metadata[,1] == opt$factor2, rownames(metadata)) | |
105 | |
106 ## Mann-Whitney test (Two-sample Wilcoxon test) | |
107 MW_test <- data.frame(t(apply(data.counts, 1, function(x) { | |
108 do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2] | |
109 })), stringsAsFactors = F) | |
110 | |
111 # Benjamini-Hochberg correction and significativity | |
112 MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH" , n = nrow(MW_test)) | |
113 # MW_test$Critical.value <- (rank(MW_test$p.value) / nrow(MW_test)) * opt$fdr | |
114 MW_test$Significant <- MW_test$p.adjust < opt$fdr | |
115 | |
116 ## Descriptive Statistics Function | |
117 descriptive_stats <- function(InputData) { | |
118 SummaryData = data.frame( | |
119 mean = rowMeans(InputData), | |
120 SD = apply(InputData, 1, sd), | |
121 Variance = apply(InputData, 1, var), | |
122 Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { | |
123 (sum(x != 0) / ncol(y)) * 100 | |
124 }), | |
125 mean_condition2 = rowMeans(InputData[,factor2_cells]), | |
126 mean_condition1 = rowMeans(InputData[, factor1_cells]) | |
127 ) | |
128 if(opt$log) { | |
129 SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1 | |
130 } else { | |
131 SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1) | |
132 } | |
133 return(SummaryData) | |
134 } | |
135 | |
136 gene_stats <- descriptive_stats(data.counts) | |
137 | |
138 results <- merge(gene_stats, MW_test, by = "row.names") | |
139 colnames(results)[1] <- "genes" | |
140 | |
141 # Save files | |
142 write.table( | |
143 results, | |
144 opt$output, | |
145 sep = "\t", | |
146 quote = F, | |
147 col.names = T, | |
148 row.names = F | |
149 ) |