Mercurial > repos > melpetera > intensity_checks
comparison Intchecks/Script_intensity_check.R @ 0:c2c2e1be904a draft
Uploaded
author | melpetera |
---|---|
date | Thu, 11 Oct 2018 05:33:19 -0400 |
parents | |
children | 4973a2104cfd |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c2c2e1be904a |
---|---|
1 #################################################################### | |
2 # SCRIPT INTENSITY CHECK | |
3 # V1: Fold and NA | |
4 # | |
5 # Input: Data Matrix, VariableMetadata, SampleMetadata | |
6 # Output: VariableMetadata, Graphics (barplots and boxplots) | |
7 # | |
8 # Dependencies: RcheckLibrary.R | |
9 # | |
10 #################################################################### | |
11 | |
12 | |
13 # Parameters (for dev) | |
14 if(FALSE){ | |
15 | |
16 rm(list = ls()) | |
17 setwd("Y:\\Developpement\\Intensity check\\Pour tests") | |
18 | |
19 DM.name <- "DM_NA.tabular" | |
20 SM.name <- "SM_NA.tabular" | |
21 VM.name <- "vM_NA.tabular" | |
22 type <- "One_class" | |
23 class.col <- "2" | |
24 class1 <- "Blanks" | |
25 VM.output <- "new_VM.txt" | |
26 graphs.output <- "Barplots_and_Boxplots.pdf" | |
27 } | |
28 | |
29 intens_check <- function(DM.name, SM.name, VM.name, type, class.col, class1, VM.output, graphs.output){ | |
30 # This function allows to check the intensities considering classes with a fold calculation, the number and | |
31 # the proportion of NA | |
32 # | |
33 # Two options: | |
34 # - one class (selected by the user) against all the remaining samples ("One_class") | |
35 # - tests on each class ("Each_class") | |
36 # | |
37 # Parameters: | |
38 # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access | |
39 # type: "One_class" or "Each_class" | |
40 # class.col: number of the sampleMetadata's column with classes | |
41 # class1: name of the class if type="One_class" | |
42 # VM.output: output file's access (VM with the new columns) | |
43 # graphs.output: pdf with barplots for the proportion of NA and boxplots with the folds values | |
44 | |
45 | |
46 | |
47 | |
48 # Input --------------------------------------------------------- | |
49 | |
50 DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE) | |
51 SM <- read.table(SM.name, header=TRUE, sep="\t", check.names=FALSE) | |
52 VM <- read.table(VM.name, header=TRUE, sep="\t", check.names=FALSE) | |
53 | |
54 | |
55 | |
56 # Table match check with Rchecklibrary | |
57 table.check <- match3(DM, SM, VM) | |
58 check.err(table.check) | |
59 | |
60 | |
61 rownames(DM) <- DM[,1] | |
62 var_names <- DM[,1] | |
63 DM <- DM[,-1] | |
64 DM <- data.frame(t(DM)) | |
65 | |
66 class.col <- colnames(SM)[as.numeric(class.col)] | |
67 | |
68 # check class.col, class1 and the number of classes--------------- | |
69 | |
70 if(!(class.col %in% colnames(SM))){ | |
71 stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") | |
72 } | |
73 | |
74 c_class <- SM[,class.col] | |
75 c_class <- as.factor(c_class) | |
76 nb_class <- nlevels(c_class) | |
77 classnames <- levels(c_class) | |
78 | |
79 | |
80 if((nb_class > (nrow(SM))/3)){ | |
81 class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those | |
82 with few samples \n") | |
83 cat(class.err) | |
84 } | |
85 | |
86 if(type == "One_class"){ | |
87 if(!(class1 %in% classnames)){ | |
88 list.class1 <- c("\n Classes:",classnames,"\n") | |
89 cat(list.class1) | |
90 err.class1 <- c("The class ",class1, " does not appear in the column ", class.col) | |
91 stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n") | |
92 } | |
93 } | |
94 | |
95 #If type is "one_class", change others classes in "other" | |
96 if(type == "One_class"){ | |
97 for(i in 1:length(c_class)){ | |
98 if(c_class[i]!=class1){ | |
99 c_class <- as.character(c_class) | |
100 c_class[i] <- "Other" | |
101 c_class <- as.factor(c_class) | |
102 nb_class <- nlevels(c_class) | |
103 classnames <- levels(c_class) | |
104 | |
105 } | |
106 } | |
107 } | |
108 | |
109 DM <- cbind(DM,c_class) | |
110 | |
111 # fold ------------------------------------------------------- | |
112 n <- 1 | |
113 fold <- data.frame() | |
114 for(j in 1:(nb_class-1)){ | |
115 for(k in (j+1):nb_class) { | |
116 for (i in 1:(length(DM)-1)){ | |
117 fold[i,n] <- mean(DM[which(DM$c_class==classnames[k]),i], na.rm=TRUE)/ | |
118 mean(DM[which(DM$c_class==classnames[j]),i], na.rm=TRUE)} | |
119 names(fold)[n] <- paste("fold",classnames[k],"VS", classnames[j], sep="_") | |
120 n <- n + 1} | |
121 } | |
122 | |
123 # NA --------------------------------------------------------- | |
124 calcul_NA <- data.frame() | |
125 pct_NA <- data.frame() | |
126 for (i in 1:(length(DM)-1)){ | |
127 for (j in 1:nb_class){ | |
128 n <- 0 | |
129 new_DM <- DM[which(DM$c_class==classnames[j]),i] | |
130 for(k in 1:length(new_DM)){ | |
131 if (is.na(new_DM[k])){ | |
132 n <- n + 1} | |
133 calcul_NA[i,j] <- n | |
134 pct_NA[i,j] <- (calcul_NA[i,j]/length(new_DM))*100} | |
135 } | |
136 } | |
137 names(calcul_NA) <- paste("Nb_NA",classnames, sep="_") | |
138 names(pct_NA) <- paste("Pct_NA", classnames, sep="_") | |
139 | |
140 # Alert message if there is no NA | |
141 | |
142 sumNA <- colSums(calcul_NA) | |
143 sum_total <- sum(sumNA) | |
144 | |
145 alerte <- NULL | |
146 if(sum_total==0){ | |
147 alerte <- c(alerte, "Data Matrix contains no NA.\n") | |
148 } | |
149 | |
150 if(length(alerte) != 0){ | |
151 cat(alerte,"\n") | |
152 } | |
153 | |
154 table_NA <- cbind(calcul_NA, pct_NA) | |
155 | |
156 # check columns names ---------------------------------------- | |
157 | |
158 # Fold | |
159 VM.names <- colnames(VM) | |
160 fold.names <- colnames(fold) | |
161 | |
162 for (i in 1:length(VM.names)){ | |
163 for (j in 1:length(fold.names)){ | |
164 if (VM.names[i]==fold.names[j]){ | |
165 fold.names[j] <- paste(fold.names[j],"2", sep="_") | |
166 } | |
167 } | |
168 } | |
169 colnames(fold) <- fold.names | |
170 | |
171 # NA | |
172 NA.names <- colnames(table_NA) | |
173 | |
174 for (i in 1:length(VM.names)){ | |
175 for (j in 1:length(NA.names)){ | |
176 if (VM.names[i]==NA.names[j]){ | |
177 NA.names[j] <- paste(NA.names[j],"2", sep="_") | |
178 } | |
179 } | |
180 } | |
181 colnames(table_NA) <- NA.names | |
182 | |
183 #for NA barplots --------------------------------------------- | |
184 | |
185 data_bp <- data.frame() | |
186 | |
187 for (j in 1:ncol(pct_NA)){ | |
188 Nb_NA_0_20 <- 0 | |
189 Nb_NA_20_40 <- 0 | |
190 Nb_NA_40_60 <- 0 | |
191 Nb_NA_60_80 <- 0 | |
192 Nb_NA_80_100 <- 0 | |
193 for (i in 1:nrow(pct_NA)){ | |
194 | |
195 if ((0<=pct_NA[i,j])&(pct_NA[i,j]<20)){ | |
196 Nb_NA_0_20=Nb_NA_0_20+1 | |
197 } | |
198 | |
199 if ((20<=pct_NA[i,j])&(pct_NA[i,j]<40)){ | |
200 Nb_NA_20_40=Nb_NA_20_40+1} | |
201 | |
202 if ((40<=pct_NA[i,j])&(pct_NA[i,j]<60)){ | |
203 Nb_NA_40_60=Nb_NA_40_60+1} | |
204 | |
205 if ((60<=pct_NA[i,j])&(pct_NA[i,j]<80)){ | |
206 Nb_NA_60_80=Nb_NA_60_80+1} | |
207 | |
208 if ((80<=pct_NA[i,j])&(pct_NA[i,j]<=100)){ | |
209 Nb_NA_80_100=Nb_NA_80_100+1} | |
210 } | |
211 data_bp[1,j] <- Nb_NA_0_20 | |
212 data_bp[2,j] <- Nb_NA_20_40 | |
213 data_bp[3,j] <- Nb_NA_40_60 | |
214 data_bp[4,j] <- Nb_NA_60_80 | |
215 data_bp[5,j] <- Nb_NA_80_100 | |
216 } | |
217 rownames(data_bp) <- c("0-20%", "20-40%", "40-60%", "60-80%", "80-100%") | |
218 colnames(data_bp) <- classnames | |
219 data_bp <- as.matrix(data_bp) | |
220 | |
221 | |
222 # Output-------------------------------------------------------- | |
223 | |
224 VM <- cbind(VM,fold,table_NA) | |
225 | |
226 write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE) | |
227 | |
228 #graphics pdf | |
229 | |
230 pdf(graphs.output) | |
231 | |
232 #Boxplots for NA | |
233 | |
234 par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) | |
235 | |
236 bp=barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables") | |
237 legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0)) | |
238 | |
239 | |
240 stock=0 | |
241 for (i in 1:nrow(data_bp)){ | |
242 text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white") | |
243 stock <- stock+data_bp[i,] | |
244 } | |
245 | |
246 | |
247 #Boxplots for fold test | |
248 for (j in 1:ncol(fold)){ | |
249 title=paste(fold.names[j]) | |
250 boxplot(fold[j], main=title) | |
251 } | |
252 | |
253 dev.off() | |
254 | |
255 } | |
256 | |
257 | |
258 # Function call--------------- | |
259 | |
260 #setwd("Y:\\Developpement\\Intensity check\\Pour tests") | |
261 #intens_check("DM_NA.tabular", "SM_NA.tabular", "VM_NA.tabular", "One_class", "class", "Blanks", "VM_oneclass_NA.txt", | |
262 #"Barplots_and_Boxplots") | |
263 | |
264 |