0
|
1 ####################################################################
|
|
2 # SCRIPT INTENSITY CHECK
|
|
3 # V1: Fold and NA
|
|
4 #
|
|
5 # Input: Data Matrix, VariableMetadata, SampleMetadata
|
|
6 # Output: VariableMetadata, Graphics (barplots and boxplots)
|
|
7 #
|
|
8 # Dependencies: RcheckLibrary.R
|
|
9 #
|
|
10 ####################################################################
|
|
11
|
|
12
|
|
13 # Parameters (for dev)
|
|
14 if(FALSE){
|
|
15
|
|
16 rm(list = ls())
|
|
17 setwd("Y:\\Developpement\\Intensity check\\Pour tests")
|
|
18
|
|
19 DM.name <- "DM_NA.tabular"
|
|
20 SM.name <- "SM_NA.tabular"
|
|
21 VM.name <- "vM_NA.tabular"
|
|
22 type <- "One_class"
|
|
23 class.col <- "2"
|
|
24 class1 <- "Blanks"
|
|
25 VM.output <- "new_VM.txt"
|
|
26 graphs.output <- "Barplots_and_Boxplots.pdf"
|
|
27 }
|
|
28
|
|
29 intens_check <- function(DM.name, SM.name, VM.name, type, class.col, class1, VM.output, graphs.output){
|
|
30 # This function allows to check the intensities considering classes with a fold calculation, the number and
|
|
31 # the proportion of NA
|
|
32 #
|
|
33 # Two options:
|
|
34 # - one class (selected by the user) against all the remaining samples ("One_class")
|
|
35 # - tests on each class ("Each_class")
|
|
36 #
|
|
37 # Parameters:
|
|
38 # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access
|
|
39 # type: "One_class" or "Each_class"
|
|
40 # class.col: number of the sampleMetadata's column with classes
|
|
41 # class1: name of the class if type="One_class"
|
|
42 # VM.output: output file's access (VM with the new columns)
|
|
43 # graphs.output: pdf with barplots for the proportion of NA and boxplots with the folds values
|
|
44
|
|
45
|
|
46
|
|
47
|
|
48 # Input ---------------------------------------------------------
|
|
49
|
|
50 DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE)
|
|
51 SM <- read.table(SM.name, header=TRUE, sep="\t", check.names=FALSE)
|
|
52 VM <- read.table(VM.name, header=TRUE, sep="\t", check.names=FALSE)
|
|
53
|
|
54
|
|
55
|
|
56 # Table match check with Rchecklibrary
|
|
57 table.check <- match3(DM, SM, VM)
|
|
58 check.err(table.check)
|
|
59
|
|
60
|
|
61 rownames(DM) <- DM[,1]
|
|
62 var_names <- DM[,1]
|
|
63 DM <- DM[,-1]
|
|
64 DM <- data.frame(t(DM))
|
|
65
|
|
66 class.col <- colnames(SM)[as.numeric(class.col)]
|
|
67
|
|
68 # check class.col, class1 and the number of classes---------------
|
|
69
|
|
70 if(!(class.col %in% colnames(SM))){
|
|
71 stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n")
|
|
72 }
|
|
73
|
|
74 c_class <- SM[,class.col]
|
|
75 c_class <- as.factor(c_class)
|
|
76 nb_class <- nlevels(c_class)
|
|
77 classnames <- levels(c_class)
|
|
78
|
|
79
|
|
80 if((nb_class > (nrow(SM))/3)){
|
|
81 class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those
|
|
82 with few samples \n")
|
|
83 cat(class.err)
|
|
84 }
|
|
85
|
|
86 if(type == "One_class"){
|
|
87 if(!(class1 %in% classnames)){
|
|
88 list.class1 <- c("\n Classes:",classnames,"\n")
|
|
89 cat(list.class1)
|
|
90 err.class1 <- c("The class ",class1, " does not appear in the column ", class.col)
|
|
91 stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n")
|
|
92 }
|
|
93 }
|
|
94
|
|
95 #If type is "one_class", change others classes in "other"
|
|
96 if(type == "One_class"){
|
|
97 for(i in 1:length(c_class)){
|
|
98 if(c_class[i]!=class1){
|
|
99 c_class <- as.character(c_class)
|
|
100 c_class[i] <- "Other"
|
|
101 c_class <- as.factor(c_class)
|
|
102 nb_class <- nlevels(c_class)
|
|
103 classnames <- levels(c_class)
|
|
104
|
|
105 }
|
|
106 }
|
|
107 }
|
|
108
|
|
109 DM <- cbind(DM,c_class)
|
|
110
|
|
111 # fold -------------------------------------------------------
|
|
112 n <- 1
|
|
113 fold <- data.frame()
|
|
114 for(j in 1:(nb_class-1)){
|
|
115 for(k in (j+1):nb_class) {
|
|
116 for (i in 1:(length(DM)-1)){
|
|
117 fold[i,n] <- mean(DM[which(DM$c_class==classnames[k]),i], na.rm=TRUE)/
|
|
118 mean(DM[which(DM$c_class==classnames[j]),i], na.rm=TRUE)}
|
|
119 names(fold)[n] <- paste("fold",classnames[k],"VS", classnames[j], sep="_")
|
|
120 n <- n + 1}
|
|
121 }
|
|
122
|
|
123 # NA ---------------------------------------------------------
|
|
124 calcul_NA <- data.frame()
|
|
125 pct_NA <- data.frame()
|
|
126 for (i in 1:(length(DM)-1)){
|
|
127 for (j in 1:nb_class){
|
|
128 n <- 0
|
|
129 new_DM <- DM[which(DM$c_class==classnames[j]),i]
|
|
130 for(k in 1:length(new_DM)){
|
|
131 if (is.na(new_DM[k])){
|
|
132 n <- n + 1}
|
|
133 calcul_NA[i,j] <- n
|
|
134 pct_NA[i,j] <- (calcul_NA[i,j]/length(new_DM))*100}
|
|
135 }
|
|
136 }
|
|
137 names(calcul_NA) <- paste("Nb_NA",classnames, sep="_")
|
|
138 names(pct_NA) <- paste("Pct_NA", classnames, sep="_")
|
|
139
|
|
140 # Alert message if there is no NA
|
|
141
|
|
142 sumNA <- colSums(calcul_NA)
|
|
143 sum_total <- sum(sumNA)
|
|
144
|
|
145 alerte <- NULL
|
|
146 if(sum_total==0){
|
|
147 alerte <- c(alerte, "Data Matrix contains no NA.\n")
|
|
148 }
|
|
149
|
|
150 if(length(alerte) != 0){
|
|
151 cat(alerte,"\n")
|
|
152 }
|
|
153
|
|
154 table_NA <- cbind(calcul_NA, pct_NA)
|
|
155
|
|
156 # check columns names ----------------------------------------
|
|
157
|
|
158 # Fold
|
|
159 VM.names <- colnames(VM)
|
|
160 fold.names <- colnames(fold)
|
|
161
|
|
162 for (i in 1:length(VM.names)){
|
|
163 for (j in 1:length(fold.names)){
|
|
164 if (VM.names[i]==fold.names[j]){
|
|
165 fold.names[j] <- paste(fold.names[j],"2", sep="_")
|
|
166 }
|
|
167 }
|
|
168 }
|
|
169 colnames(fold) <- fold.names
|
|
170
|
|
171 # NA
|
|
172 NA.names <- colnames(table_NA)
|
|
173
|
|
174 for (i in 1:length(VM.names)){
|
|
175 for (j in 1:length(NA.names)){
|
|
176 if (VM.names[i]==NA.names[j]){
|
|
177 NA.names[j] <- paste(NA.names[j],"2", sep="_")
|
|
178 }
|
|
179 }
|
|
180 }
|
|
181 colnames(table_NA) <- NA.names
|
|
182
|
|
183 #for NA barplots ---------------------------------------------
|
|
184
|
|
185 data_bp <- data.frame()
|
|
186
|
|
187 for (j in 1:ncol(pct_NA)){
|
|
188 Nb_NA_0_20 <- 0
|
|
189 Nb_NA_20_40 <- 0
|
|
190 Nb_NA_40_60 <- 0
|
|
191 Nb_NA_60_80 <- 0
|
|
192 Nb_NA_80_100 <- 0
|
|
193 for (i in 1:nrow(pct_NA)){
|
|
194
|
|
195 if ((0<=pct_NA[i,j])&(pct_NA[i,j]<20)){
|
|
196 Nb_NA_0_20=Nb_NA_0_20+1
|
|
197 }
|
|
198
|
|
199 if ((20<=pct_NA[i,j])&(pct_NA[i,j]<40)){
|
|
200 Nb_NA_20_40=Nb_NA_20_40+1}
|
|
201
|
|
202 if ((40<=pct_NA[i,j])&(pct_NA[i,j]<60)){
|
|
203 Nb_NA_40_60=Nb_NA_40_60+1}
|
|
204
|
|
205 if ((60<=pct_NA[i,j])&(pct_NA[i,j]<80)){
|
|
206 Nb_NA_60_80=Nb_NA_60_80+1}
|
|
207
|
|
208 if ((80<=pct_NA[i,j])&(pct_NA[i,j]<=100)){
|
|
209 Nb_NA_80_100=Nb_NA_80_100+1}
|
|
210 }
|
|
211 data_bp[1,j] <- Nb_NA_0_20
|
|
212 data_bp[2,j] <- Nb_NA_20_40
|
|
213 data_bp[3,j] <- Nb_NA_40_60
|
|
214 data_bp[4,j] <- Nb_NA_60_80
|
|
215 data_bp[5,j] <- Nb_NA_80_100
|
|
216 }
|
|
217 rownames(data_bp) <- c("0-20%", "20-40%", "40-60%", "60-80%", "80-100%")
|
|
218 colnames(data_bp) <- classnames
|
|
219 data_bp <- as.matrix(data_bp)
|
|
220
|
|
221
|
|
222 # Output--------------------------------------------------------
|
|
223
|
|
224 VM <- cbind(VM,fold,table_NA)
|
|
225
|
|
226 write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE)
|
|
227
|
|
228 #graphics pdf
|
|
229
|
|
230 pdf(graphs.output)
|
|
231
|
|
232 #Boxplots for NA
|
|
233
|
|
234 par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE)
|
|
235
|
|
236 bp=barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables")
|
|
237 legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0))
|
|
238
|
|
239
|
|
240 stock=0
|
|
241 for (i in 1:nrow(data_bp)){
|
|
242 text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white")
|
|
243 stock <- stock+data_bp[i,]
|
|
244 }
|
|
245
|
|
246
|
|
247 #Boxplots for fold test
|
|
248 for (j in 1:ncol(fold)){
|
|
249 title=paste(fold.names[j])
|
|
250 boxplot(fold[j], main=title)
|
|
251 }
|
|
252
|
|
253 dev.off()
|
|
254
|
|
255 }
|
|
256
|
|
257
|
|
258 # Function call---------------
|
|
259
|
|
260 #setwd("Y:\\Developpement\\Intensity check\\Pour tests")
|
|
261 #intens_check("DM_NA.tabular", "SM_NA.tabular", "VM_NA.tabular", "One_class", "class", "Blanks", "VM_oneclass_NA.txt",
|
|
262 #"Barplots_and_Boxplots")
|
|
263
|
|
264 |