Mercurial > repos > melpetera > batchcorrection
comparison BC/batch_correction_3Lfct.R @ 4:23314e1192d4 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 14 Jan 2021 09:56:58 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:73892ef177e3 | 4:23314e1192d4 |
---|---|
1 # Author: jfmartin | |
2 # Modified by : mpetera | |
3 ############################################################################### | |
4 # Correction of analytical effects inter and intra batch on intensities using quality control pooled samples (QC-pools) | |
5 # according to the algorithm mentioned by Van der Kloet (J Prot Res 2009). | |
6 # Parameters : a dataframe of Ions intensities and an other of samples? metadata which must contains at least the three following columns : | |
7 # "batch" to identify the batches of analyses ; need at least 3 QC-pools for linear adjustment and 8 for lo(w)ess adjustment | |
8 # "injectionOrder" integer defining the injection order of all samples : QC-pools and analysed samples | |
9 # "sampleType" indicates if defining a sample with "sample" or a QC-pool with "pool" | |
10 # NO MISSING DATA are allowed | |
11 # Version 0.91 insertion of ok_norm function to assess correction feasibility | |
12 # Version 0.92 insertion of slope test in ok_norm | |
13 # Version 0.93 name of log file define as a parameter of the correction function | |
14 # Version 0.94 Within a batch, test if all QCpools or samples values = 0. Definition of an error code in ok_norm function (see function for details) | |
15 # Version 0.99 include non linear lowess correction. | |
16 # Version 1.00 the corrected result matrix is return transposed in Galaxy | |
17 # Version 1.01 standard deviation=0 instead of sum of value=0 is used to assess constant data in ok_norm function. Negative values in corrected matrix are converted to 0. | |
18 # Version 1.02 plotsituation create a result file with the error code of non execution of correction set by function ok_norm | |
19 # Version 1.03 fix bug in plot with "reg" option. suppression of ok_norm=4 condition if ok_norm function | |
20 # Version 2.00 Addition of loess function, correction indicator, plots ; modification of returned objects' format, some plots' displays and ok_norm ifelse format | |
21 # Version 2.01 Correction for pools negative values earlier in norm_QCpool | |
22 # Version 2.10 Script refreshing ; vocabulary adjustment ; span in parameters for lo(w)ess regression ; conditionning for third line ACP display ; order in loess display | |
23 # Version 2.11 ok1 and ok2 permutation (ok_norm) ; conditional display of regression (plotsituation) ; grouping of linked lignes + conditioning (normX) ; conditioning for CVplot | |
24 # Version 2.20 acplight function added from previous toolBox.R [# Version 1.01 "NA"-coding possibility added in acplight function] | |
25 # Version 2.30 addition of suppressWarnings() for known and controlled warnings ; suppression of one useless "cat" message ; change in Rdata names ; 'batch(es)' in cat | |
26 # Version 2.90 change in handling of generated negative and Inf values | |
27 # Version 2.91 Plot improvement | |
28 # Version 3.00 - handling of sample tags' parameters | |
29 # - accepting sample types beyond "pool" and "sample" | |
30 # - dealing with NA | |
31 # - changes in the normalisation strategy regarding mean values to adjust for NA or 0 values | |
32 # - changes in the normalisation strategy regarding unconsistant values (negative or Inf) | |
33 | |
34 ok_norm=function(qcp,qci,spl,spi,method,normref=NA,valimp="0") { | |
35 # Function used for one ion within one batch to determine whether or not batch correction is possible | |
36 # ok_norm values : | |
37 # 0 : no preliminary-condition problem | |
38 # 1 : standard deviation of QC-pools or samples = 0 | |
39 # 2 : insufficient number of QC-pools within a batch (n=3 for linear, n=8 for lowess or loess) | |
40 # 2.5 : less than 2 samples within a batch | |
41 # 3 : significant difference between QC-pools' and samples' means | |
42 # 4 : denominator =0 when on 1 pool per batch <> 0 | |
43 # 5 : (linear regression only) the slopes ratio ?QC-pools/samples? is lower than -0.2 | |
44 # 6 : (linear regression only) none of the pool or sample could be corrected if negative and infinite values are turned into NA | |
45 # Parameters: | |
46 # qcp: intensity of a given ion for pools | |
47 # qci: injection numbers for pools | |
48 # spl: intensity of a given ion for samples | |
49 # spi: injection numbers for samples | |
50 # method: to provide specific checks for "linear" | |
51 | |
52 ok=0 | |
53 if (method=="linear") {minQC=3} else {minQC=8} | |
54 if (length(qcp[!is.na(qcp)])<minQC) { ok=2 } else { if (length(spl[!is.na(spl)])<2) { ok=2.5 | |
55 } else { | |
56 if (sd(qcp,na.rm=TRUE)==0 | sd(spl,na.rm=TRUE)==0) { ok=1 | |
57 } else { | |
58 cvp= sd(qcp,na.rm=TRUE)/mean(qcp,na.rm=TRUE); cvs=sd(spl,na.rm=TRUE)/mean(spl,na.rm=TRUE) | |
59 rttest=t.test(qcp,y=spl) | |
60 reslsfit=lsfit(qci, qcp) | |
61 reslsfitSample=lsfit(spl, spi) | |
62 ordori=reslsfit$coefficients[1] | |
63 penteB=reslsfit$coefficients[2] | |
64 penteS=reslsfitSample$coefficients[2] | |
65 # Significant difference between samples and pools | |
66 if (rttest$p.value < 0.01) { ok=3 | |
67 } else { | |
68 # to avoid denominator =0 when on 1 pool per batch <> 0 | |
69 if (method=="linear" & length(which(((penteB*qci)+ordori)==0))>0 ){ ok=6 | |
70 } else { | |
71 # different sloop between samples and pools | |
72 if (method=="linear" & penteB/penteS < -0.20) { ok=5 | |
73 } else { | |
74 # | |
75 if (method=="linear" & !is.na(normref) & valimp=="NA") { | |
76 denom = (penteB * c(spi,qci) + ordori) | |
77 normval = c(spl,qcp)*normref / denom | |
78 if(length(which((normval==Inf)|(denom<1)))==length(normval)){ok=6} | |
79 } | |
80 }}}}}} | |
81 ok_norm=ok | |
82 } | |
83 | |
84 plotsituation <- function (x, nbid,outfic="plot_regression.pdf", outres="PreNormSummary.txt",fact="batch",span="none", | |
85 sm_meta=list(batch="batch", injectionOrder="injectionOrder", sampleType="sampleType", | |
86 sampleTag=list(pool="pool",blank="blank",sample="sample"))) { | |
87 # Checks for all ions in every batch if linear or lo(w)ess correction is possible. | |
88 # Uses ok_norm function and creates a file (PreNormSummary.txt) with the corresponding error codes. | |
89 # Also creates a pdf file with plots of linear and lo(w)ess regression lines. | |
90 # Parameters: | |
91 # x: dataframe with ions in columns and samples in rows ; x is the result of concatenation of sample metadata file and ions file | |
92 # nbid: number of samples description columns (id and factors) with at least : "batch","injectionOrder","sampleType" | |
93 # outfic: name of regression plots pdf file | |
94 # outres: name of summary table file | |
95 # fact: factor to be used as categorical variable for plots and PCA | |
96 # span: span value for lo(w)ess regression; "none" for linear or default values | |
97 # sm_meta: list of information about sample metadata coding | |
98 indfact=which(dimnames(x)[[2]]==fact) | |
99 indtypsamp=which(dimnames(x)[[2]]==sm_meta$sampleType) | |
100 indbatch=which(dimnames(x)[[2]]==sm_meta$batch) | |
101 indinject=which(dimnames(x)[[2]]==sm_meta$injectionOrder) | |
102 lastIon=dim(x)[2] | |
103 nbi=lastIon-nbid # Number of ions = total number of columns - number of identifying columns | |
104 nbb=length(levels(x[[sm_meta$batch]])) # Number of batch = number of levels of "batch" comlumn (factor) | |
105 nbs=length(x[[sm_meta$sampleType]][x[[sm_meta$sampleType]] %in% sm_meta$sampleTag$sample])# Number of samples = number of rows with "sample" value in sampleType | |
106 pdf(outfic,width=27,height=7*ceiling((nbb+2)/3)) | |
107 cat(nbi," ions ",nbb," batch(es) \n") | |
108 cv=data.frame(matrix(0,nrow=nbi,ncol=2))# initialisation de la dataset qui contiendra les CV | |
109 pre_bilan=matrix(0,nrow=nbi,ncol=3*nbb) # dataset of ok_norm function results | |
110 for (p in 1:nbi) {# for each ion | |
111 par (mfrow=c(ceiling((nbb+2)/3),3),ask=F,cex=1.2) | |
112 labion=dimnames(x)[[2]][p+nbid] | |
113 indpool=which(x[[sm_meta$sampleType]] %in% sm_meta$sampleTag$pool) # QCpools subscripts in x | |
114 pools1=x[indpool,p+nbid]; cv[p,1]=sd(pools1,na.rm=TRUE)/mean(pools1,na.rm=TRUE)# CV before correction | |
115 for (b in 1:nbb) {# for each batch... | |
116 xb=data.frame(x[(x[[sm_meta$batch]]==levels(x[[sm_meta$batch]])[b]),c(indtypsamp,indinject,p+nbid)]) | |
117 indpb = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$pool)# QCpools subscripts in the current batch | |
118 indsp = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$sample)# samples subscripts in the current batch | |
119 normLinearTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="linear",normref=mean(xb[c(indpb,indsp),3],na.rm=TRUE),valimp="NA") | |
120 normLoessTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="loess") | |
121 normLowessTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="lowess") | |
122 pre_bilan[ p,3*b-2]=normLinearTest | |
123 pre_bilan[ p,3*b-1]=normLoessTest | |
124 pre_bilan[ p,3*b]=normLowessTest | |
125 if(length(indpb)>1){ | |
126 if(span=="none"){span1<-1 ; span2<-2*length(indpool)/nbs}else{span1<-span ; span2<-span} | |
127 if(normLoessTest!=2){resloess=loess(xb[indpb,3]~xb[indpb,2],span=span1,degree=2,family="gaussian",iterations=4,surface="direct")} | |
128 if(length(which(!(is.na(xb[indsp,3]))))>1){resloessSample=loess(xb[indsp,3]~xb[indsp,2],span=2*length(indpool)/nbs,degree=2,family="gaussian",iterations=4,surface="direct") } | |
129 if(normLowessTest!=2){reslowess=lowess(xb[indpb,2],xb[indpb,3],f=span2)} | |
130 if(length(which(!(is.na(xb[indsp,3]))))>1){reslowessSample=lowess(xb[indsp,2],xb[indsp,3])} | |
131 liminf=min(xb[,3],na.rm=TRUE);limsup=max(xb[,3],na.rm=TRUE) | |
132 firstinj=min(xb[,2],na.rm=TRUE);lastinj=max(xb[,2],na.rm=TRUE) | |
133 plot(xb[indsp,2],xb[indsp,3],pch=16, main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup),xlim=c(firstinj,lastinj)) | |
134 if(nrow(xb)>(length(indpb)+length(indsp))){points(xb[-c(indpb,indsp),2], xb[-c(indpb,indsp),3],pch=18,col="grey")} | |
135 points(xb[indpb,2], xb[indpb,3],pch=5) | |
136 if(normLoessTest!=2){points(cbind(resloess$x,resloess$fitted)[order(resloess$x),],type="l",col="green3")} | |
137 if(length(which(!(is.na(xb[indsp,3]))))>1){points(cbind(resloessSample$x,resloessSample$fitted)[order(resloessSample$x),],type="l",col="green3",lty=2)} | |
138 if(normLowessTest!=2){points(reslowess,type="l",col="red")}; if(length(which(!(is.na(xb[indsp,3]))))>1){points(reslowessSample,type="l",col="red",lty=2)} | |
139 abline(lsfit(xb[indpb,2],xb[indpb,3]),col="blue") | |
140 if(length(which(!(is.na(xb[indsp,3]))))>1){abline(lsfit(xb[indsp,2],xb[indsp,3]),lty=2,col="blue")} | |
141 legend("topleft",c("pools","samples"),lty=c(1,2),bty="n") | |
142 legend("topright",c("linear","lowess","loess"),lty=1,col=c("blue","red","green3"),bty="n") | |
143 } else { | |
144 plot.new() | |
145 legend("center","Plot only available when the\nbatch contains at least 2 pools.") | |
146 } | |
147 } | |
148 # series de plot avant correction | |
149 minval=min(x[p+nbid],na.rm=TRUE);maxval=max(x[p+nbid],na.rm=TRUE) | |
150 plot( x[[sm_meta$injectionOrder]], x[,p+nbid],col=x[[sm_meta$batch]],ylim=c(minval,maxval),ylab=labion, | |
151 main=paste0("before correction (CV for pools = ",round(cv[p,1],2),")"),xlab="injection order") | |
152 suppressWarnings(plot.design( x[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect before correction")) | |
153 } | |
154 dev.off() | |
155 pre_bilan=data.frame(pre_bilan) | |
156 labion=dimnames(x)[[2]][nbid+1:nbi] | |
157 for (i in 1:nbb) { | |
158 dimnames(pre_bilan)[[2]][3*i-2]=paste("batch",i,"linear") | |
159 dimnames(pre_bilan)[[2]][3*i-1]=paste("batch",i,"loess") | |
160 dimnames(pre_bilan)[[2]][3*i]=paste("batch",i,"lowess") | |
161 } | |
162 bilan=data.frame(labion,pre_bilan) | |
163 write.table(bilan,file=outres,sep="\t",row.names=F,quote=F) | |
164 } | |
165 | |
166 | |
167 normlowess=function (xb,detail="no",vref=1,b,span=NULL,valneg="none",sm_meta=list(batch="batch", injectionOrder="injectionOrder", sampleType="sampleType", | |
168 sampleTag=list(pool="pool",blank="blank",sample="sample")),min_norm=1){ | |
169 # Correction function applied to 1 ion in 1 batch. | |
170 # Uses a lowess regression computed on QC-pools in order to correct samples intensity values | |
171 # xb: dataframe for 1 ion in columns and samples in rows. | |
172 # vref: reference value (average of ion) | |
173 # b: batch subscript | |
174 # detail: level of detail in the outlog file | |
175 # span: span value for lo(w)ess regression; NULL for default values | |
176 # valneg: to determine what to do with generated negative and Inf values | |
177 # sm_meta: list of information about sample metadata coding | |
178 # min_norm: minimum value accepted for normalisation term (denominator); should be strictly positive | |
179 indpb = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$pool) # pools subscripts of current batch | |
180 indsp = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$sample) # samples of current batch subscripts | |
181 labion=dimnames(xb)[[2]][3] | |
182 newval=xb[[3]] # initialisation of corrected values = intial values | |
183 ind <- 0 # initialisation of correction indicator | |
184 normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="lowess") | |
185 #cat("batch:",b," dim xb=",dim(xb)," ok=",normTodo,"\n") | |
186 if (normTodo==0) { | |
187 if(length(span)==0){span2<-2*length(indpb)/length(indsp)}else{span2<-span} | |
188 reslowess=lowess(xb[indpb,2],xb[indpb,3],f=span2) # lowess regression with QC-pools | |
189 if(length(which(reslowess$y<min_norm))!=0){ # to handle cases where 0<denominator<min_norm or negative | |
190 toajust <- which(reslowess$y<min_norm) | |
191 if(valneg=="NA"){ reslowess$y[toajust] <- NA | |
192 } else { if(valneg=="0"){ reslowess$y[toajust] <- -1 | |
193 } else { | |
194 mindenom <- min(reslowess$y[reslowess$y>=min_norm],na.rm=TRUE) | |
195 reslowess$y[toajust] <- mindenom | |
196 } } } | |
197 for(j in 1:nrow(xb)) { | |
198 if (j %in% indpb) { | |
199 newval[j]=(vref*xb[j,3]) / (reslowess$y[which(indpb==j)]) | |
200 } else { # for samples other than pools, the correction value "corv" correspond to the nearest QCpools | |
201 corv= reslowess$y[which(abs(reslowess$x-xb[j,2])==min(abs(reslowess$x-xb[j,2]),na.rm=TRUE))] | |
202 if (length(corv)>1) {corv=corv[1]} | |
203 newval[j]=(vref*xb[j,3]) / corv | |
204 } | |
205 if((!is.na(newval[j]))&(newval[j]<0)){newval[j]<-0} | |
206 } | |
207 if (detail=="reg") { | |
208 liminf=min(xb[,3],na.rm=TRUE);limsup=max(xb[,3],na.rm=TRUE) | |
209 firstinj=min(xb[,2],na.rm=TRUE);lastinj=max(xb[,2],na.rm=TRUE) | |
210 plot(xb[indsp,2],xb[indsp,3],pch=16,main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup),xlim=c(firstinj,lastinj)) | |
211 if(nrow(xb)>(length(indpb)+length(indsp))){points(xb[-c(indpb,indsp),2], xb[-c(indpb,indsp),3],pch=18)} | |
212 points(xb[indpb,2], xb[indpb,3],pch=5) | |
213 points(reslowess,type="l",col="red") | |
214 } | |
215 ind <- 1 | |
216 } else {# if ok_norm != 0 , we perform a correction based on batch pool or sample average | |
217 if((length(which(!is.na(xb[indpb,3])))>0)&(length(which(xb[indpb,3]>0))>0)){ | |
218 moypool=mean(xb[indpb,3],na.rm=TRUE) | |
219 newval = (vref*xb[,3])/moypool | |
220 } else { | |
221 moysamp=mean(xb[indsp,3],na.rm=TRUE) | |
222 if((!is.na(moysamp))&(moysamp>0)){ | |
223 cat("Warning: no pool value >0 detected in batch",b,"of ion",labion,": sample mean used as normalisation term.\n") | |
224 newval = (vref*xb[,3])/moysamp | |
225 } else { | |
226 dev.off() | |
227 stop(paste("\n- - - -\nNo pool nor sample value >0 in batch",b,"of ion",labion,"- correction process aborted.\n- - - -\n")) | |
228 } | |
229 } | |
230 } | |
231 newval <- list(norm.ion=newval,norm.ind=ind) | |
232 return(newval) | |
233 } | |
234 | |
235 normlinear <- function (xb,detail="no",vref=1,b,valneg="none",sm_meta=list(batch="batch", injectionOrder="injectionOrder", sampleType="sampleType", | |
236 sampleTag=list(pool="pool",blank="blank",sample="sample")),min_norm=1){ | |
237 # Correction function applied to 1 ion in 1 batch. | |
238 # Uses a linear regression computed on QC-pools in order to correct samples intensity values | |
239 # xb: dataframe with ions in columns and samples in rows; x is a result of concatenation of sample metadata file and ion file | |
240 # detail: level of detail in the outlog file | |
241 # vref: reference value (average of ion) | |
242 # b: which batch it is | |
243 # valneg: to determine what to do with generated negative and Inf values | |
244 # sm_meta: list of information about sample metadata coding | |
245 # min_norm: minimum value accepted for normalisation term (denominator); should be strictly positive | |
246 indpb = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$pool)# pools subscripts of current batch | |
247 indsp = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$sample)# samples of current batch subscripts | |
248 labion=dimnames(xb)[[2]][3] | |
249 newval=xb[[3]] # initialisation of corrected values = intial values | |
250 ind <- 0 # initialisation of correction indicator | |
251 normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="linear",normref=vref,valimp=valneg) | |
252 if (normTodo==0) { | |
253 ind <- 1 | |
254 reslsfit=lsfit(xb[indpb,2],xb[indpb,3]) # linear regression for QCpools | |
255 reslsfitSample=lsfit(xb[indsp,2],xb[indsp,3]) # linear regression for samples | |
256 ordori=reslsfit$coefficients[1] | |
257 pente=reslsfit$coefficients[2] | |
258 if (detail=="reg") { | |
259 liminf=min(xb[,3],na.rm=TRUE);limsup=max(xb[,3],na.rm=TRUE) | |
260 firstinj=min(xb[,2],na.rm=TRUE);lastinj=max(xb[,2],na.rm=TRUE) | |
261 plot(xb[indsp,2],xb[indsp,3],pch=16, | |
262 main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup),xlim=c(firstinj,lastinj)) | |
263 if(nrow(xb)>(length(indpb)+length(indsp))){points(xb[-c(indpb,indsp),2], xb[-c(indpb,indsp),3],pch=18)} | |
264 points(xb[indpb,2], xb[indpb,3],pch=5) | |
265 abline(reslsfit) | |
266 abline(reslsfitSample,lty=2) | |
267 } | |
268 # correction with rescaling of ion global intensity (vref) | |
269 newval = (vref*xb[,3]) / (pente * (xb[,2]) + ordori) | |
270 newval[which((pente * (xb[,2]) + ordori)<min_norm)] <- -1 # to handle cases where 0<denominator<1 or negative | |
271 # handling if any negative values | |
272 if(length(which((newval==Inf)|(newval<0)))!=0){ | |
273 toajust <- which((newval==Inf)|(newval<0)) | |
274 if(valneg=="NA"){ newval[toajust] <- NA | |
275 } else { if(valneg=="0"){ newval[toajust] <- 0 | |
276 } else { | |
277 mindenom <- (pente * (xb[,2]) + ordori) | |
278 mindenom <- min(mindenom[mindenom>=min_norm],na.rm=TRUE) | |
279 newval[toajust] <- vref * (xb[,3][toajust]) / mindenom | |
280 } | |
281 } | |
282 } | |
283 } else {# if ok_norm != 0 , we perform a correction based on batch pool or sample average | |
284 if((length(which(!is.na(xb[indpb,3])))>0)&(length(which(xb[indpb,3]>0))>0)){ | |
285 moypool=mean(xb[indpb,3],na.rm=TRUE) | |
286 newval = (vref*xb[,3])/moypool | |
287 } else { | |
288 moysamp=mean(xb[indsp,3],na.rm=TRUE) | |
289 if((!is.na(moysamp))&(moysamp>0)){ | |
290 cat("Warning: no pool value >0 detected in batch",b,"of ion",labion,": sample mean used as normalisation term.\n") | |
291 newval = (vref*xb[,3])/moysamp | |
292 } else { | |
293 dev.off() | |
294 stop(paste("\n- - - -\nNo pool nor sample value >0 in batch",b,"of ion",labion,"- correction process aborted.\n- - - -\n")) | |
295 } | |
296 } | |
297 } | |
298 newval <- list(norm.ion=newval,norm.ind=ind) | |
299 return(newval) | |
300 } | |
301 | |
302 | |
303 normloess <- function (xb,detail="no",vref=1,b,span=NULL,valneg="none",sm_meta=list(batch="batch", injectionOrder="injectionOrder", sampleType="sampleType", | |
304 sampleTag=list(pool="pool",blank="blank",sample="sample")),min_norm=1){ | |
305 # Correction function applied to 1 ion in 1 batch. | |
306 # Uses a loess regression computed on QC-pools in order to correct samples intensity values. | |
307 # xb: dataframe for 1 ion in columns and samples in rows. | |
308 # detail: level of detail in the outlog file. | |
309 # vref: reference value (average of ion) | |
310 # b: batch subscript | |
311 # span: span value for lo(w)ess regression; NULL for default values | |
312 # valneg: to determine what to do with generated negative and Inf values | |
313 # sm_meta: list of information about sample metadata coding | |
314 # min_norm: minimum value accepted for normalisation term (denominator); should be strictly positive | |
315 indpb = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$pool) # pools subscripts of current batch | |
316 indsp = which(xb[[sm_meta$sampleType]] %in% sm_meta$sampleTag$sample) # samples of current batch subscripts | |
317 indbt = which(xb[[sm_meta$sampleType]] %in% c(sm_meta$sampleTag$sample,sm_meta$sampleTag$pool))# batch subscripts of samples and QCpools | |
318 labion=dimnames(xb)[[2]][3] | |
319 newval=xb[[3]] # initialisation of corrected values = intial values | |
320 ind <- 0 # initialisation of correction indicator | |
321 normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="loess") | |
322 if (normTodo==0) { | |
323 if(length(span)==0){span1<-1}else{span1<-span} | |
324 resloess=loess(xb[indpb,3]~xb[indpb,2],span=span1,degree=2,family="gaussian",iterations=4,surface="direct") # loess regression with QCpools | |
325 corv=predict(resloess,newdata=xb[,2]) | |
326 if(length(which(corv<min_norm))!=0){ # unconsistant values handling | |
327 toajust <- which(corv<min_norm) | |
328 if(valneg=="NA"){ corv[toajust] <- NA | |
329 } else { if(valneg=="0"){ corv[toajust] <- -1 | |
330 } else { | |
331 mindenom <- min(corv[corv>=min_norm],na.rm=TRUE) | |
332 corv[toajust] <- mindenom | |
333 } | |
334 } | |
335 } | |
336 newvalps=(vref*xb[indbt,3]) / corv[indbt] # to check if correction generates outlier values | |
337 refthresh=max(c(3*(quantile(newvalps,na.rm=TRUE)[4]),1.3*(xb[indbt,3])),na.rm=TRUE) | |
338 if(length(which(newvalps>refthresh))>0){ # if outliers | |
339 # in this case no modification of initial value | |
340 newval <- xb[,3] | |
341 } else { | |
342 newval=(vref*xb[,3]) / corv | |
343 newval[newval<0] <- 0 | |
344 ind <- 1 # confirmation of correction | |
345 } | |
346 if ((detail=="reg")&(ind==1)) { # plot | |
347 liminf=min(xb[,3],na.rm=TRUE);limsup=max(xb[,3],na.rm=TRUE) | |
348 firstinj=min(xb[,2],na.rm=TRUE);lastinj=max(xb[,2],na.rm=TRUE) | |
349 plot(xb[indsp,2],xb[indsp,3],pch=16,main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup),xlim=c(firstinj,lastinj)) | |
350 if(nrow(xb)>(length(indpb)+length(indsp))){points(xb[-c(indpb,indsp),2], xb[-c(indpb,indsp),3],pch=18)} | |
351 points(xb[indpb,2], xb[indpb,3],pch=5) | |
352 points(cbind(resloess$x,resloess$fitted)[order(resloess$x),],type="l",col="red") | |
353 } | |
354 } | |
355 if (ind==0) {# if ok_norm != 0 or if correction creates outliers, we perform a correction based on batch pool or sample average | |
356 if((length(which(!is.na(xb[indpb,3])))>0)&(length(which(xb[indpb,3]>0))>0)){ | |
357 moypool=mean(xb[indpb,3],na.rm=TRUE) | |
358 newval = (vref*xb[,3])/moypool | |
359 } else { | |
360 moysamp=mean(xb[indsp,3],na.rm=TRUE) | |
361 if((!is.na(moysamp))&(moysamp>0)){ | |
362 cat("Warning: no pool value >0 detected in batch",b,"of ion",labion,": sample mean used as normalisation term.\n") | |
363 newval = (vref*xb[,3])/moysamp | |
364 } else { | |
365 dev.off() | |
366 stop(paste("\n- - - -\nNo pool nor sample value >0 in batch",b,"of ion",labion,"- correction process aborted.\n- - - -\n")) | |
367 } | |
368 } | |
369 } | |
370 newval <- list(norm.ion=newval,norm.ind=ind) | |
371 return(newval) | |
372 } | |
373 | |
374 | |
375 | |
376 norm_QCpool <- function (x, nbid, outlog, fact, metaion, detail="no", NormMoyPool=FALSE, NormInt=FALSE, method="linear",span="none",valNull="0", | |
377 sm_meta=list(batch="batch", injectionOrder="injectionOrder", sampleType="sampleType", | |
378 sampleTag=list(pool="pool",blank="blank",sample="sample")),min_norm=1) { | |
379 ### Correction applying linear or lo(w)ess correction function on all ions for every batch of a dataframe. | |
380 # x: dataframe with ions in column and samples' metadata | |
381 # nbid: number of sample description columns (id and factors) with at least "batch", "injectionOrder", "sampleType" | |
382 # outlog: name of regression plots and PCA pdf file | |
383 # fact: factor to be used as categorical variable for plots | |
384 # metaion: dataframe of ions' metadata | |
385 # detail: level of detail in the outlog file. detail="no" ACP + boxplot of CV before and after correction. | |
386 # detail="plot" with plot for all batch before and after correction. | |
387 # detail="reg" with added plots with regression lines for all batches. | |
388 # NormMoyPool: not used | |
389 # NormInt: not used | |
390 # method: regression method to be used to correct : "linear" or "lowess" or "loess" | |
391 # span: span value for lo(w)ess regression; "none" for linear or default values | |
392 # valNull: to determine what to do with negatively estimated intensities | |
393 # sm_meta: list of information about sample metadata coding | |
394 # min_norm: minimum value accepted for normalisation term (denominator); should be strictly positive | |
395 indfact=which(dimnames(x)[[2]]==fact) | |
396 indtypsamp=which(dimnames(x)[[2]]==sm_meta$sampleType) | |
397 indbatch=which(dimnames(x)[[2]]==sm_meta$batch) | |
398 indinject=which(dimnames(x)[[2]]==sm_meta$injectionOrder) | |
399 lastIon=dim(x)[2] | |
400 indpool=which(x[[sm_meta$sampleType]] %in% sm_meta$sampleTag$pool)# QCpools subscripts in all batches | |
401 valref=apply(as.matrix(x[indpool,(nbid+1):(lastIon)]),2,mean,na.rm=TRUE) # reference value for each ion used to still have the same rought size of values | |
402 nbi=lastIon-nbid # number of ions | |
403 nbb=length(levels(x[[sm_meta$batch]])) # Number of batch(es) = number of levels of factor "batch" (can be =1) | |
404 Xn=data.frame(x[,c(1:nbid)],matrix(0,nrow=nrow(x),ncol=nbi))# initialisation of the corrected dataframe (=initial dataframe) | |
405 dimnames(Xn)=dimnames(x) | |
406 cv=data.frame(matrix(NA,nrow=nbi,ncol=2))# initialisation of dataframe containing CV before and after correction | |
407 dimnames(cv)[[2]]=c("avant","apres") | |
408 if (detail!="reg" && detail!="plot" && detail!="no") {detail="no"} | |
409 pdf(outlog,width=27,height=20) | |
410 cat(nbi," ions ",nbb," batch(es) \n") | |
411 if (detail=="plot") {if(nbb<6){par(mfrow=c(3,3),ask=F,cex=1.5)}else{par(mfrow=c(4,4),ask=F,cex=1.5)}} | |
412 res.ind <- matrix(NA,ncol=nbb,nrow=nbi,dimnames=list(dimnames(x)[[2]][-c(1:nbid)],paste("norm.b",1:nbb,sep=""))) | |
413 for (p in 1:nbi) {# for each ion | |
414 labion=dimnames(x)[[2]][p+nbid] | |
415 pools1=x[indpool,p+nbid] | |
416 if(length(which(pools1[!(is.na(pools1))]>0))<2){ # if not enough pools >0 -> no normalisation | |
417 war.note <- paste("Warning: less than 2 pools with values >0 in",labion,"-> no normalisation for this ion.") | |
418 cat(war.note,"\n") | |
419 Xn[,p+nbid] <- x[,p+nbid] | |
420 res.ind[p,] <- rep(0,nbb) | |
421 if (detail=="reg" || detail=="plot" ) { | |
422 par(mfrow=c(2,2),ask=F,cex=1.5) | |
423 plot.new() | |
424 legend("center",war.note) | |
425 minval=min(x[p+nbid],na.rm=TRUE);maxval=max(x[p+nbid],na.rm=TRUE) | |
426 plot( x[[sm_meta$injectionOrder]], x[,p+nbid],col=x[[sm_meta$batch]],ylab=labion,ylim=c(minval,maxval), | |
427 main="No correction",xlab="injection order") | |
428 points(x[[sm_meta$injectionOrder]][indpool],x[indpool,p+nbid],col="maroon",pch=16,cex=1) | |
429 } | |
430 } else { | |
431 if (detail == "reg") {if(nbb<6){par(mfrow=c(3,3),ask=F,cex=1.5)}else{par(mfrow=c(4,4),ask=F,cex=1.5)}} | |
432 if (detail == "plot") {par(mfrow=c(2,2),ask=F,cex=1.5)} | |
433 cv[p,1]=sd(pools1,na.rm=TRUE)/mean(pools1,na.rm=TRUE)# CV before correction | |
434 for (b in 1:nbb) {# for every batch | |
435 indbt = which(x[[sm_meta$batch]]==(levels(x[[sm_meta$batch]])[b])) # subscripts of all samples | |
436 sub=data.frame(x[(x[[sm_meta$batch]]==levels(x[[sm_meta$batch]])[b]),c(indtypsamp,indinject,p+nbid)]) | |
437 if (method=="linear") { res.norm = normlinear(sub,detail,valref[p],b,valNull,sm_meta,min_norm) | |
438 } else { if (method=="loess"){ res.norm <- normloess(sub,detail,valref[p],b,span,valNull,sm_meta,min_norm) | |
439 } else { if (method=="lowess"){ res.norm <- normlowess(sub,detail,valref[p],b,span,valNull,sm_meta,min_norm) | |
440 } else {stop("\n--\nNo valid 'method' argument supplied.\nMust be 'linear','loess' or 'lowess'.\n--\n")} | |
441 }} | |
442 Xn[indbt,p+nbid] = res.norm[[1]] | |
443 res.ind[p,b] <- res.norm[[2]] | |
444 } | |
445 # Post correction CV calculation | |
446 pools2=Xn[indpool,p+nbid] | |
447 cv[p,2]=sd(pools2,na.rm=TRUE)/mean(pools2,na.rm=TRUE) | |
448 if (detail=="reg" || detail=="plot" ) { | |
449 # plot before and after correction | |
450 minval=min(cbind(x[p+nbid],Xn[p+nbid]),na.rm=TRUE);maxval=max(cbind(x[p+nbid],Xn[p+nbid]),na.rm=TRUE) | |
451 plot( x[[sm_meta$injectionOrder]], x[,p+nbid],col=x[[sm_meta$batch]],ylab=labion,ylim=c(minval,maxval), | |
452 main=paste0("before correction (CV for pools = ",round(cv[p,1],2),")"),xlab="injection order") | |
453 points(x[[sm_meta$injectionOrder]][indpool],x[indpool,p+nbid],col="maroon",pch=16,cex=1) | |
454 plot(Xn[[sm_meta$injectionOrder]],Xn[,p+nbid],col=x[[sm_meta$batch]],ylab="",ylim=c(minval,maxval), | |
455 main=paste0("after correction (CV for pools = ",round(cv[p,2],2),")"),xlab="injection order") | |
456 points(Xn[[sm_meta$injectionOrder]][indpool],Xn[indpool,p+nbid],col="maroon",pch=16,cex=1) | |
457 suppressWarnings(plot.design( x[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect before correction")) | |
458 suppressWarnings(plot.design(Xn[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect after correction")) | |
459 } | |
460 } | |
461 } | |
462 | |
463 if (detail=="reg" || detail=="plot" || detail=="no") { | |
464 if (nbi > 3) { | |
465 # Sum of ions before/after plot | |
466 par(mfrow=c(1,2),ask=F,cex=1.2) | |
467 xsum <- rowSums(x[,(nbid+1):lastIon],na.rm=TRUE) | |
468 Xnsum <- rowSums(Xn[,(nbid+1):lastIon],na.rm=TRUE) | |
469 plot(x[[sm_meta$injectionOrder]],xsum,col=x[[sm_meta$batch]],ylab="sum of variables' intensities",xlab="injection order", | |
470 ylim=c(min(c(xsum,Xnsum),na.rm=TRUE),max(c(xsum,Xnsum),na.rm=TRUE)),main="Sum of intensities\nBefore correction") | |
471 points(x[[sm_meta$injectionOrder]][indpool],xsum[indpool],col="maroon",pch=16,cex=1.2) | |
472 plot(x[[sm_meta$injectionOrder]],Xnsum,col=x[[sm_meta$batch]],ylab="sum of variables' intensities",xlab="injection order", | |
473 ylim=c(min(c(xsum,Xnsum),na.rm=TRUE),max(c(xsum,Xnsum),na.rm=TRUE)),main="Sum of intensities\nAfter correction") | |
474 points(x[[sm_meta$injectionOrder]][indpool],Xnsum[indpool],col="maroon",pch=16,cex=1.2) | |
475 # PCA Plot before/after, normed only and ions plot | |
476 par(mfrow=c(3,4),ask=F,cex=1.2) | |
477 acplight(x[,c(indtypsamp,indbatch,indtypsamp,indfact,(nbid+1):lastIon)],"uv",TRUE) | |
478 norm.ion <- which(colnames(Xn)%in%(rownames(res.ind)[which(rowSums(res.ind)>=1)])) | |
479 acplight(Xn[,c(indtypsamp,indbatch,indtypsamp,indfact,(nbid+1):lastIon)],"uv",TRUE,norm.ion) | |
480 if(length(norm.ion)>0){acplight(Xn[,c(indtypsamp,indbatch,indtypsamp,indfact,norm.ion)],"uv",TRUE)} | |
481 # Before/after boxplot | |
482 par(mfrow=c(1,2),ask=F,cex=1.2) | |
483 cvplot=cv[!is.na(cv[[1]])&!is.na(cv[[2]]),] | |
484 if(nrow(cvplot)>0){ | |
485 boxplot(cvplot[[1]],ylim=c(min(cvplot),max(cvplot)),main="CV of pools before correction") | |
486 boxplot(cvplot[[2]],ylim=c(min(cvplot),max(cvplot)),main="CV of pools after correction") | |
487 } | |
488 dev.off() | |
489 } | |
490 } | |
491 if (nbi<=3) {dev.off()} | |
492 # transposed matrix is return (format of the initial matrix with ions in rows) | |
493 Xr=Xn[,-c(1:nbid)]; dimnames(Xr)[[1]]=Xn[[1]] | |
494 Xr=t(Xr) ; Xr <- data.frame(ions=rownames(Xr),Xr) | |
495 | |
496 res.norm[[1]] <- Xr ; res.norm[[2]] <- data.frame(metaion,res.ind) ; res.norm[[3]] <- x[,c(1:nbid)] | |
497 names(res.norm) <- c("dataMatrix","variableMetadata","sampleMetadata") | |
498 return(res.norm) | |
499 } | |
500 | |
501 | |
502 | |
503 | |
504 | |
505 acplight <- function(ids, scaling="uv", indiv=FALSE,indcol=NULL) { | |
506 suppressPackageStartupMessages(library(ade4)) | |
507 suppressPackageStartupMessages(library(pcaMethods)) | |
508 # Make a PCA and plot scores and loadings. | |
509 # First column must contain samples' identifiers. | |
510 # Columns 2 to 4 contain factors to colour the plots. | |
511 for (i in 1:3) { | |
512 idss <- data.frame(ids) | |
513 idss[,i+1] <- as.character(idss[,i+1]) | |
514 idss[which(is.na(idss[,i+1])),i+1] <- "no_modality" | |
515 idss[which(idss[,i+1]=="NA"),i+1] <- "no_modality" | |
516 idss[which(idss[,i+1]==""),i+1] <- "no_modality" | |
517 classe=as.factor(idss[[i+1]]) | |
518 idsample=as.character(idss[[1]]) | |
519 colour=1:length(levels(classe)) | |
520 ions=as.matrix(idss[,5:dim(idss)[2]]) | |
521 # Removing ions containing NA (not compatible with standard PCA) | |
522 ions=t(na.omit(t(ions))) | |
523 if(i==1){if(ncol(ions)!=(ncol(idss)-4)){cat("Note:",(ncol(idss)-4)-ncol(ions),"ions were ignored for PCA display due to NA in intensities.\n")}} | |
524 # Scaling choice: "uv","none","pareto" | |
525 object=suppressWarnings(prep(ions, scale=scaling, center=TRUE)) | |
526 if(i==1){if(length(which(apply(ions,2,var)==0))>0){cat("Warning: there are",length(which(apply(ions,2,var)==0)),"constant ions.\n")}} | |
527 # ALGO: nipals,svdImpute, Bayesian, svd, probalistic=F | |
528 result <- pca(object, center=F, method="svd", nPcs=2) | |
529 # ADE4 : to plot samples' ellipsoid for each class | |
530 s.class(result@scores, classe, cpoint = 1,xax=1,yax=2,col=colour,sub=sprintf("Scores - PCs %sx%s",1,2), possub="bottomright") | |
531 #s.label(result@loadings,label = ions, cpoint = 0, clabel=0.4, xax=1,yax=2,sub="Loadings",possub="bottomright") | |
532 if(i==1){resulti <- result} | |
533 } | |
534 if(indiv) { | |
535 colour <- rep("darkblue",length(resulti@loadings)) ; if(!is.null(indcol)) {colour[-c(indcol)] <- "red"} | |
536 plot(resulti@loadings,col=colour,main="Loadings",xaxt="n",yaxt="n",pch=20, | |
537 xlab=bquote(PC1-R^2==.(resulti@R2[1])),ylab=bquote(PC2 - R^2 == .(resulti@R2[2]))) | |
538 abline(h=0,v=0)} | |
539 } | |
540 | |
541 |