Mercurial > repos > lecorguille > xcms_xcmsset
comparison lib.r @ 11:91311aa08cdc draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 08e7f269a5c59687a7768be8db5fcb4e4d736093
author | lecorguille |
---|---|
date | Mon, 30 Jan 2017 08:52:59 -0500 |
parents | 0888f7ef739a |
children | 15646e937936 |
comparison
equal
deleted
inserted
replaced
10:69eb0fc05837 | 11:91311aa08cdc |
---|---|
1 # lib.r version="2.0.1" | |
2 #Authors ABiMS TEAM | 1 #Authors ABiMS TEAM |
3 #Lib.r for Galaxy Workflow4Metabo | 2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools |
3 # | |
4 #version 2.4: lecorguille | |
5 # add getPeaklistW4M | |
6 #version 2.3: yguitton | |
7 # correction for empty PDF when only 1 class | |
4 #version 2.2 | 8 #version 2.2 |
5 #Based on lib.r 2.1 | 9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet |
6 #Modifications made by Guitton Yann | 10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation |
7 #correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet | 11 #version 2.1: yguitton |
8 #Note if scanrange is used a warning is prompted in R console but do not stop PDF generation | 12 # Modifications made by Guitton Yann |
9 | 13 |
10 | 14 |
11 | 15 #@author G. Le Corguille |
16 #This function convert if it is required the Retention Time in minutes | |
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
18 if (convertRTMinute){ | |
19 #converting the retention times (seconds) into minutes | |
20 print("converting the retention times into minutes in the variableMetadata") | |
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 | |
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 | |
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 | |
24 } | |
25 return (variableMetadata) | |
26 } | |
27 | |
28 #@author G. Le Corguille | |
29 #This function format ions identifiers | |
30 formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) { | |
31 return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT)))) | |
32 } | |
33 | |
34 #@author G. Le Corguille | |
35 # value: intensity values to be used into, maxo or intb | |
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { | |
37 groups <- xset@groups | |
38 values <- groupval(xset, "medret", value=intval) | |
39 | |
40 # renamming of the column rtmed to rt to fit with camera peaklist function output | |
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" | |
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" | |
43 | |
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
45 groups = RTSecondToMinute(groups, convertRTMinute) | |
46 | |
47 rownames(groups) = ids | |
48 rownames(values) = ids | |
49 | |
50 #@TODO: add "name" as the first column name | |
51 #colnames(groups)[1] = "name" | |
52 #colnames(values)[1] = "name" | |
53 | |
54 write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA) | |
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) | |
56 } | |
12 | 57 |
13 #@author Y. Guitton | 58 #@author Y. Guitton |
14 getBPC <- function(file,rtcor=NULL, ...) { | 59 getBPC <- function(file,rtcor=NULL, ...) { |
15 object <- xcmsRaw(file) | 60 object <- xcmsRaw(file) |
16 sel <- profRange(object, ...) | 61 sel <- profRange(object, ...) |
42 | 87 |
43 | 88 |
44 for (j in 1:N) { | 89 for (j in 1:N) { |
45 | 90 |
46 TIC[[j]] <- getBPC(files[j]) | 91 TIC[[j]] <- getBPC(files[j]) |
47 #good for raw | 92 #good for raw |
48 # seems strange for corrected | 93 # seems strange for corrected |
49 #errors if scanrange used in xcmsSetgeneration | 94 #errors if scanrange used in xcmsSetgeneration |
50 if (!is.null(xcmsSet) && rt == "corrected") | 95 if (!is.null(xcmsSet) && rt == "corrected") |
51 rtcor <- xcmsSet@rt$corrected[[j]] else | 96 rtcor <- xcmsSet@rt$corrected[[j]] else |
52 rtcor <- NULL | 97 rtcor <- NULL |
53 | 98 |
54 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) | 99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) |
55 # TIC[[j]][,1]<-rtcor | 100 # TIC[[j]][,1]<-rtcor |
56 } | 101 } |
57 | 102 |
58 | 103 |
66 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 111 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
67 ylim = c(-ylim[2], ylim[2]) | 112 ylim = c(-ylim[2], ylim[2]) |
68 | 113 |
69 | 114 |
70 ##plot start | 115 ##plot start |
71 | 116 |
72 if (length(class)>2){ | 117 if (length(class)>2){ |
73 for (k in 1:(length(class)-1)){ | 118 for (k in 1:(length(class)-1)){ |
74 for (l in (k+1):length(class)){ | 119 for (l in (k+1):length(class)){ |
75 #print(paste(class[k],"vs",class[l],sep=" ")) | 120 #print(paste(class[k],"vs",class[l],sep=" ")) |
76 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") |
77 colvect<-NULL | 122 colvect<-NULL |
78 for (j in 1:length(classnames[[k]])) { | 123 for (j in 1:length(classnames[[k]])) { |
79 tic <- TIC[[classnames[[k]][j]]] | 124 tic <- TIC[[classnames[[k]][j]]] |
80 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
113 } | 158 } |
114 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) |
115 | 160 |
116 }#end length ==2 | 161 }#end length ==2 |
117 | 162 |
163 #case where only one class | |
164 if (length(class)==1){ | |
165 k=1 | |
166 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
167 colvect<-NULL | |
168 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
169 | |
170 for (j in 1:length(classnames[[k]])) { | |
171 tic <- TIC[[classnames[[k]][j]]] | |
172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
174 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
175 } | |
176 | |
177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
178 | |
179 }#end length ==1 | |
180 | |
118 dev.off() #pdf(pdfname,w=16,h=10) | 181 dev.off() #pdf(pdfname,w=16,h=10) |
119 | 182 |
120 invisible(TIC) | 183 invisible(TIC) |
121 } | 184 } |
122 | 185 |
151 | 214 |
152 classnames<-vector("list",length(class)) | 215 classnames<-vector("list",length(class)) |
153 for (i in 1:length(class)){ | 216 for (i in 1:length(class)){ |
154 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) | 217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) |
155 } | 218 } |
156 | 219 |
157 N <- length(files) | 220 N <- length(files) |
158 TIC <- vector("list",N) | 221 TIC <- vector("list",N) |
159 | 222 |
160 for (i in 1:N) { | 223 for (i in 1:N) { |
161 if (!is.null(xcmsSet) && rt == "corrected") | 224 if (!is.null(xcmsSet) && rt == "corrected") |
176 | 239 |
177 ##plot start | 240 ##plot start |
178 if (length(class)>2){ | 241 if (length(class)>2){ |
179 for (k in 1:(length(class)-1)){ | 242 for (k in 1:(length(class)-1)){ |
180 for (l in (k+1):length(class)){ | 243 for (l in (k+1):length(class)){ |
181 #print(paste(class[k],"vs",class[l],sep=" ")) | 244 #print(paste(class[k],"vs",class[l],sep=" ")) |
182 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | 245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") |
183 colvect<-NULL | 246 colvect<-NULL |
184 for (j in 1:length(classnames[[k]])) { | 247 for (j in 1:length(classnames[[k]])) { |
185 | 248 |
186 tic <- TIC[[classnames[[k]][j]]] | 249 tic <- TIC[[classnames[[k]][j]]] |
217 colvect<-append(colvect,cols[classnames[[l]][j]]) | 280 colvect<-append(colvect,cols[classnames[[l]][j]]) |
218 } | 281 } |
219 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) |
220 | 283 |
221 }#end length ==2 | 284 }#end length ==2 |
285 | |
286 #case where only one class | |
287 if (length(class)==1){ | |
288 k=1 | |
289 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
290 | |
291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
292 colvect<-NULL | |
293 for (j in 1:length(classnames[[k]])) { | |
294 tic <- TIC[[classnames[[k]][j]]] | |
295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
297 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
298 } | |
299 | |
300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
301 | |
302 }#end length ==1 | |
303 | |
222 dev.off() #pdf(pdfname,w=16,h=10) | 304 dev.off() #pdf(pdfname,w=16,h=10) |
223 | 305 |
224 invisible(TIC) | 306 invisible(TIC) |
225 } | 307 } |
226 | 308 |
235 | 317 |
236 #Create the sampleMetada dataframe | 318 #Create the sampleMetada dataframe |
237 sampleMetadata=xset@phenoData | 319 sampleMetadata=xset@phenoData |
238 sampleNamesOrigin=rownames(sampleMetadata) | 320 sampleNamesOrigin=rownames(sampleMetadata) |
239 sampleNamesMakeNames=make.names(sampleNamesOrigin) | 321 sampleNamesMakeNames=make.names(sampleNamesOrigin) |
240 | 322 |
241 if (any(duplicated(sampleNamesMakeNames))) { | 323 if (any(duplicated(sampleNamesMakeNames))) { |
242 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
243 for (sampleName in sampleNamesOrigin) { | 325 for (sampleName in sampleNamesOrigin) { |
244 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) |
245 } | 327 } |
283 library(tools) | 365 library(tools) |
284 samplename=file_path_sans_ext(filename) | 366 samplename=file_path_sans_ext(filename) |
285 | 367 |
286 #Set the polarity attribute | 368 #Set the polarity attribute |
287 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity | 369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity |
288 | 370 |
289 #Delete xcmsRaw object because it creates a bug for the fillpeaks step | 371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step |
290 rm(xcmsRaw) | 372 rm(xcmsRaw) |
291 } | 373 } |
292 | 374 |
293 } | 375 } |
319 # WHAT IS ON THE FILESYSTEM | 401 # WHAT IS ON THE FILESYSTEM |
320 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | 402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) |
321 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | 403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] |
322 | 404 |
323 # COMPARISON | 405 # COMPARISON |
324 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | 406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { |
325 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | 407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) |
326 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | 408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) |
327 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
328 | 410 |
329 } | 411 } |
345 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
346 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
347 write(capture, stderr()) | 429 write(capture, stderr()) |
348 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
349 } | 431 } |
350 | 432 |
351 } | 433 } |
352 | 434 |
353 | 435 |
354 ## | 436 ## |
355 ## This function check if XML contain special characters | 437 ## This function check if XML contain special characters |
357 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
358 deleteXmlBadCharacters<- function (directory) { | 440 deleteXmlBadCharacters<- function (directory) { |
359 cat("Checking Non ASCII characters in the XML...\n") | 441 cat("Checking Non ASCII characters in the XML...\n") |
360 | 442 |
361 processed=F | 443 processed=F |
362 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) | 444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) |
363 for (i in l){ | 445 for (i in l){ |
364 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") | 446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") |
365 capture=suppressWarnings(system(cmd,intern=TRUE)) | 447 capture=suppressWarnings(system(cmd,intern=TRUE)) |
366 if (length(capture)>0){ | 448 if (length(capture)>0){ |
367 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | 449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) |
368 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | 450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) |
369 c=system(cmd,intern=TRUE) | 451 c=system(cmd,intern=TRUE) |
370 capture="" | 452 capture="" |
371 processed=T | 453 processed=T |
372 } | 454 } |
373 } | 455 } |
374 if (processed) cat("\n\n") | 456 if (processed) cat("\n\n") |
375 return(processed) | 457 return(processed) |
376 } | 458 } |
377 | 459 |
378 | 460 |
379 ## | 461 ## |
380 ## This function will compute MD5 checksum to check the data integrity | 462 ## This function will compute MD5 checksum to check the data integrity |
381 ## | 463 ## |
382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
383 getMd5sum <- function (directory) { | 465 getMd5sum <- function (directory) { |
384 cat("Compute md5 checksum...\n") | 466 cat("Compute md5 checksum...\n") |
395 | 477 |
396 #cat("\n\n") | 478 #cat("\n\n") |
397 | 479 |
398 return(as.matrix(md5sum(files))) | 480 return(as.matrix(md5sum(files))) |
399 } | 481 } |
400 |