comparison lib.r @ 11:91311aa08cdc draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 08e7f269a5c59687a7768be8db5fcb4e4d736093
author lecorguille
date Mon, 30 Jan 2017 08:52:59 -0500
parents 0888f7ef739a
children 15646e937936
comparison
equal deleted inserted replaced
10:69eb0fc05837 11:91311aa08cdc
1 # lib.r version="2.0.1"
2 #Authors ABiMS TEAM 1 #Authors ABiMS TEAM
3 #Lib.r for Galaxy Workflow4Metabo 2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools
3 #
4 #version 2.4: lecorguille
5 # add getPeaklistW4M
6 #version 2.3: yguitton
7 # correction for empty PDF when only 1 class
4 #version 2.2 8 #version 2.2
5 #Based on lib.r 2.1 9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
6 #Modifications made by Guitton Yann 10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
7 #correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet 11 #version 2.1: yguitton
8 #Note if scanrange is used a warning is prompted in R console but do not stop PDF generation 12 # Modifications made by Guitton Yann
9 13
10 14
11 15 #@author G. Le Corguille
16 #This function convert if it is required the Retention Time in minutes
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
18 if (convertRTMinute){
19 #converting the retention times (seconds) into minutes
20 print("converting the retention times into minutes in the variableMetadata")
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
24 }
25 return (variableMetadata)
26 }
27
28 #@author G. Le Corguille
29 #This function format ions identifiers
30 formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) {
31 return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT))))
32 }
33
34 #@author G. Le Corguille
35 # value: intensity values to be used into, maxo or intb
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
37 groups <- xset@groups
38 values <- groupval(xset, "medret", value=intval)
39
40 # renamming of the column rtmed to rt to fit with camera peaklist function output
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt"
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz"
43
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
45 groups = RTSecondToMinute(groups, convertRTMinute)
46
47 rownames(groups) = ids
48 rownames(values) = ids
49
50 #@TODO: add "name" as the first column name
51 #colnames(groups)[1] = "name"
52 #colnames(values)[1] = "name"
53
54 write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA)
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA)
56 }
12 57
13 #@author Y. Guitton 58 #@author Y. Guitton
14 getBPC <- function(file,rtcor=NULL, ...) { 59 getBPC <- function(file,rtcor=NULL, ...) {
15 object <- xcmsRaw(file) 60 object <- xcmsRaw(file)
16 sel <- profRange(object, ...) 61 sel <- profRange(object, ...)
42 87
43 88
44 for (j in 1:N) { 89 for (j in 1:N) {
45 90
46 TIC[[j]] <- getBPC(files[j]) 91 TIC[[j]] <- getBPC(files[j])
47 #good for raw 92 #good for raw
48 # seems strange for corrected 93 # seems strange for corrected
49 #errors if scanrange used in xcmsSetgeneration 94 #errors if scanrange used in xcmsSetgeneration
50 if (!is.null(xcmsSet) && rt == "corrected") 95 if (!is.null(xcmsSet) && rt == "corrected")
51 rtcor <- xcmsSet@rt$corrected[[j]] else 96 rtcor <- xcmsSet@rt$corrected[[j]] else
52 rtcor <- NULL 97 rtcor <- NULL
53 98
54 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) 99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
55 # TIC[[j]][,1]<-rtcor 100 # TIC[[j]][,1]<-rtcor
56 } 101 }
57 102
58 103
66 ylim = range(sapply(TIC, function(x) range(x[,2]))) 111 ylim = range(sapply(TIC, function(x) range(x[,2])))
67 ylim = c(-ylim[2], ylim[2]) 112 ylim = c(-ylim[2], ylim[2])
68 113
69 114
70 ##plot start 115 ##plot start
71 116
72 if (length(class)>2){ 117 if (length(class)>2){
73 for (k in 1:(length(class)-1)){ 118 for (k in 1:(length(class)-1)){
74 for (l in (k+1):length(class)){ 119 for (l in (k+1):length(class)){
75 #print(paste(class[k],"vs",class[l],sep=" ")) 120 #print(paste(class[k],"vs",class[l],sep=" "))
76 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
77 colvect<-NULL 122 colvect<-NULL
78 for (j in 1:length(classnames[[k]])) { 123 for (j in 1:length(classnames[[k]])) {
79 tic <- TIC[[classnames[[k]][j]]] 124 tic <- TIC[[classnames[[k]][j]]]
80 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
113 } 158 }
114 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
115 160
116 }#end length ==2 161 }#end length ==2
117 162
163 #case where only one class
164 if (length(class)==1){
165 k=1
166 ylim = range(sapply(TIC, function(x) range(x[,2])))
167 colvect<-NULL
168 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
169
170 for (j in 1:length(classnames[[k]])) {
171 tic <- TIC[[classnames[[k]][j]]]
172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
174 colvect<-append(colvect,cols[classnames[[k]][j]])
175 }
176
177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
178
179 }#end length ==1
180
118 dev.off() #pdf(pdfname,w=16,h=10) 181 dev.off() #pdf(pdfname,w=16,h=10)
119 182
120 invisible(TIC) 183 invisible(TIC)
121 } 184 }
122 185
151 214
152 classnames<-vector("list",length(class)) 215 classnames<-vector("list",length(class))
153 for (i in 1:length(class)){ 216 for (i in 1:length(class)){
154 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) 217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i])
155 } 218 }
156 219
157 N <- length(files) 220 N <- length(files)
158 TIC <- vector("list",N) 221 TIC <- vector("list",N)
159 222
160 for (i in 1:N) { 223 for (i in 1:N) {
161 if (!is.null(xcmsSet) && rt == "corrected") 224 if (!is.null(xcmsSet) && rt == "corrected")
176 239
177 ##plot start 240 ##plot start
178 if (length(class)>2){ 241 if (length(class)>2){
179 for (k in 1:(length(class)-1)){ 242 for (k in 1:(length(class)-1)){
180 for (l in (k+1):length(class)){ 243 for (l in (k+1):length(class)){
181 #print(paste(class[k],"vs",class[l],sep=" ")) 244 #print(paste(class[k],"vs",class[l],sep=" "))
182 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") 245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
183 colvect<-NULL 246 colvect<-NULL
184 for (j in 1:length(classnames[[k]])) { 247 for (j in 1:length(classnames[[k]])) {
185 248
186 tic <- TIC[[classnames[[k]][j]]] 249 tic <- TIC[[classnames[[k]][j]]]
217 colvect<-append(colvect,cols[classnames[[l]][j]]) 280 colvect<-append(colvect,cols[classnames[[l]][j]])
218 } 281 }
219 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
220 283
221 }#end length ==2 284 }#end length ==2
285
286 #case where only one class
287 if (length(class)==1){
288 k=1
289 ylim = range(sapply(TIC, function(x) range(x[,2])))
290
291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
292 colvect<-NULL
293 for (j in 1:length(classnames[[k]])) {
294 tic <- TIC[[classnames[[k]][j]]]
295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
297 colvect<-append(colvect,cols[classnames[[k]][j]])
298 }
299
300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
301
302 }#end length ==1
303
222 dev.off() #pdf(pdfname,w=16,h=10) 304 dev.off() #pdf(pdfname,w=16,h=10)
223 305
224 invisible(TIC) 306 invisible(TIC)
225 } 307 }
226 308
235 317
236 #Create the sampleMetada dataframe 318 #Create the sampleMetada dataframe
237 sampleMetadata=xset@phenoData 319 sampleMetadata=xset@phenoData
238 sampleNamesOrigin=rownames(sampleMetadata) 320 sampleNamesOrigin=rownames(sampleMetadata)
239 sampleNamesMakeNames=make.names(sampleNamesOrigin) 321 sampleNamesMakeNames=make.names(sampleNamesOrigin)
240 322
241 if (any(duplicated(sampleNamesMakeNames))) { 323 if (any(duplicated(sampleNamesMakeNames))) {
242 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
243 for (sampleName in sampleNamesOrigin) { 325 for (sampleName in sampleNamesOrigin) {
244 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
245 } 327 }
283 library(tools) 365 library(tools)
284 samplename=file_path_sans_ext(filename) 366 samplename=file_path_sans_ext(filename)
285 367
286 #Set the polarity attribute 368 #Set the polarity attribute
287 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity 369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
288 370
289 #Delete xcmsRaw object because it creates a bug for the fillpeaks step 371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
290 rm(xcmsRaw) 372 rm(xcmsRaw)
291 } 373 }
292 374
293 } 375 }
319 # WHAT IS ON THE FILESYSTEM 401 # WHAT IS ON THE FILESYSTEM
320 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) 402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
321 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] 403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
322 404
323 # COMPARISON 405 # COMPARISON
324 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
325 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) 407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
326 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) 408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
327 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
328 410
329 } 411 }
345 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
346 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
347 write(capture, stderr()) 429 write(capture, stderr())
348 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") 430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
349 } 431 }
350 432
351 } 433 }
352 434
353 435
354 ## 436 ##
355 ## This function check if XML contain special characters 437 ## This function check if XML contain special characters
357 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
358 deleteXmlBadCharacters<- function (directory) { 440 deleteXmlBadCharacters<- function (directory) {
359 cat("Checking Non ASCII characters in the XML...\n") 441 cat("Checking Non ASCII characters in the XML...\n")
360 442
361 processed=F 443 processed=F
362 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
363 for (i in l){ 445 for (i in l){
364 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") 446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
365 capture=suppressWarnings(system(cmd,intern=TRUE)) 447 capture=suppressWarnings(system(cmd,intern=TRUE))
366 if (length(capture)>0){ 448 if (length(capture)>0){
367 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) 449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
368 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) 450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
369 c=system(cmd,intern=TRUE) 451 c=system(cmd,intern=TRUE)
370 capture="" 452 capture=""
371 processed=T 453 processed=T
372 } 454 }
373 } 455 }
374 if (processed) cat("\n\n") 456 if (processed) cat("\n\n")
375 return(processed) 457 return(processed)
376 } 458 }
377 459
378 460
379 ## 461 ##
380 ## This function will compute MD5 checksum to check the data integrity 462 ## This function will compute MD5 checksum to check the data integrity
381 ## 463 ##
382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
383 getMd5sum <- function (directory) { 465 getMd5sum <- function (directory) {
384 cat("Compute md5 checksum...\n") 466 cat("Compute md5 checksum...\n")
395 477
396 #cat("\n\n") 478 #cat("\n\n")
397 479
398 return(as.matrix(md5sum(files))) 480 return(as.matrix(md5sum(files)))
399 } 481 }
400