Mercurial > repos > lecorguille > xcms_merge
comparison lib.r @ 13:39797c768bba draft
"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f1caf2a3bf23cf319a75dd12c86402555dd02617"
author | workflow4metabolomics |
---|---|
date | Wed, 12 Feb 2020 08:29:39 -0500 |
parents | a301f001835c |
children | 5bd125a3f3b0 |
comparison
equal
deleted
inserted
replaced
12:a301f001835c | 13:39797c768bba |
---|---|
40 mergeXData <- function(args) { | 40 mergeXData <- function(args) { |
41 chromTIC <- NULL | 41 chromTIC <- NULL |
42 chromBPI <- NULL | 42 chromBPI <- NULL |
43 chromTIC_adjusted <- NULL | 43 chromTIC_adjusted <- NULL |
44 chromBPI_adjusted <- NULL | 44 chromBPI_adjusted <- NULL |
45 md5sumList <- NULL | |
45 for(image in args$images) { | 46 for(image in args$images) { |
46 | 47 |
47 load(image) | 48 load(image) |
48 # Handle infiles | 49 # Handle infiles |
49 if (!exists("singlefile")) singlefile <- NULL | 50 if (!exists("singlefile")) singlefile <- NULL |
50 if (!exists("zipfile")) zipfile <- NULL | 51 if (!exists("zipfile")) zipfile <- NULL |
51 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) | 52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) |
52 zipfile <- rawFilePath$zipfile | 53 zipfile <- rawFilePath$zipfile |
53 singlefile <- rawFilePath$singlefile | 54 singlefile <- rawFilePath$singlefile |
54 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) | |
55 | 55 |
56 if (exists("raw_data")) xdata <- raw_data | 56 if (exists("raw_data")) xdata <- raw_data |
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | 57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") |
58 | 58 |
59 cat(sampleNamesList$sampleNamesOrigin,"\n") | 59 cat(sampleNamesList$sampleNamesOrigin,"\n") |
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { | 147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { |
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | 148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) |
149 | 149 |
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | 150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) |
151 | 151 |
152 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | 152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
153 names(group_colors) <- unique(xdata$sample_group) | 153 names(group_colors) <- unique(xdata$sample_group) |
154 | 154 |
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | 155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) |
156 for (i in 1:nrow(featureDefinitions(xdata))) { | 156 for (i in 1:nrow(featureDefinitions(xdata))) { |
157 mzmin = featureDefinitions(xdata)[i,]$mzmin | 157 mzmin = featureDefinitions(xdata)[i,]$mzmin |
168 getPlotAdjustedRtime <- function(xdata) { | 168 getPlotAdjustedRtime <- function(xdata) { |
169 | 169 |
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | 170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) |
171 | 171 |
172 # Color by group | 172 # Color by group |
173 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | 173 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
174 if (length(group_colors) > 1) { | 174 if (length(group_colors) > 1) { |
175 names(group_colors) <- unique(xdata$sample_group) | 175 names(group_colors) <- unique(xdata$sample_group) |
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | 176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) |
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) |
178 } | 178 } |
237 main <- paste(type,":",adjusted,"data") | 237 main <- paste(type,":",adjusted,"data") |
238 | 238 |
239 pdf(pdfname, width=16, height=10) | 239 pdf(pdfname, width=16, height=10) |
240 | 240 |
241 # Color by group | 241 # Color by group |
242 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | 242 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
243 if (length(group_colors) > 1) { | 243 if (length(group_colors) > 1) { |
244 names(group_colors) <- unique(xdata$sample_group) | 244 names(group_colors) <- unique(xdata$sample_group) |
245 plot(chrom, col = group_colors[chrom$sample_group], main=main) | 245 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") |
246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) |
247 } | 247 } |
248 | 248 |
249 # Color by sample | 249 # Color by sample |
250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) | 250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") |
251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) |
252 | 252 |
253 dev.off() | 253 dev.off() |
254 } | 254 } |
255 | 255 |
315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | 315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) |
316 | 316 |
317 } | 317 } |
318 | 318 |
319 | 319 |
320 # This function check if xcms will found all the files | |
321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
322 checkFilesCompatibilityWithXcms <- function(directory) { | |
323 cat("Checking files filenames compatibilities with xmcs...\n") | |
324 # WHAT XCMS WILL FIND | |
325 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
326 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
327 info <- file.info(directory) | |
328 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
329 files <- c(directory[!info$isdir], listed) | |
330 files_abs <- file.path(getwd(), files) | |
331 exists <- file.exists(files_abs) | |
332 files[exists] <- files_abs[exists] | |
333 files[exists] <- sub("//","/",files[exists]) | |
334 | |
335 # WHAT IS ON THE FILESYSTEM | |
336 filesystem_filepaths <- system(paste0("find \"",getwd(),"/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) | |
337 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | |
338 | |
339 # COMPARISON | |
340 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | |
341 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | |
342 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | |
343 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
344 } | |
345 } | |
346 | |
347 | |
348 #This function list the compatible files within the directory as xcms did | |
349 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
350 getMSFiles <- function (directory) { | |
351 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
352 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
353 info <- file.info(directory) | |
354 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
355 files <- c(directory[!info$isdir], listed) | |
356 exists <- file.exists(files) | |
357 files <- files[exists] | |
358 return(files) | |
359 } | |
360 | |
361 # This function check if XML contains special caracters. It also checks integrity and completness. | |
362 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
363 checkXmlStructure <- function (directory) { | |
364 cat("Checking XML structure...\n") | |
365 | |
366 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | |
367 capture <- system(cmd, intern=TRUE) | |
368 | |
369 if (length(capture)>0){ | |
370 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
371 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | |
372 write(capture, stderr()) | |
373 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | |
374 } | |
375 | |
376 } | |
377 | |
378 | |
379 # This function check if XML contain special characters | |
380 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
381 deleteXmlBadCharacters<- function (directory) { | |
382 cat("Checking Non ASCII characters in the XML...\n") | |
383 | |
384 processed <- F | |
385 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) | |
386 for (i in l){ | |
387 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") | |
388 capture <- suppressWarnings(system(cmd, intern=TRUE)) | |
389 if (length(capture)>0){ | |
390 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | |
391 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | |
392 c <- system(cmd, intern=TRUE) | |
393 capture <- "" | |
394 processed <- T | |
395 } | |
396 } | |
397 if (processed) cat("\n\n") | |
398 return(processed) | |
399 } | |
400 | |
401 | |
402 # This function will compute MD5 checksum to check the data integrity | 320 # This function will compute MD5 checksum to check the data integrity |
403 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
404 getMd5sum <- function (directory) { | 322 getMd5sum <- function (files) { |
405 cat("Compute md5 checksum...\n") | 323 cat("Compute md5 checksum...\n") |
406 # WHAT XCMS WILL FIND | |
407 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
408 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
409 info <- file.info(directory) | |
410 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
411 files <- c(directory[!info$isdir], listed) | |
412 exists <- file.exists(files) | |
413 files <- files[exists] | |
414 | |
415 library(tools) | 324 library(tools) |
416 | |
417 #cat("\n\n") | |
418 | |
419 return(as.matrix(md5sum(files))) | 325 return(as.matrix(md5sum(files))) |
420 } | |
421 | |
422 | |
423 # This function get the raw file path from the arguments | |
424 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
425 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { | |
426 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | |
427 | |
428 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] | |
429 | |
430 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | |
431 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] | |
432 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] | |
433 } | |
434 if (exists("singlefile_galaxyPaths")){ | |
435 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | |
436 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | |
437 | |
438 singlefile <- NULL | |
439 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
440 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
441 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
442 # In case, an url is used to import data within Galaxy | |
443 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | |
444 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
445 } | |
446 } | |
447 return(list(zipfile=zipfile, singlefile=singlefile)) | |
448 } | 326 } |
449 | 327 |
450 # This function retrieve the raw file in the working directory | 328 # This function retrieve the raw file in the working directory |
451 # - if zipfile: unzip the file with its directory tree | 329 # - if zipfile: unzip the file with its directory tree |
452 # - if singlefiles: set symlink with the good filename | 330 # - if singlefiles: set symlink with the good filename |
453 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 331 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
454 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | 332 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { |
333 | |
334 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | |
335 | |
336 # single - if the file are passed in the command arguments -> refresh singlefile | |
337 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | |
338 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) | |
339 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) | |
340 | |
341 singlefile <- NULL | |
342 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
343 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
344 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
345 # In case, an url is used to import data within Galaxy | |
346 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | |
347 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
348 } | |
349 } | |
350 # zipfile - if the file are passed in the command arguments -> refresh zipfile | |
351 if (!is.null(args[[paste0("zipfile",prefix)]])) | |
352 zipfile <- args[[paste0("zipfile",prefix)]] | |
353 | |
354 # single | |
455 if(!is.null(singlefile) && (length("singlefile")>0)) { | 355 if(!is.null(singlefile) && (length("singlefile")>0)) { |
356 files <- vector() | |
456 for (singlefile_sampleName in names(singlefile)) { | 357 for (singlefile_sampleName in names(singlefile)) { |
457 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 358 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
458 if(!file.exists(singlefile_galaxyPath)){ | 359 if(!file.exists(singlefile_galaxyPath)){ |
459 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 360 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") |
460 print(error_message); stop(error_message) | 361 print(error_message); stop(error_message) |
461 } | 362 } |
462 | 363 |
463 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 364 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) |
464 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 365 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
465 | 366 files <- c(files, singlefile_sampleName) |
466 } | 367 } |
467 directory <- "." | 368 } |
468 | 369 # zipfile |
469 } | |
470 if(!is.null(zipfile) && (zipfile != "")) { | 370 if(!is.null(zipfile) && (zipfile != "")) { |
471 if(!file.exists(zipfile)){ | 371 if(!file.exists(zipfile)){ |
472 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 372 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") |
473 print(error_message) | 373 print(error_message) |
474 stop(error_message) | 374 stop(error_message) |
475 } | 375 } |
476 | |
477 #list all file in the zip file | |
478 #zip_files <- unzip(zipfile,list=T)[,"Name"] | |
479 | |
480 #unzip | |
481 suppressWarnings(unzip(zipfile, unzip="unzip")) | 376 suppressWarnings(unzip(zipfile, unzip="unzip")) |
482 | 377 |
483 #get the directory name | 378 #get the directory name |
484 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 379 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) |
485 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 380 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) |
487 directory <- "." | 382 directory <- "." |
488 if (length(directories) == 1) directory <- directories | 383 if (length(directories) == 1) directory <- directories |
489 | 384 |
490 cat("files_root_directory\t",directory,"\n") | 385 cat("files_root_directory\t",directory,"\n") |
491 | 386 |
492 } | 387 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
493 return (directory) | 388 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") |
389 info <- file.info(directory) | |
390 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
391 files <- c(directory[!info$isdir], listed) | |
392 exists <- file.exists(files) | |
393 files <- files[exists] | |
394 | |
395 } | |
396 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) | |
397 | |
494 } | 398 } |
495 | 399 |
496 | 400 |
497 # This function retrieve a xset like object | 401 # This function retrieve a xset like object |
498 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 402 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |