Mercurial > repos > lecorguille > xcms_fillpeaks
comparison lib.r @ 24:999f9db0ca2c draft
"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f1caf2a3bf23cf319a75dd12c86402555dd02617"
| author | workflow4metabolomics |
|---|---|
| date | Wed, 12 Feb 2020 08:28:33 -0500 |
| parents | 6824468351ca |
| children | 26d77e9ceb49 |
comparison
equal
deleted
inserted
replaced
| 23:3bc94c3abb28 | 24:999f9db0ca2c |
|---|---|
| 40 mergeXData <- function(args) { | 40 mergeXData <- function(args) { |
| 41 chromTIC <- NULL | 41 chromTIC <- NULL |
| 42 chromBPI <- NULL | 42 chromBPI <- NULL |
| 43 chromTIC_adjusted <- NULL | 43 chromTIC_adjusted <- NULL |
| 44 chromBPI_adjusted <- NULL | 44 chromBPI_adjusted <- NULL |
| 45 md5sumList <- NULL | |
| 45 for(image in args$images) { | 46 for(image in args$images) { |
| 46 | 47 |
| 47 load(image) | 48 load(image) |
| 48 # Handle infiles | 49 # Handle infiles |
| 49 if (!exists("singlefile")) singlefile <- NULL | 50 if (!exists("singlefile")) singlefile <- NULL |
| 50 if (!exists("zipfile")) zipfile <- NULL | 51 if (!exists("zipfile")) zipfile <- NULL |
| 51 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) | 52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) |
| 52 zipfile <- rawFilePath$zipfile | 53 zipfile <- rawFilePath$zipfile |
| 53 singlefile <- rawFilePath$singlefile | 54 singlefile <- rawFilePath$singlefile |
| 54 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) | |
| 55 | 55 |
| 56 if (exists("raw_data")) xdata <- raw_data | 56 if (exists("raw_data")) xdata <- raw_data |
| 57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | 57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") |
| 58 | 58 |
| 59 cat(sampleNamesList$sampleNamesOrigin,"\n") | 59 cat(sampleNamesList$sampleNamesOrigin,"\n") |
| 147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { | 147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { |
| 148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | 148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) |
| 149 | 149 |
| 150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | 150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) |
| 151 | 151 |
| 152 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | 152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
| 153 names(group_colors) <- unique(xdata$sample_group) | 153 names(group_colors) <- unique(xdata$sample_group) |
| 154 | 154 |
| 155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | 155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) |
| 156 for (i in 1:nrow(featureDefinitions(xdata))) { | 156 for (i in 1:nrow(featureDefinitions(xdata))) { |
| 157 mzmin = featureDefinitions(xdata)[i,]$mzmin | 157 mzmin = featureDefinitions(xdata)[i,]$mzmin |
| 168 getPlotAdjustedRtime <- function(xdata) { | 168 getPlotAdjustedRtime <- function(xdata) { |
| 169 | 169 |
| 170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | 170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) |
| 171 | 171 |
| 172 # Color by group | 172 # Color by group |
| 173 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | 173 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
| 174 if (length(group_colors) > 1) { | 174 if (length(group_colors) > 1) { |
| 175 names(group_colors) <- unique(xdata$sample_group) | 175 names(group_colors) <- unique(xdata$sample_group) |
| 176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | 176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) |
| 177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) |
| 178 } | 178 } |
| 237 main <- paste(type,":",adjusted,"data") | 237 main <- paste(type,":",adjusted,"data") |
| 238 | 238 |
| 239 pdf(pdfname, width=16, height=10) | 239 pdf(pdfname, width=16, height=10) |
| 240 | 240 |
| 241 # Color by group | 241 # Color by group |
| 242 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | 242 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
| 243 if (length(group_colors) > 1) { | 243 if (length(group_colors) > 1) { |
| 244 names(group_colors) <- unique(xdata$sample_group) | 244 names(group_colors) <- unique(xdata$sample_group) |
| 245 plot(chrom, col = group_colors[chrom$sample_group], main=main) | 245 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") |
| 246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) |
| 247 } | 247 } |
| 248 | 248 |
| 249 # Color by sample | 249 # Color by sample |
| 250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) | 250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") |
| 251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) |
| 252 | 252 |
| 253 dev.off() | 253 dev.off() |
| 254 } | 254 } |
| 255 | 255 |
| 315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | 315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) |
| 316 | 316 |
| 317 } | 317 } |
| 318 | 318 |
| 319 | 319 |
| 320 # This function check if xcms will found all the files | |
| 321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
| 322 checkFilesCompatibilityWithXcms <- function(directory) { | |
| 323 cat("Checking files filenames compatibilities with xmcs...\n") | |
| 324 # WHAT XCMS WILL FIND | |
| 325 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 326 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
| 327 info <- file.info(directory) | |
| 328 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
| 329 files <- c(directory[!info$isdir], listed) | |
| 330 files_abs <- file.path(getwd(), files) | |
| 331 exists <- file.exists(files_abs) | |
| 332 files[exists] <- files_abs[exists] | |
| 333 files[exists] <- sub("//","/",files[exists]) | |
| 334 | |
| 335 # WHAT IS ON THE FILESYSTEM | |
| 336 filesystem_filepaths <- system(paste0("find \"",getwd(),"/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) | |
| 337 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | |
| 338 | |
| 339 # COMPARISON | |
| 340 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | |
| 341 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | |
| 342 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | |
| 343 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
| 344 } | |
| 345 } | |
| 346 | |
| 347 | |
| 348 #This function list the compatible files within the directory as xcms did | |
| 349 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
| 350 getMSFiles <- function (directory) { | |
| 351 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 352 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
| 353 info <- file.info(directory) | |
| 354 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
| 355 files <- c(directory[!info$isdir], listed) | |
| 356 exists <- file.exists(files) | |
| 357 files <- files[exists] | |
| 358 return(files) | |
| 359 } | |
| 360 | |
| 361 # This function check if XML contains special caracters. It also checks integrity and completness. | |
| 362 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
| 363 checkXmlStructure <- function (directory) { | |
| 364 cat("Checking XML structure...\n") | |
| 365 | |
| 366 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | |
| 367 capture <- system(cmd, intern=TRUE) | |
| 368 | |
| 369 if (length(capture)>0){ | |
| 370 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
| 371 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | |
| 372 write(capture, stderr()) | |
| 373 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | |
| 374 } | |
| 375 | |
| 376 } | |
| 377 | |
| 378 | |
| 379 # This function check if XML contain special characters | |
| 380 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
| 381 deleteXmlBadCharacters<- function (directory) { | |
| 382 cat("Checking Non ASCII characters in the XML...\n") | |
| 383 | |
| 384 processed <- F | |
| 385 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) | |
| 386 for (i in l){ | |
| 387 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") | |
| 388 capture <- suppressWarnings(system(cmd, intern=TRUE)) | |
| 389 if (length(capture)>0){ | |
| 390 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | |
| 391 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | |
| 392 c <- system(cmd, intern=TRUE) | |
| 393 capture <- "" | |
| 394 processed <- T | |
| 395 } | |
| 396 } | |
| 397 if (processed) cat("\n\n") | |
| 398 return(processed) | |
| 399 } | |
| 400 | |
| 401 | |
| 402 # This function will compute MD5 checksum to check the data integrity | 320 # This function will compute MD5 checksum to check the data integrity |
| 403 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
| 404 getMd5sum <- function (directory) { | 322 getMd5sum <- function (files) { |
| 405 cat("Compute md5 checksum...\n") | 323 cat("Compute md5 checksum...\n") |
| 406 # WHAT XCMS WILL FIND | |
| 407 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 408 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
| 409 info <- file.info(directory) | |
| 410 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
| 411 files <- c(directory[!info$isdir], listed) | |
| 412 exists <- file.exists(files) | |
| 413 files <- files[exists] | |
| 414 | |
| 415 library(tools) | 324 library(tools) |
| 416 | |
| 417 #cat("\n\n") | |
| 418 | |
| 419 return(as.matrix(md5sum(files))) | 325 return(as.matrix(md5sum(files))) |
| 420 } | |
| 421 | |
| 422 | |
| 423 # This function get the raw file path from the arguments | |
| 424 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 425 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { | |
| 426 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | |
| 427 | |
| 428 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] | |
| 429 | |
| 430 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | |
| 431 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] | |
| 432 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] | |
| 433 } | |
| 434 if (exists("singlefile_galaxyPaths")){ | |
| 435 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | |
| 436 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | |
| 437 | |
| 438 singlefile <- NULL | |
| 439 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
| 440 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
| 441 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
| 442 # In case, an url is used to import data within Galaxy | |
| 443 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | |
| 444 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
| 445 } | |
| 446 } | |
| 447 return(list(zipfile=zipfile, singlefile=singlefile)) | |
| 448 } | 326 } |
| 449 | 327 |
| 450 # This function retrieve the raw file in the working directory | 328 # This function retrieve the raw file in the working directory |
| 451 # - if zipfile: unzip the file with its directory tree | 329 # - if zipfile: unzip the file with its directory tree |
| 452 # - if singlefiles: set symlink with the good filename | 330 # - if singlefiles: set symlink with the good filename |
| 453 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 331 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
| 454 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | 332 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { |
| 333 | |
| 334 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | |
| 335 | |
| 336 # single - if the file are passed in the command arguments -> refresh singlefile | |
| 337 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | |
| 338 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) | |
| 339 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) | |
| 340 | |
| 341 singlefile <- NULL | |
| 342 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
| 343 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
| 344 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
| 345 # In case, an url is used to import data within Galaxy | |
| 346 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | |
| 347 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
| 348 } | |
| 349 } | |
| 350 # zipfile - if the file are passed in the command arguments -> refresh zipfile | |
| 351 if (!is.null(args[[paste0("zipfile",prefix)]])) | |
| 352 zipfile <- args[[paste0("zipfile",prefix)]] | |
| 353 | |
| 354 # single | |
| 455 if(!is.null(singlefile) && (length("singlefile")>0)) { | 355 if(!is.null(singlefile) && (length("singlefile")>0)) { |
| 356 files <- vector() | |
| 456 for (singlefile_sampleName in names(singlefile)) { | 357 for (singlefile_sampleName in names(singlefile)) { |
| 457 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 358 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
| 458 if(!file.exists(singlefile_galaxyPath)){ | 359 if(!file.exists(singlefile_galaxyPath)){ |
| 459 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 360 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") |
| 460 print(error_message); stop(error_message) | 361 print(error_message); stop(error_message) |
| 461 } | 362 } |
| 462 | 363 |
| 463 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 364 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) |
| 464 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 365 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
| 465 | 366 files <- c(files, singlefile_sampleName) |
| 466 } | 367 } |
| 467 directory <- "." | 368 } |
| 468 | 369 # zipfile |
| 469 } | |
| 470 if(!is.null(zipfile) && (zipfile != "")) { | 370 if(!is.null(zipfile) && (zipfile != "")) { |
| 471 if(!file.exists(zipfile)){ | 371 if(!file.exists(zipfile)){ |
| 472 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 372 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") |
| 473 print(error_message) | 373 print(error_message) |
| 474 stop(error_message) | 374 stop(error_message) |
| 475 } | 375 } |
| 476 | |
| 477 #list all file in the zip file | |
| 478 #zip_files <- unzip(zipfile,list=T)[,"Name"] | |
| 479 | |
| 480 #unzip | |
| 481 suppressWarnings(unzip(zipfile, unzip="unzip")) | 376 suppressWarnings(unzip(zipfile, unzip="unzip")) |
| 482 | 377 |
| 483 #get the directory name | 378 #get the directory name |
| 484 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 379 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) |
| 485 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 380 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) |
| 487 directory <- "." | 382 directory <- "." |
| 488 if (length(directories) == 1) directory <- directories | 383 if (length(directories) == 1) directory <- directories |
| 489 | 384 |
| 490 cat("files_root_directory\t",directory,"\n") | 385 cat("files_root_directory\t",directory,"\n") |
| 491 | 386 |
| 492 } | 387 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
| 493 return (directory) | 388 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") |
| 389 info <- file.info(directory) | |
| 390 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
| 391 files <- c(directory[!info$isdir], listed) | |
| 392 exists <- file.exists(files) | |
| 393 files <- files[exists] | |
| 394 | |
| 395 } | |
| 396 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) | |
| 397 | |
| 494 } | 398 } |
| 495 | 399 |
| 496 | 400 |
| 497 # This function retrieve a xset like object | 401 # This function retrieve a xset like object |
| 498 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 402 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
