diff lib.r @ 24:761913919c21 draft

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f1caf2a3bf23cf319a75dd12c86402555dd02617"
author workflow4metabolomics
date Wed, 12 Feb 2020 08:29:15 -0500
parents fe4002ac5193
children a066b8581b97
line wrap: on
line diff
--- a/lib.r	Mon Apr 29 06:28:25 2019 -0400
+++ b/lib.r	Wed Feb 12 08:29:15 2020 -0500
@@ -42,16 +42,16 @@
     chromBPI <- NULL
     chromTIC_adjusted <- NULL
     chromBPI_adjusted <- NULL
+    md5sumList <- NULL
     for(image in args$images) {
 
         load(image)
         # Handle infiles
         if (!exists("singlefile")) singlefile <- NULL
         if (!exists("zipfile")) zipfile <- NULL
-        rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
+        rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args)
         zipfile <- rawFilePath$zipfile
         singlefile <- rawFilePath$singlefile
-        retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
 
         if (exists("raw_data")) xdata <- raw_data
         if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
@@ -149,7 +149,7 @@
 
     par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
 
-    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
     names(group_colors) <- unique(xdata$sample_group)
 
     xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
@@ -170,7 +170,7 @@
     pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
 
     # Color by group
-    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
     if (length(group_colors) > 1) {
         names(group_colors) <- unique(xdata$sample_group)
         plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
@@ -239,15 +239,15 @@
     pdf(pdfname, width=16, height=10)
 
     # Color by group
-    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
     if (length(group_colors) > 1) {
         names(group_colors) <- unique(xdata$sample_group)
-        plot(chrom, col = group_colors[chrom$sample_group], main=main)
+        plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none")
         legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
     }
 
     # Color by sample
-    plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
+    plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none")
     legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
 
     dev.off()
@@ -317,142 +317,43 @@
 }
 
 
-# This function check if xcms will found all the files
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-checkFilesCompatibilityWithXcms <- function(directory) {
-    cat("Checking files filenames compatibilities with xmcs...\n")
-    # WHAT XCMS WILL FIND
-    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-    filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
-    info <- file.info(directory)
-    listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
-    files <- c(directory[!info$isdir], listed)
-    files_abs <- file.path(getwd(), files)
-    exists <- file.exists(files_abs)
-    files[exists] <- files_abs[exists]
-    files[exists] <- sub("//","/",files[exists])
-
-    # WHAT IS ON THE FILESYSTEM
-    filesystem_filepaths <- system(paste0("find \"",getwd(),"/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
-    filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
-
-    # COMPARISON
-    if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
-        write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
-        write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
-        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
-    }
-}
-
-
-#This function list the compatible files within the directory as xcms did
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-getMSFiles <- function (directory) {
-    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-    filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
-    info <- file.info(directory)
-    listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
-    files <- c(directory[!info$isdir], listed)
-    exists <- file.exists(files)
-    files <- files[exists]
-    return(files)
-}
-
-# This function check if XML contains special caracters. It also checks integrity and completness.
-#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
-checkXmlStructure <- function (directory) {
-    cat("Checking XML structure...\n")
-
-    cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
-    capture <- system(cmd, intern=TRUE)
-
-    if (length(capture)>0){
-        #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
-        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
-        write(capture, stderr())
-        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
-    }
-
-}
-
-
-# This function check if XML contain special characters
-#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
-deleteXmlBadCharacters<- function (directory) {
-    cat("Checking Non ASCII characters in the XML...\n")
-
-    processed <- F
-    l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
-    for (i in l){
-        cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
-        capture <- suppressWarnings(system(cmd, intern=TRUE))
-        if (length(capture)>0){
-            cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
-            print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
-            c <- system(cmd, intern=TRUE)
-            capture <- ""
-            processed <- T
-        }
-    }
-    if (processed) cat("\n\n")
-    return(processed)
-}
-
-
 # This function will compute MD5 checksum to check the data integrity
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-getMd5sum <- function (directory) {
+getMd5sum <- function (files) {
     cat("Compute md5 checksum...\n")
-    # WHAT XCMS WILL FIND
-    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-    filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
-    info <- file.info(directory)
-    listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
-    files <- c(directory[!info$isdir], listed)
-    exists <- file.exists(files)
-    files <- files[exists]
-
     library(tools)
-
-    #cat("\n\n")
-
     return(as.matrix(md5sum(files)))
 }
 
-
-# This function get the raw file path from the arguments
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
-  if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
-
-  if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
-
-  if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
-    singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
-    singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
-  }
-  if (exists("singlefile_galaxyPaths")){
-    singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
-    singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
-
-    singlefile <- NULL
-    for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
-      singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
-      singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
-      # In case, an url is used to import data within Galaxy
-      singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
-      singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
-    }
-  }
-  return(list(zipfile=zipfile, singlefile=singlefile))
-}
-
 # This function retrieve the raw file in the working directory
 #   - if zipfile: unzip the file with its directory tree
 #   - if singlefiles: set symlink with the good filename
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
+retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") {
+
+    if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
+
+    # single - if the file are passed in the command arguments -> refresh singlefile
+    if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
+      singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|"))
+      singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|"))
+
+      singlefile <- NULL
+      for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
+        singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
+        singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
+        # In case, an url is used to import data within Galaxy
+        singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
+        singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
+      }
+    }
+    # zipfile - if the file are passed in the command arguments -> refresh zipfile
+    if (!is.null(args[[paste0("zipfile",prefix)]]))
+      zipfile <- args[[paste0("zipfile",prefix)]]
+
+    # single
     if(!is.null(singlefile) && (length("singlefile")>0)) {
+        files <- vector()
         for (singlefile_sampleName in names(singlefile)) {
             singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
             if(!file.exists(singlefile_galaxyPath)){
@@ -462,22 +363,16 @@
 
             if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
                 file.copy(singlefile_galaxyPath, singlefile_sampleName)
-
+            files <- c(files, singlefile_sampleName)
         }
-        directory <- "."
-
     }
+    # zipfile
     if(!is.null(zipfile) && (zipfile != "")) {
         if(!file.exists(zipfile)){
             error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
             print(error_message)
             stop(error_message)
         }
-
-        #list all file in the zip file
-        #zip_files <- unzip(zipfile,list=T)[,"Name"]
-
-        #unzip
         suppressWarnings(unzip(zipfile, unzip="unzip"))
 
         #get the directory name
@@ -489,8 +384,17 @@
 
         cat("files_root_directory\t",directory,"\n")
 
+        filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+        filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
+        info <- file.info(directory)
+        listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
+        files <- c(directory[!info$isdir], listed)
+        exists <- file.exists(files)
+        files <- files[exists]
+
     }
-    return (directory)
+    return(list(zipfile=zipfile, singlefile=singlefile, files=files))
+
 }