diff lib.r @ 1:ae8de756dfcf draft

"planemo upload for repository https://github.com/rietho/IPO commit 5083f3b5800bdd8515519f2f6398046b41e1df97"
author workflow4metabolomics
date Mon, 16 Dec 2019 05:26:42 -0500
parents ac5f2936575b
children 8e5f667359cb
line wrap: on
line diff
--- a/lib.r	Thu Aug 03 06:00:00 2017 -0400
+++ b/lib.r	Mon Dec 16 05:26:42 2019 -0500
@@ -1,11 +1,37 @@
+#@author G. Le Corguille
+# solve an issue with batch if arguments are logical TRUE/FALSE
+parseCommandArgs <- function(...) {
+    args <- batch::parseCommandArgs(...)
+    for (key in names(args)) {
+        if (args[key] %in% c("TRUE","FALSE"))
+            args[key] = as.logical(args[key])
+    }
+    return(args)
+}
+
+#@author G. Le Corguille
+# This function will
+# - load the packages
+# - display the sessionInfo
+loadAndDisplayPackages <- function(pkgs) {
+    for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
+    sessioninfo = sessionInfo()
+    cat(sessioninfo$R.version$version.string,"\n")
+    cat("Main packages:\n")
+    for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+    cat("Other loaded packages:\n")
+    for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+}
+
 ##
 ## This function launch IPO functions to get the best parameters for xcmsSet
 ## A sample among the whole dataset is used to save time
 ##
-ipo4xcmsSet = function(directory, parametersOutput, listArguments, samplebyclass=4) {
+ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) {
     setwd(directory)
 
     files = list.files(".", recursive=T)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
+    files = files[!files %in% c("conda_activate.log", "log.txt")]
     files_classes = basename(dirname(files))    # "KO", "KO", "WT", "WT"
 
     mzmlfile = files
@@ -27,14 +53,13 @@
     cat("\t\tSamples used:\n")
     print(mzmlfile)
 
-    peakpickingParameters = getDefaultXcmsSetStartingParams(listArguments[["method"]]) #get default parameters of IPO
+    peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO
 
-    # filter listArguments to only get releavant parameters and complete with those that are not declared
-    peakpickingParametersUser = c(listArguments[names(listArguments) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(listArguments))])
+    # filter args to only get releavant parameters and complete with those that are not declared
+    peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))])
     peakpickingParametersUser$verbose.columns = TRUE
-
     #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
-    resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=peakpickingParametersUser$nSlaves, subdir="../IPO_results") #some images generated by IPO
+    resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO
 
     # export
     resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))]
@@ -46,19 +71,20 @@
 ##
 ## This function launch IPO functions to get the best parameters for group and retcor
 ##
-ipo4retgroup = function(xset, directory, parametersOutput, listArguments, samplebyclass=4) {
+ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) {
     setwd(directory)
 
     files = list.files(".", recursive=T)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
+    files = files[!files %in% c("conda_activate.log", "log.txt")]
     files_classes = basename(dirname(files))    # "KO", "KO", "WT", "WT"
 
-    retcorGroupParameters = getDefaultRetGroupStartingParams(listArguments[["retcorMethod"]]) #get default parameters of IPO
+    retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO
     print(retcorGroupParameters)
-    # filter listArguments to only get releavant parameters and complete with those that are not declared
-    retcorGroupParametersUser = c(listArguments[names(listArguments) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(listArguments))])
+    # filter args to only get releavant parameters and complete with those that are not declared
+    retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))])
     print("retcorGroupParametersUser")
     print(retcorGroupParametersUser)
-    resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=listArguments[["nSlaves"]], subdir="../IPO_results") #some images generated by IPO
+    resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO
 
     # export
     resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings
@@ -67,48 +93,13 @@
 
 
 
-
-##
-## This function check if xcms will found all the files
-##
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-checkFilesCompatibilityWithXcms <- function(directory) {
-  cat("Checking files filenames compatibilities with xmcs...\n")
-  # WHAT XCMS WILL FIND
-  filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-  filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
-  info <- file.info(directory)
-  listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
-  files <- c(directory[!info$isdir], listed)
-  files_abs <- file.path(getwd(), files)
-  exists <- file.exists(files_abs)
-  files[exists] <- files_abs[exists]
-  files[exists] <- sub("//","/",files[exists])
-
-  # WHAT IS ON THE FILESYSTEM
-  filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
-  filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
-
-  # COMPARISON
-  if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
-    write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
-    write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
-    stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
-
-  }
-}
-
-
-
-##
-## This function check if XML contains special caracters. It also checks integrity and completness.
-##
+# This function check if XML contains special caracters. It also checks integrity and completness.
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
 checkXmlStructure <- function (directory) {
   cat("Checking XML structure...\n")
 
-  cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
-  capture=system(cmd,intern=TRUE)
+  cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
+  capture <- system(cmd, intern=TRUE)
 
   if (length(capture)>0){
     #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
@@ -118,3 +109,96 @@
   }
 
 }
+
+
+# This function get the raw file path from the arguments
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
+  if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
+
+  if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
+
+  if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
+    singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
+    singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
+  }
+  if (exists("singlefile_galaxyPaths")){
+    singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
+    singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
+
+    singlefile <- NULL
+    for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
+      singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
+      singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
+      # In case, an url is used to import data within Galaxy
+      singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
+      singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
+    }
+  }
+  return(list(zipfile=zipfile, singlefile=singlefile))
+}
+
+# This function retrieve the raw file in the working directory
+#   - if zipfile: unzip the file with its directory tree
+#   - if singlefiles: set symlink with the good filename
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
+    if(!is.null(singlefile) && (length("singlefile")>0)) {
+        for (singlefile_sampleName in names(singlefile)) {
+            singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
+            if(!file.exists(singlefile_galaxyPath)){
+                error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
+                print(error_message); stop(error_message)
+            }
+
+            if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
+                file.copy(singlefile_galaxyPath, singlefile_sampleName)
+
+        }
+        directory <- "."
+
+    }
+    if(!is.null(zipfile) && (zipfile != "")) {
+        if(!file.exists(zipfile)){
+            error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
+            print(error_message)
+            stop(error_message)
+        }
+
+        #list all file in the zip file
+        #zip_files <- unzip(zipfile,list=T)[,"Name"]
+
+        #unzip
+        suppressWarnings(unzip(zipfile, unzip="unzip"))
+
+        #get the directory name
+        suppressWarnings(filesInZip <- unzip(zipfile, list=T))
+        directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
+        directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
+        directory <- "."
+        if (length(directories) == 1) directory <- directories
+
+        cat("files_root_directory\t",directory,"\n")
+
+    }
+    return (directory)
+}
+
+
+# This function retrieve a xset like object
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+getxcmsSetObject <- function(xobject) {
+    # XCMS 1.x
+    if (class(xobject) == "xcmsSet")
+        return (xobject)
+    # XCMS 3.x
+    if (class(xobject) == "XCMSnExp") {
+        # Get the legacy xcmsSet object
+        suppressWarnings(xset <- as(xobject, 'xcmsSet'))
+        if (!is.null(xset@phenoData$sample_group))
+            sampclass(xset) <- xset@phenoData$sample_group
+        else
+            sampclass(xset) <- "."
+        return (xset)
+    }
+}