Mercurial > repos > lecorguille > ipo
diff lib.r @ 1:ae8de756dfcf draft
"planemo upload for repository https://github.com/rietho/IPO commit 5083f3b5800bdd8515519f2f6398046b41e1df97"
author | workflow4metabolomics |
---|---|
date | Mon, 16 Dec 2019 05:26:42 -0500 |
parents | ac5f2936575b |
children | 8e5f667359cb |
line wrap: on
line diff
--- a/lib.r Thu Aug 03 06:00:00 2017 -0400 +++ b/lib.r Mon Dec 16 05:26:42 2019 -0500 @@ -1,11 +1,37 @@ +#@author G. Le Corguille +# solve an issue with batch if arguments are logical TRUE/FALSE +parseCommandArgs <- function(...) { + args <- batch::parseCommandArgs(...) + for (key in names(args)) { + if (args[key] %in% c("TRUE","FALSE")) + args[key] = as.logical(args[key]) + } + return(args) +} + +#@author G. Le Corguille +# This function will +# - load the packages +# - display the sessionInfo +loadAndDisplayPackages <- function(pkgs) { + for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + sessioninfo = sessionInfo() + cat(sessioninfo$R.version$version.string,"\n") + cat("Main packages:\n") + for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") + cat("Other loaded packages:\n") + for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") +} + ## ## This function launch IPO functions to get the best parameters for xcmsSet ## A sample among the whole dataset is used to save time ## -ipo4xcmsSet = function(directory, parametersOutput, listArguments, samplebyclass=4) { +ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) { setwd(directory) files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" + files = files[!files %in% c("conda_activate.log", "log.txt")] files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" mzmlfile = files @@ -27,14 +53,13 @@ cat("\t\tSamples used:\n") print(mzmlfile) - peakpickingParameters = getDefaultXcmsSetStartingParams(listArguments[["method"]]) #get default parameters of IPO + peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO - # filter listArguments to only get releavant parameters and complete with those that are not declared - peakpickingParametersUser = c(listArguments[names(listArguments) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(listArguments))]) + # filter args to only get releavant parameters and complete with those that are not declared + peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))]) peakpickingParametersUser$verbose.columns = TRUE - #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement - resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=peakpickingParametersUser$nSlaves, subdir="../IPO_results") #some images generated by IPO + resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO # export resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))] @@ -46,19 +71,20 @@ ## ## This function launch IPO functions to get the best parameters for group and retcor ## -ipo4retgroup = function(xset, directory, parametersOutput, listArguments, samplebyclass=4) { +ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) { setwd(directory) files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" + files = files[!files %in% c("conda_activate.log", "log.txt")] files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" - retcorGroupParameters = getDefaultRetGroupStartingParams(listArguments[["retcorMethod"]]) #get default parameters of IPO + retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO print(retcorGroupParameters) - # filter listArguments to only get releavant parameters and complete with those that are not declared - retcorGroupParametersUser = c(listArguments[names(listArguments) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(listArguments))]) + # filter args to only get releavant parameters and complete with those that are not declared + retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))]) print("retcorGroupParametersUser") print(retcorGroupParametersUser) - resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=listArguments[["nSlaves"]], subdir="../IPO_results") #some images generated by IPO + resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO # export resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings @@ -67,48 +93,13 @@ - -## -## This function check if xcms will found all the files -## -#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM -checkFilesCompatibilityWithXcms <- function(directory) { - cat("Checking files filenames compatibilities with xmcs...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - files_abs <- file.path(getwd(), files) - exists <- file.exists(files_abs) - files[exists] <- files_abs[exists] - files[exists] <- sub("//","/",files[exists]) - - # WHAT IS ON THE FILESYSTEM - filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) - filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] - - # COMPARISON - if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { - write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) - write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - - } -} - - - -## -## This function check if XML contains special caracters. It also checks integrity and completness. -## +# This function check if XML contains special caracters. It also checks integrity and completness. #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM checkXmlStructure <- function (directory) { cat("Checking XML structure...\n") - cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") - capture=system(cmd,intern=TRUE) + cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture <- system(cmd, intern=TRUE) if (length(capture)>0){ #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) @@ -118,3 +109,96 @@ } } + + +# This function get the raw file path from the arguments +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { + if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") + + if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] + + if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { + singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] + singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] + } + if (exists("singlefile_galaxyPaths")){ + singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) + singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) + + singlefile <- NULL + for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { + singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] + singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] + # In case, an url is used to import data within Galaxy + singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) + singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath + } + } + return(list(zipfile=zipfile, singlefile=singlefile)) +} + +# This function retrieve the raw file in the working directory +# - if zipfile: unzip the file with its directory tree +# - if singlefiles: set symlink with the good filename +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { + if(!is.null(singlefile) && (length("singlefile")>0)) { + for (singlefile_sampleName in names(singlefile)) { + singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] + if(!file.exists(singlefile_galaxyPath)){ + error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") + print(error_message); stop(error_message) + } + + if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) + file.copy(singlefile_galaxyPath, singlefile_sampleName) + + } + directory <- "." + + } + if(!is.null(zipfile) && (zipfile != "")) { + if(!file.exists(zipfile)){ + error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") + print(error_message) + stop(error_message) + } + + #list all file in the zip file + #zip_files <- unzip(zipfile,list=T)[,"Name"] + + #unzip + suppressWarnings(unzip(zipfile, unzip="unzip")) + + #get the directory name + suppressWarnings(filesInZip <- unzip(zipfile, list=T)) + directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) + directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory <- "." + if (length(directories) == 1) directory <- directories + + cat("files_root_directory\t",directory,"\n") + + } + return (directory) +} + + +# This function retrieve a xset like object +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +getxcmsSetObject <- function(xobject) { + # XCMS 1.x + if (class(xobject) == "xcmsSet") + return (xobject) + # XCMS 3.x + if (class(xobject) == "XCMSnExp") { + # Get the legacy xcmsSet object + suppressWarnings(xset <- as(xobject, 'xcmsSet')) + if (!is.null(xset@phenoData$sample_group)) + sampclass(xset) <- xset@phenoData$sample_group + else + sampclass(xset) <- "." + return (xset) + } +}