Mercurial > repos > lecorguille > ipo
comparison lib.r @ 1:ae8de756dfcf draft
"planemo upload for repository https://github.com/rietho/IPO commit 5083f3b5800bdd8515519f2f6398046b41e1df97"
| author | workflow4metabolomics |
|---|---|
| date | Mon, 16 Dec 2019 05:26:42 -0500 |
| parents | ac5f2936575b |
| children | 8e5f667359cb |
comparison
equal
deleted
inserted
replaced
| 0:ac5f2936575b | 1:ae8de756dfcf |
|---|---|
| 1 #@author G. Le Corguille | |
| 2 # solve an issue with batch if arguments are logical TRUE/FALSE | |
| 3 parseCommandArgs <- function(...) { | |
| 4 args <- batch::parseCommandArgs(...) | |
| 5 for (key in names(args)) { | |
| 6 if (args[key] %in% c("TRUE","FALSE")) | |
| 7 args[key] = as.logical(args[key]) | |
| 8 } | |
| 9 return(args) | |
| 10 } | |
| 11 | |
| 12 #@author G. Le Corguille | |
| 13 # This function will | |
| 14 # - load the packages | |
| 15 # - display the sessionInfo | |
| 16 loadAndDisplayPackages <- function(pkgs) { | |
| 17 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
| 18 sessioninfo = sessionInfo() | |
| 19 cat(sessioninfo$R.version$version.string,"\n") | |
| 20 cat("Main packages:\n") | |
| 21 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
| 22 cat("Other loaded packages:\n") | |
| 23 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
| 24 } | |
| 25 | |
| 1 ## | 26 ## |
| 2 ## This function launch IPO functions to get the best parameters for xcmsSet | 27 ## This function launch IPO functions to get the best parameters for xcmsSet |
| 3 ## A sample among the whole dataset is used to save time | 28 ## A sample among the whole dataset is used to save time |
| 4 ## | 29 ## |
| 5 ipo4xcmsSet = function(directory, parametersOutput, listArguments, samplebyclass=4) { | 30 ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) { |
| 6 setwd(directory) | 31 setwd(directory) |
| 7 | 32 |
| 8 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" | 33 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" |
| 34 files = files[!files %in% c("conda_activate.log", "log.txt")] | |
| 9 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" | 35 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" |
| 10 | 36 |
| 11 mzmlfile = files | 37 mzmlfile = files |
| 12 if (samplebyclass > 0) { | 38 if (samplebyclass > 0) { |
| 13 #random selection of N files for IPO in each class | 39 #random selection of N files for IPO in each class |
| 25 #@TODO: else, must we keep the RData to been use directly by group? | 51 #@TODO: else, must we keep the RData to been use directly by group? |
| 26 | 52 |
| 27 cat("\t\tSamples used:\n") | 53 cat("\t\tSamples used:\n") |
| 28 print(mzmlfile) | 54 print(mzmlfile) |
| 29 | 55 |
| 30 peakpickingParameters = getDefaultXcmsSetStartingParams(listArguments[["method"]]) #get default parameters of IPO | 56 peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO |
| 31 | 57 |
| 32 # filter listArguments to only get releavant parameters and complete with those that are not declared | 58 # filter args to only get releavant parameters and complete with those that are not declared |
| 33 peakpickingParametersUser = c(listArguments[names(listArguments) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(listArguments))]) | 59 peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))]) |
| 34 peakpickingParametersUser$verbose.columns = TRUE | 60 peakpickingParametersUser$verbose.columns = TRUE |
| 35 | |
| 36 #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement | 61 #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement |
| 37 resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=peakpickingParametersUser$nSlaves, subdir="../IPO_results") #some images generated by IPO | 62 resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO |
| 38 | 63 |
| 39 # export | 64 # export |
| 40 resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))] | 65 resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))] |
| 41 write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user | 66 write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user |
| 42 | 67 |
| 44 } | 69 } |
| 45 | 70 |
| 46 ## | 71 ## |
| 47 ## This function launch IPO functions to get the best parameters for group and retcor | 72 ## This function launch IPO functions to get the best parameters for group and retcor |
| 48 ## | 73 ## |
| 49 ipo4retgroup = function(xset, directory, parametersOutput, listArguments, samplebyclass=4) { | 74 ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) { |
| 50 setwd(directory) | 75 setwd(directory) |
| 51 | 76 |
| 52 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" | 77 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" |
| 78 files = files[!files %in% c("conda_activate.log", "log.txt")] | |
| 53 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" | 79 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" |
| 54 | 80 |
| 55 retcorGroupParameters = getDefaultRetGroupStartingParams(listArguments[["retcorMethod"]]) #get default parameters of IPO | 81 retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO |
| 56 print(retcorGroupParameters) | 82 print(retcorGroupParameters) |
| 57 # filter listArguments to only get releavant parameters and complete with those that are not declared | 83 # filter args to only get releavant parameters and complete with those that are not declared |
| 58 retcorGroupParametersUser = c(listArguments[names(listArguments) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(listArguments))]) | 84 retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))]) |
| 59 print("retcorGroupParametersUser") | 85 print("retcorGroupParametersUser") |
| 60 print(retcorGroupParametersUser) | 86 print(retcorGroupParametersUser) |
| 61 resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=listArguments[["nSlaves"]], subdir="../IPO_results") #some images generated by IPO | 87 resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO |
| 62 | 88 |
| 63 # export | 89 # export |
| 64 resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings | 90 resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings |
| 65 write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user | 91 write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user |
| 66 } | 92 } |
| 67 | 93 |
| 68 | 94 |
| 69 | 95 |
| 70 | 96 # This function check if XML contains special caracters. It also checks integrity and completness. |
| 71 ## | |
| 72 ## This function check if xcms will found all the files | |
| 73 ## | |
| 74 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
| 75 checkFilesCompatibilityWithXcms <- function(directory) { | |
| 76 cat("Checking files filenames compatibilities with xmcs...\n") | |
| 77 # WHAT XCMS WILL FIND | |
| 78 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 79 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | |
| 80 info <- file.info(directory) | |
| 81 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | |
| 82 files <- c(directory[!info$isdir], listed) | |
| 83 files_abs <- file.path(getwd(), files) | |
| 84 exists <- file.exists(files_abs) | |
| 85 files[exists] <- files_abs[exists] | |
| 86 files[exists] <- sub("//","/",files[exists]) | |
| 87 | |
| 88 # WHAT IS ON THE FILESYSTEM | |
| 89 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | |
| 90 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | |
| 91 | |
| 92 # COMPARISON | |
| 93 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | |
| 94 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | |
| 95 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | |
| 96 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
| 97 | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 | |
| 102 | |
| 103 ## | |
| 104 ## This function check if XML contains special caracters. It also checks integrity and completness. | |
| 105 ## | |
| 106 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 97 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
| 107 checkXmlStructure <- function (directory) { | 98 checkXmlStructure <- function (directory) { |
| 108 cat("Checking XML structure...\n") | 99 cat("Checking XML structure...\n") |
| 109 | 100 |
| 110 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 101 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
| 111 capture=system(cmd,intern=TRUE) | 102 capture <- system(cmd, intern=TRUE) |
| 112 | 103 |
| 113 if (length(capture)>0){ | 104 if (length(capture)>0){ |
| 114 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 105 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
| 115 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 106 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
| 116 write(capture, stderr()) | 107 write(capture, stderr()) |
| 117 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 108 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
| 118 } | 109 } |
| 119 | 110 |
| 120 } | 111 } |
| 112 | |
| 113 | |
| 114 # This function get the raw file path from the arguments | |
| 115 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 116 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { | |
| 117 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | |
| 118 | |
| 119 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] | |
| 120 | |
| 121 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | |
| 122 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] | |
| 123 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] | |
| 124 } | |
| 125 if (exists("singlefile_galaxyPaths")){ | |
| 126 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | |
| 127 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | |
| 128 | |
| 129 singlefile <- NULL | |
| 130 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
| 131 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
| 132 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
| 133 # In case, an url is used to import data within Galaxy | |
| 134 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | |
| 135 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
| 136 } | |
| 137 } | |
| 138 return(list(zipfile=zipfile, singlefile=singlefile)) | |
| 139 } | |
| 140 | |
| 141 # This function retrieve the raw file in the working directory | |
| 142 # - if zipfile: unzip the file with its directory tree | |
| 143 # - if singlefiles: set symlink with the good filename | |
| 144 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 145 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | |
| 146 if(!is.null(singlefile) && (length("singlefile")>0)) { | |
| 147 for (singlefile_sampleName in names(singlefile)) { | |
| 148 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | |
| 149 if(!file.exists(singlefile_galaxyPath)){ | |
| 150 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | |
| 151 print(error_message); stop(error_message) | |
| 152 } | |
| 153 | |
| 154 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | |
| 155 file.copy(singlefile_galaxyPath, singlefile_sampleName) | |
| 156 | |
| 157 } | |
| 158 directory <- "." | |
| 159 | |
| 160 } | |
| 161 if(!is.null(zipfile) && (zipfile != "")) { | |
| 162 if(!file.exists(zipfile)){ | |
| 163 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | |
| 164 print(error_message) | |
| 165 stop(error_message) | |
| 166 } | |
| 167 | |
| 168 #list all file in the zip file | |
| 169 #zip_files <- unzip(zipfile,list=T)[,"Name"] | |
| 170 | |
| 171 #unzip | |
| 172 suppressWarnings(unzip(zipfile, unzip="unzip")) | |
| 173 | |
| 174 #get the directory name | |
| 175 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | |
| 176 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | |
| 177 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
| 178 directory <- "." | |
| 179 if (length(directories) == 1) directory <- directories | |
| 180 | |
| 181 cat("files_root_directory\t",directory,"\n") | |
| 182 | |
| 183 } | |
| 184 return (directory) | |
| 185 } | |
| 186 | |
| 187 | |
| 188 # This function retrieve a xset like object | |
| 189 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 190 getxcmsSetObject <- function(xobject) { | |
| 191 # XCMS 1.x | |
| 192 if (class(xobject) == "xcmsSet") | |
| 193 return (xobject) | |
| 194 # XCMS 3.x | |
| 195 if (class(xobject) == "XCMSnExp") { | |
| 196 # Get the legacy xcmsSet object | |
| 197 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
| 198 if (!is.null(xset@phenoData$sample_group)) | |
| 199 sampclass(xset) <- xset@phenoData$sample_group | |
| 200 else | |
| 201 sampclass(xset) <- "." | |
| 202 return (xset) | |
| 203 } | |
| 204 } |
