comparison lib.r @ 1:ae8de756dfcf draft

"planemo upload for repository https://github.com/rietho/IPO commit 5083f3b5800bdd8515519f2f6398046b41e1df97"
author workflow4metabolomics
date Mon, 16 Dec 2019 05:26:42 -0500
parents ac5f2936575b
children 8e5f667359cb
comparison
equal deleted inserted replaced
0:ac5f2936575b 1:ae8de756dfcf
1 #@author G. Le Corguille
2 # solve an issue with batch if arguments are logical TRUE/FALSE
3 parseCommandArgs <- function(...) {
4 args <- batch::parseCommandArgs(...)
5 for (key in names(args)) {
6 if (args[key] %in% c("TRUE","FALSE"))
7 args[key] = as.logical(args[key])
8 }
9 return(args)
10 }
11
12 #@author G. Le Corguille
13 # This function will
14 # - load the packages
15 # - display the sessionInfo
16 loadAndDisplayPackages <- function(pkgs) {
17 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
18 sessioninfo = sessionInfo()
19 cat(sessioninfo$R.version$version.string,"\n")
20 cat("Main packages:\n")
21 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
22 cat("Other loaded packages:\n")
23 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
24 }
25
1 ## 26 ##
2 ## This function launch IPO functions to get the best parameters for xcmsSet 27 ## This function launch IPO functions to get the best parameters for xcmsSet
3 ## A sample among the whole dataset is used to save time 28 ## A sample among the whole dataset is used to save time
4 ## 29 ##
5 ipo4xcmsSet = function(directory, parametersOutput, listArguments, samplebyclass=4) { 30 ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) {
6 setwd(directory) 31 setwd(directory)
7 32
8 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" 33 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
34 files = files[!files %in% c("conda_activate.log", "log.txt")]
9 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" 35 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT"
10 36
11 mzmlfile = files 37 mzmlfile = files
12 if (samplebyclass > 0) { 38 if (samplebyclass > 0) {
13 #random selection of N files for IPO in each class 39 #random selection of N files for IPO in each class
25 #@TODO: else, must we keep the RData to been use directly by group? 51 #@TODO: else, must we keep the RData to been use directly by group?
26 52
27 cat("\t\tSamples used:\n") 53 cat("\t\tSamples used:\n")
28 print(mzmlfile) 54 print(mzmlfile)
29 55
30 peakpickingParameters = getDefaultXcmsSetStartingParams(listArguments[["method"]]) #get default parameters of IPO 56 peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO
31 57
32 # filter listArguments to only get releavant parameters and complete with those that are not declared 58 # filter args to only get releavant parameters and complete with those that are not declared
33 peakpickingParametersUser = c(listArguments[names(listArguments) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(listArguments))]) 59 peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))])
34 peakpickingParametersUser$verbose.columns = TRUE 60 peakpickingParametersUser$verbose.columns = TRUE
35
36 #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement 61 #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
37 resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=peakpickingParametersUser$nSlaves, subdir="../IPO_results") #some images generated by IPO 62 resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO
38 63
39 # export 64 # export
40 resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))] 65 resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))]
41 write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user 66 write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user
42 67
44 } 69 }
45 70
46 ## 71 ##
47 ## This function launch IPO functions to get the best parameters for group and retcor 72 ## This function launch IPO functions to get the best parameters for group and retcor
48 ## 73 ##
49 ipo4retgroup = function(xset, directory, parametersOutput, listArguments, samplebyclass=4) { 74 ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) {
50 setwd(directory) 75 setwd(directory)
51 76
52 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" 77 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
78 files = files[!files %in% c("conda_activate.log", "log.txt")]
53 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" 79 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT"
54 80
55 retcorGroupParameters = getDefaultRetGroupStartingParams(listArguments[["retcorMethod"]]) #get default parameters of IPO 81 retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO
56 print(retcorGroupParameters) 82 print(retcorGroupParameters)
57 # filter listArguments to only get releavant parameters and complete with those that are not declared 83 # filter args to only get releavant parameters and complete with those that are not declared
58 retcorGroupParametersUser = c(listArguments[names(listArguments) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(listArguments))]) 84 retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))])
59 print("retcorGroupParametersUser") 85 print("retcorGroupParametersUser")
60 print(retcorGroupParametersUser) 86 print(retcorGroupParametersUser)
61 resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=listArguments[["nSlaves"]], subdir="../IPO_results") #some images generated by IPO 87 resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO
62 88
63 # export 89 # export
64 resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings 90 resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings
65 write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user 91 write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user
66 } 92 }
67 93
68 94
69 95
70 96 # This function check if XML contains special caracters. It also checks integrity and completness.
71 ##
72 ## This function check if xcms will found all the files
73 ##
74 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
75 checkFilesCompatibilityWithXcms <- function(directory) {
76 cat("Checking files filenames compatibilities with xmcs...\n")
77 # WHAT XCMS WILL FIND
78 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
79 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
80 info <- file.info(directory)
81 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
82 files <- c(directory[!info$isdir], listed)
83 files_abs <- file.path(getwd(), files)
84 exists <- file.exists(files_abs)
85 files[exists] <- files_abs[exists]
86 files[exists] <- sub("//","/",files[exists])
87
88 # WHAT IS ON THE FILESYSTEM
89 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
90 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
91
92 # COMPARISON
93 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
94 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
95 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
96 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
97
98 }
99 }
100
101
102
103 ##
104 ## This function check if XML contains special caracters. It also checks integrity and completness.
105 ##
106 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 97 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
107 checkXmlStructure <- function (directory) { 98 checkXmlStructure <- function (directory) {
108 cat("Checking XML structure...\n") 99 cat("Checking XML structure...\n")
109 100
110 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") 101 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
111 capture=system(cmd,intern=TRUE) 102 capture <- system(cmd, intern=TRUE)
112 103
113 if (length(capture)>0){ 104 if (length(capture)>0){
114 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 105 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
115 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 106 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
116 write(capture, stderr()) 107 write(capture, stderr())
117 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") 108 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
118 } 109 }
119 110
120 } 111 }
112
113
114 # This function get the raw file path from the arguments
115 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
116 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
117 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
118
119 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
120
121 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
122 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
123 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
124 }
125 if (exists("singlefile_galaxyPaths")){
126 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
127 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
128
129 singlefile <- NULL
130 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
131 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
132 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
133 # In case, an url is used to import data within Galaxy
134 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
135 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
136 }
137 }
138 return(list(zipfile=zipfile, singlefile=singlefile))
139 }
140
141 # This function retrieve the raw file in the working directory
142 # - if zipfile: unzip the file with its directory tree
143 # - if singlefiles: set symlink with the good filename
144 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
145 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
146 if(!is.null(singlefile) && (length("singlefile")>0)) {
147 for (singlefile_sampleName in names(singlefile)) {
148 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
149 if(!file.exists(singlefile_galaxyPath)){
150 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
151 print(error_message); stop(error_message)
152 }
153
154 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
155 file.copy(singlefile_galaxyPath, singlefile_sampleName)
156
157 }
158 directory <- "."
159
160 }
161 if(!is.null(zipfile) && (zipfile != "")) {
162 if(!file.exists(zipfile)){
163 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
164 print(error_message)
165 stop(error_message)
166 }
167
168 #list all file in the zip file
169 #zip_files <- unzip(zipfile,list=T)[,"Name"]
170
171 #unzip
172 suppressWarnings(unzip(zipfile, unzip="unzip"))
173
174 #get the directory name
175 suppressWarnings(filesInZip <- unzip(zipfile, list=T))
176 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
177 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
178 directory <- "."
179 if (length(directories) == 1) directory <- directories
180
181 cat("files_root_directory\t",directory,"\n")
182
183 }
184 return (directory)
185 }
186
187
188 # This function retrieve a xset like object
189 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
190 getxcmsSetObject <- function(xobject) {
191 # XCMS 1.x
192 if (class(xobject) == "xcmsSet")
193 return (xobject)
194 # XCMS 3.x
195 if (class(xobject) == "XCMSnExp") {
196 # Get the legacy xcmsSet object
197 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
198 if (!is.null(xset@phenoData$sample_group))
199 sampclass(xset) <- xset@phenoData$sample_group
200 else
201 sampclass(xset) <- "."
202 return (xset)
203 }
204 }