Mercurial > repos > lecorguille > xcms_merge
comparison lib.r @ 3:f439ed7a8f03 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 9f72e947d9c241d11221cad561f3525d27231857
author | lecorguille |
---|---|
date | Tue, 18 Sep 2018 16:09:25 -0400 |
parents | |
children | 7bb43bf0b546 |
comparison
equal
deleted
inserted
replaced
2:3a5204f14fff | 3:f439ed7a8f03 |
---|---|
1 #@authors ABiMS TEAM, Y. Guitton | |
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools | |
3 | |
4 #@author G. Le Corguille | |
5 # solve an issue with batch if arguments are logical TRUE/FALSE | |
6 parseCommandArgs <- function(...) { | |
7 args <- batch::parseCommandArgs(...) | |
8 for (key in names(args)) { | |
9 if (args[key] %in% c("TRUE","FALSE")) | |
10 args[key] = as.logical(args[key]) | |
11 } | |
12 return(args) | |
13 } | |
14 | |
15 #@author G. Le Corguille | |
16 # This function will | |
17 # - load the packages | |
18 # - display the sessionInfo | |
19 loadAndDisplayPackages <- function(pkgs) { | |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
21 | |
22 sessioninfo = sessionInfo() | |
23 cat(sessioninfo$R.version$version.string,"\n") | |
24 cat("Main packages:\n") | |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
26 cat("Other loaded packages:\n") | |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
28 } | |
29 | |
30 #@author G. Le Corguille | |
31 # This function merge several chromBPI or chromTIC into one. | |
32 mergeChrom <- function(chrom_merged, chrom) { | |
33 if (is.null(chrom_merged)) return(NULL) | |
34 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data) | |
35 return(chrom_merged) | |
36 } | |
37 | |
38 #@author G. Le Corguille | |
39 # This function merge several xdata into one. | |
40 mergeXData <- function(args) { | |
41 chromTIC <- NULL | |
42 chromBPI <- NULL | |
43 chromTIC_adjusted <- NULL | |
44 chromBPI_adjusted <- NULL | |
45 for(image in args$images) { | |
46 | |
47 load(image) | |
48 # Handle infiles | |
49 if (!exists("singlefile")) singlefile <- NULL | |
50 if (!exists("zipfile")) zipfile <- NULL | |
51 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) | |
52 zipfile <- rawFilePath$zipfile | |
53 singlefile <- rawFilePath$singlefile | |
54 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) | |
55 | |
56 if (exists("raw_data")) xdata <- raw_data | |
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | |
58 | |
59 cat(sampleNamesList$sampleNamesOrigin,"\n") | |
60 | |
61 if (!exists("xdata_merged")) { | |
62 xdata_merged <- xdata | |
63 singlefile_merged <- singlefile | |
64 md5sumList_merged <- md5sumList | |
65 sampleNamesList_merged <- sampleNamesList | |
66 chromTIC_merged <- chromTIC | |
67 chromBPI_merged <- chromBPI | |
68 chromTIC_adjusted_merged <- chromTIC_adjusted | |
69 chromBPI_adjusted_merged <- chromBPI_adjusted | |
70 } else { | |
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) | |
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) | |
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") | |
74 | |
75 singlefile_merged <- c(singlefile_merged,singlefile) | |
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) | |
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) | |
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) | |
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) | |
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) | |
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) | |
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) | |
83 } | |
84 } | |
85 rm(image) | |
86 xdata <- xdata_merged; rm(xdata_merged) | |
87 singlefile <- singlefile_merged; rm(singlefile_merged) | |
88 md5sumList <- md5sumList_merged; rm(md5sumList_merged) | |
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) | |
90 | |
91 if (!is.null(args$sampleMetadata)) { | |
92 cat("\tXSET PHENODATA SETTING...\n") | |
93 sampleMetadataFile <- args$sampleMetadata | |
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) | |
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] | |
96 | |
97 if (any(is.na(pData(xdata)$sample_group))) { | |
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] | |
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) | |
100 print(error_message) | |
101 stop(error_message) | |
102 } | |
103 } | |
104 | |
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } | |
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } | |
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } | |
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } | |
109 | |
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) | |
111 } | |
112 | |
113 #@author G. Le Corguille | |
114 # This function convert if it is required the Retention Time in minutes | |
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
116 if (convertRTMinute){ | |
117 #converting the retention times (seconds) into minutes | |
118 print("converting the retention times into minutes in the variableMetadata") | |
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 | |
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 | |
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 | |
122 } | |
123 return (variableMetadata) | |
124 } | |
125 | |
126 #@author G. Le Corguille | |
127 # This function format ions identifiers | |
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | |
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") | |
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | |
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | |
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | |
133 return(variableMetadata) | |
134 } | |
135 | |
136 #@author G. Le Corguille | |
137 # This function convert the remain NA to 0 in the dataMatrix | |
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { | |
139 if (naTOzero){ | |
140 dataMatrix[is.na(dataMatrix)] <- 0 | |
141 } | |
142 return (dataMatrix) | |
143 } | |
144 | |
145 #@author G. Le Corguille | |
146 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
147 getPlotChromPeakDensity <- function(xdata, mzdigit=4) { | |
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | |
149 | |
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | |
151 | |
152 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
153 names(group_colors) <- unique(xdata$sample_group) | |
154 | |
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | |
156 for (i in 1:nrow(featureDefinitions(xdata))) { | |
157 mzmin = featureDefinitions(xdata)[i,]$mzmin | |
158 mzmax = featureDefinitions(xdata)[i,]$mzmax | |
159 plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | |
160 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
161 } | |
162 | |
163 dev.off() | |
164 } | |
165 | |
166 #@author G. Le Corguille | |
167 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
168 getPlotAdjustedRtime <- function(xdata) { | |
169 | |
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | |
171 | |
172 # Color by group | |
173 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
174 if (length(group_colors) > 1) { | |
175 names(group_colors) <- unique(xdata$sample_group) | |
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | |
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
178 } | |
179 | |
180 # Color by sample | |
181 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | |
182 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
183 | |
184 dev.off() | |
185 } | |
186 | |
187 #@author G. Le Corguille | |
188 # value: intensity values to be used into, maxo or intb | |
189 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) { | |
190 dataMatrix <- featureValues(xdata, method="medret", value=intval) | |
191 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix)) | |
192 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix) | |
193 variableMetadata <- featureDefinitions(xdata) | |
194 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" | |
195 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata) | |
196 | |
197 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | |
198 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
199 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) | |
200 | |
201 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | |
202 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | |
203 | |
204 } | |
205 | |
206 #@author G. Le Corguille | |
207 # It allow different of field separators | |
208 getDataFrameFromFile <- function(filename, header=T) { | |
209 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) | |
210 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) | |
211 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) | |
212 if (ncol(myDataFrame) < 2) { | |
213 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" | |
214 print(error_message) | |
215 stop(error_message) | |
216 } | |
217 return(myDataFrame) | |
218 } | |
219 | |
220 #@author G. Le Corguille | |
221 # Draw the BPI and TIC graphics | |
222 # colored by sample names or class names | |
223 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { | |
224 | |
225 if (aggregationFun == "sum") | |
226 type="Total Ion Chromatograms" | |
227 else | |
228 type="Base Peak Intensity Chromatograms" | |
229 | |
230 adjusted="Raw" | |
231 if (hasAdjustedRtime(xdata)) | |
232 adjusted="Adjusted" | |
233 | |
234 main <- paste(type,":",adjusted,"data") | |
235 | |
236 pdf(pdfname, width=16, height=10) | |
237 | |
238 # Color by group | |
239 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
240 if (length(group_colors) > 1) { | |
241 names(group_colors) <- unique(xdata$sample_group) | |
242 plot(chrom, col = group_colors[chrom$sample_group], main=main) | |
243 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
244 } | |
245 | |
246 # Color by sample | |
247 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) | |
248 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
249 | |
250 dev.off() | |
251 } | |
252 | |
253 | |
254 # Get the polarities from all the samples of a condition | |
255 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
256 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
257 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | |
258 cat("Creating the sampleMetadata file...\n") | |
259 | |
260 #Create the sampleMetada dataframe | |
261 sampleMetadata <- xdata@phenoData@data | |
262 rownames(sampleMetadata) <- NULL | |
263 colnames(sampleMetadata) <- c("sampleMetadata", "class") | |
264 | |
265 sampleNamesOrigin <- sampleMetadata$sampleMetadata | |
266 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | |
267 | |
268 if (any(duplicated(sampleNamesMakeNames))) { | |
269 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | |
270 for (sampleName in sampleNamesOrigin) { | |
271 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | |
272 } | |
273 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
274 } | |
275 | |
276 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | |
277 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | |
278 for (sampleName in sampleNamesOrigin) { | |
279 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | |
280 } | |
281 } | |
282 | |
283 sampleMetadata$sampleMetadata <- sampleNamesMakeNames | |
284 | |
285 | |
286 #For each sample file, the following actions are done | |
287 for (fileIdx in 1:length(fileNames(xdata))) { | |
288 #Check if the file is in the CDF format | |
289 if (!mzR:::netCDFIsFile(fileNames(xdata))) { | |
290 | |
291 # If the column isn't exist, with add one filled with NA | |
292 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA | |
293 | |
294 #Extract the polarity (a list of polarities) | |
295 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] | |
296 #Verify if all the scans have the same polarity | |
297 uniq_list <- unique(polarity) | |
298 if (length(uniq_list)>1){ | |
299 polarity <- "mixed" | |
300 } else { | |
301 polarity <- as.character(uniq_list) | |
302 } | |
303 | |
304 #Set the polarity attribute | |
305 sampleMetadata$polarity[fileIdx] <- polarity | |
306 } | |
307 | |
308 } | |
309 | |
310 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | |
311 | |
312 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | |
313 | |
314 } | |
315 | |
316 | |
317 # This function check if xcms will found all the files | |
318 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
319 checkFilesCompatibilityWithXcms <- function(directory) { | |
320 cat("Checking files filenames compatibilities with xmcs...\n") | |
321 # WHAT XCMS WILL FIND | |
322 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
323 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
324 info <- file.info(directory) | |
325 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
326 files <- c(directory[!info$isdir], listed) | |
327 files_abs <- file.path(getwd(), files) | |
328 exists <- file.exists(files_abs) | |
329 files[exists] <- files_abs[exists] | |
330 files[exists] <- sub("//","/",files[exists]) | |
331 | |
332 # WHAT IS ON THE FILESYSTEM | |
333 filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) | |
334 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | |
335 | |
336 # COMPARISON | |
337 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | |
338 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | |
339 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | |
340 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
341 } | |
342 } | |
343 | |
344 | |
345 #This function list the compatible files within the directory as xcms did | |
346 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
347 getMSFiles <- function (directory) { | |
348 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
349 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
350 info <- file.info(directory) | |
351 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
352 files <- c(directory[!info$isdir], listed) | |
353 exists <- file.exists(files) | |
354 files <- files[exists] | |
355 return(files) | |
356 } | |
357 | |
358 # This function check if XML contains special caracters. It also checks integrity and completness. | |
359 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
360 checkXmlStructure <- function (directory) { | |
361 cat("Checking XML structure...\n") | |
362 | |
363 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | |
364 capture <- system(cmd, intern=TRUE) | |
365 | |
366 if (length(capture)>0){ | |
367 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
368 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | |
369 write(capture, stderr()) | |
370 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | |
371 } | |
372 | |
373 } | |
374 | |
375 | |
376 # This function check if XML contain special characters | |
377 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
378 deleteXmlBadCharacters<- function (directory) { | |
379 cat("Checking Non ASCII characters in the XML...\n") | |
380 | |
381 processed <- F | |
382 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) | |
383 for (i in l){ | |
384 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") | |
385 capture <- suppressWarnings(system(cmd, intern=TRUE)) | |
386 if (length(capture)>0){ | |
387 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | |
388 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | |
389 c <- system(cmd, intern=TRUE) | |
390 capture <- "" | |
391 processed <- T | |
392 } | |
393 } | |
394 if (processed) cat("\n\n") | |
395 return(processed) | |
396 } | |
397 | |
398 | |
399 # This function will compute MD5 checksum to check the data integrity | |
400 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
401 getMd5sum <- function (directory) { | |
402 cat("Compute md5 checksum...\n") | |
403 # WHAT XCMS WILL FIND | |
404 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
405 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
406 info <- file.info(directory) | |
407 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
408 files <- c(directory[!info$isdir], listed) | |
409 exists <- file.exists(files) | |
410 files <- files[exists] | |
411 | |
412 library(tools) | |
413 | |
414 #cat("\n\n") | |
415 | |
416 return(as.matrix(md5sum(files))) | |
417 } | |
418 | |
419 | |
420 # This function get the raw file path from the arguments | |
421 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
422 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { | |
423 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | |
424 | |
425 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] | |
426 | |
427 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | |
428 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] | |
429 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] | |
430 } | |
431 if (exists("singlefile_galaxyPaths")){ | |
432 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | |
433 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | |
434 | |
435 singlefile <- NULL | |
436 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
437 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
438 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
439 # In case, an url is used to import data within Galaxy | |
440 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | |
441 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
442 } | |
443 } | |
444 return(list(zipfile=zipfile, singlefile=singlefile)) | |
445 } | |
446 | |
447 # This function retrieve the raw file in the working directory | |
448 # - if zipfile: unzip the file with its directory tree | |
449 # - if singlefiles: set symlink with the good filename | |
450 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
451 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | |
452 if(!is.null(singlefile) && (length("singlefile")>0)) { | |
453 for (singlefile_sampleName in names(singlefile)) { | |
454 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | |
455 if(!file.exists(singlefile_galaxyPath)){ | |
456 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | |
457 print(error_message); stop(error_message) | |
458 } | |
459 | |
460 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | |
461 file.copy(singlefile_galaxyPath, singlefile_sampleName) | |
462 | |
463 } | |
464 directory <- "." | |
465 | |
466 } | |
467 if(!is.null(zipfile) && (zipfile != "")) { | |
468 if(!file.exists(zipfile)){ | |
469 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | |
470 print(error_message) | |
471 stop(error_message) | |
472 } | |
473 | |
474 #list all file in the zip file | |
475 #zip_files <- unzip(zipfile,list=T)[,"Name"] | |
476 | |
477 #unzip | |
478 suppressWarnings(unzip(zipfile, unzip="unzip")) | |
479 | |
480 #get the directory name | |
481 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | |
482 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | |
483 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
484 directory <- "." | |
485 if (length(directories) == 1) directory <- directories | |
486 | |
487 cat("files_root_directory\t",directory,"\n") | |
488 | |
489 } | |
490 return (directory) | |
491 } | |
492 | |
493 | |
494 # This function retrieve a xset like object | |
495 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
496 getxcmsSetObject <- function(xobject) { | |
497 # XCMS 1.x | |
498 if (class(xobject) == "xcmsSet") | |
499 return (xobject) | |
500 # XCMS 3.x | |
501 if (class(xobject) == "XCMSnExp") { | |
502 # Get the legacy xcmsSet object | |
503 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
504 if (!is.null(xset@phenoData$sample_group)) | |
505 sampclass(xset) <- xset@phenoData$sample_group | |
506 else | |
507 sampclass(xset) <- "." | |
508 return (xset) | |
509 } | |
510 } |