comparison lib.r @ 0:eb115eb8f25c draft

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit d4bb3c31e2beca6b4059758c9e76d42c26f8aa55"
author workflow4metabolomics
date Thu, 17 Jun 2021 07:42:55 +0000
parents
children 20f8ebc3a391
comparison
equal deleted inserted replaced
-1:000000000000 0:eb115eb8f25c
1 #@authors ABiMS TEAM, Y. Guitton
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3
4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE", "FALSE"))
10 args[key] <- as.logical(args[key])
11 }
12 return(args)
13 }
14
15 #@author G. Le Corguille
16 # This function will
17 # - load the packages
18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) {
20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21
22 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t")
27 }
28 cat("\n")
29 cat("Other loaded packages:\n")
30 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t")
32 }
33 cat("\n")
34 }
35
36 #@author G. Le Corguille
37 # This function merge several chromBPI or chromTIC into one.
38 mergeChrom <- function(chrom_merged, chrom) {
39 if (is.null(chrom_merged)) return(NULL)
40 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data)
41 return(chrom_merged)
42 }
43
44 #@author G. Le Corguille
45 # This function merge several xdata into one.
46 mergeXData <- function(args) {
47 chromTIC <- NULL
48 chromBPI <- NULL
49 chromTIC_adjusted <- NULL
50 chromBPI_adjusted <- NULL
51 md5sumList <- NULL
52 for (image in args$images) {
53
54 load(image)
55 # Handle infiles
56 if (!exists("singlefile")) singlefile <- NULL
57 if (!exists("zipfile")) zipfile <- NULL
58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args)
59 zipfile <- rawFilePath$zipfile
60 singlefile <- rawFilePath$singlefile
61
62 if (exists("raw_data")) xdata <- raw_data
63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
64
65 cat(sampleNamesList$sampleNamesOrigin, "\n")
66
67 if (!exists("xdata_merged")) {
68 xdata_merged <- xdata
69 singlefile_merged <- singlefile
70 md5sumList_merged <- md5sumList
71 sampleNamesList_merged <- sampleNamesList
72 chromTIC_merged <- chromTIC
73 chromBPI_merged <- chromBPI
74 chromTIC_adjusted_merged <- chromTIC_adjusted
75 chromBPI_adjusted_merged <- chromBPI_adjusted
76 } else {
77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata)
78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata)
79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
80
81 singlefile_merged <- c(singlefile_merged, singlefile)
82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin)
83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin)
84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames)
85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
89 }
90 }
91 rm(image)
92 xdata <- xdata_merged; rm(xdata_merged)
93 singlefile <- singlefile_merged; rm(singlefile_merged)
94 md5sumList <- md5sumList_merged; rm(md5sumList_merged)
95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
96
97 if (!is.null(args$sampleMetadata)) {
98 cat("\tXSET PHENODATA SETTING...\n")
99 sampleMetadataFile <- args$sampleMetadata
100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F)
101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)]
102
103 if (any(is.na(pData(xdata)$sample_group))) {
104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " "))
106 print(error_message)
107 stop(error_message)
108 }
109 }
110
111 if (!is.null(chromTIC_merged)) {
112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData
113 }
114 if (!is.null(chromBPI_merged)) {
115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData
116 }
117 if (!is.null(chromTIC_adjusted_merged)) {
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData
119 }
120 if (!is.null(chromBPI_adjusted_merged)) {
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData
122 }
123
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted))
125 }
126
127 #@author G. Le Corguille
128 # This function convert if it is required the Retention Time in minutes
129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
130 if (convertRTMinute) {
131 #converting the retention times (seconds) into minutes
132 print("converting the retention times into minutes in the variableMetadata")
133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
136 }
137 return(variableMetadata)
138 }
139
140 #@author G. Le Corguille
141 # This function format ions identifiers
142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
143 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
144 idsDeco <- sapply(splitDeco,
145 function(x) {
146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco))
147 }
148 )
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
151 return(variableMetadata)
152 }
153
154 #@author G. Le Corguille
155 # This function convert the remain NA to 0 in the dataMatrix
156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
157 if (naTOzero) {
158 dataMatrix[is.na(dataMatrix)] <- 0
159 }
160 return(dataMatrix)
161 }
162
163 #@author G. Le Corguille
164 # Draw the plotChromPeakDensity 3 per page in a pdf file
165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) {
166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12)
167
168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
169
170 if (length(unique(xdata$sample_group)) < 10) {
171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
172 }else{
173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
174 }
175 names(group_colors) <- unique(xdata$sample_group)
176 col_per_samp <- as.character(xdata$sample_group)
177 for (i in seq_len(length(group_colors))) {
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i]
179 }
180
181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
182 for (i in seq_len(nrow(featureDefinitions(xdata)))) {
183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin
184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax
185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
187 }
188
189 dev.off()
190 }
191
192 #@author G. Le Corguille
193 # Draw the plotChromPeakDensity 3 per page in a pdf file
194 getPlotAdjustedRtime <- function(xdata) {
195
196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12)
197
198 # Color by group
199 if (length(unique(xdata$sample_group)) < 10) {
200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
201 } else {
202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
203 }
204 if (length(group_colors) > 1) {
205 names(group_colors) <- unique(xdata$sample_group)
206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
208 }
209
210 # Color by sample
211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
213
214 dev.off()
215 }
216
217 #@author G. Le Corguille
218 # value: intensity values to be used into, maxo or intb
219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
220 dataMatrix <- featureValues(xdata, method = "medret", value = intval)
221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix)
223 variableMetadata <- featureDefinitions(xdata)
224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt"
225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata)
226
227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
230
231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
233
234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F)
235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F)
236
237 }
238
239 #@author G. Le Corguille
240 # It allow different of field separators
241 getDataFrameFromFile <- function(filename, header = T) {
242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F)
243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F)
244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F)
245 if (ncol(myDataFrame) < 2) {
246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
247 print(error_message)
248 stop(error_message)
249 }
250 return(myDataFrame)
251 }
252
253 #@author G. Le Corguille
254 # Draw the BPI and TIC graphics
255 # colored by sample names or class names
256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") {
257
258 if (aggregationFun == "sum")
259 type <- "Total Ion Chromatograms"
260 else
261 type <- "Base Peak Intensity Chromatograms"
262
263 adjusted <- "Raw"
264 if (hasAdjustedRtime(xdata))
265 adjusted <- "Adjusted"
266
267 main <- paste(type, ":", adjusted, "data")
268
269 pdf(pdfname, width = 16, height = 10)
270
271 # Color by group
272 if (length(unique(xdata$sample_group)) < 10) {
273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
274 }else{
275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
276 }
277 if (length(group_colors) > 1) {
278 names(group_colors) <- unique(xdata$sample_group)
279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none")
280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
281 }
282
283 # Color by sample
284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none")
285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
286
287 dev.off()
288 }
289
290
291 # Get the polarities from all the samples of a condition
292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") {
295 cat("Creating the sampleMetadata file...\n")
296
297 #Create the sampleMetada dataframe
298 sampleMetadata <- xdata@phenoData@data
299 rownames(sampleMetadata) <- NULL
300 colnames(sampleMetadata) <- c("sample_name", "class")
301
302 sampleNamesOrigin <- sampleMetadata$sample_name
303 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
304
305 if (any(duplicated(sampleNamesMakeNames))) {
306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
307 for (sampleName in sampleNamesOrigin) {
308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
309 }
310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
311 }
312
313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
315 for (sampleName in sampleNamesOrigin) {
316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
317 }
318 }
319
320 sampleMetadata$sample_name <- sampleNamesMakeNames
321
322
323 #For each sample file, the following actions are done
324 for (fileIdx in seq_len(length(fileNames(xdata)))) {
325 #Check if the file is in the CDF format
326 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
327
328 # If the column isn't exist, with add one filled with NA
329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
330
331 #Extract the polarity (a list of polarities)
332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"]
333 #Verify if all the scans have the same polarity
334 uniq_list <- unique(polarity)
335 if (length(uniq_list) > 1) {
336 polarity <- "mixed"
337 } else {
338 polarity <- as.character(uniq_list)
339 }
340
341 #Set the polarity attribute
342 sampleMetadata$polarity[fileIdx] <- polarity
343 }
344
345 }
346
347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput)
348
349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
350
351 }
352
353
354 # This function will compute MD5 checksum to check the data integrity
355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
356 getMd5sum <- function(files) {
357 cat("Compute md5 checksum...\n")
358 library(tools)
359 return(as.matrix(md5sum(files)))
360 }
361
362 # This function retrieve the raw file in the working directory
363 # - if zipfile: unzip the file with its directory tree
364 # - if singlefiles: set symlink with the good filename
365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") {
367
368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
369
370 # single - if the file are passed in the command arguments -> refresh singlefile
371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|"))
373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|"))
374
375 singlefile <- NULL
376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
379 # In case, an url is used to import data within Galaxy
380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
382 }
383 }
384 # zipfile - if the file are passed in the command arguments -> refresh zipfile
385 if (!is.null(args[[paste0("zipfile", prefix)]]))
386 zipfile <- args[[paste0("zipfile", prefix)]]
387
388 # single
389 if (!is.null(singlefile) && (length("singlefile") > 0)) {
390 files <- vector()
391 for (singlefile_sampleName in names(singlefile)) {
392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
393 if (!file.exists(singlefile_galaxyPath)) {
394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
395 print(error_message); stop(error_message)
396 }
397
398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T)))
399 file.copy(singlefile_galaxyPath, singlefile_sampleName)
400 files <- c(files, singlefile_sampleName)
401 }
402 }
403 # zipfile
404 if (!is.null(zipfile) && (zipfile != "")) {
405 if (!file.exists(zipfile)) {
406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
407 print(error_message)
408 stop(error_message)
409 }
410 suppressWarnings(unzip(zipfile, unzip = "unzip"))
411
412 #get the directory name
413 suppressWarnings(filesInZip <- unzip(zipfile, list = T))
414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
416 directory <- "."
417 if (length(directories) == 1) directory <- directories
418
419 cat("files_root_directory\t", directory, "\n")
420
421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
423 info <- file.info(directory)
424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
425 files <- c(directory[!info$isdir], listed)
426 exists <- file.exists(files)
427 files <- files[exists]
428
429 }
430 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
431
432 }
433
434
435 # This function retrieve a xset like object
436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
437 getxcmsSetObject <- function(xobject) {
438 # XCMS 1.x
439 if (class(xobject) == "xcmsSet")
440 return(xobject)
441 # XCMS 3.x
442 if (class(xobject) == "XCMSnExp") {
443 # Get the legacy xcmsSet object
444 suppressWarnings(xset <- as(xobject, "xcmsSet"))
445 if (!is.null(xset@phenoData$sample_group))
446 sampclass(xset) <- xset@phenoData$sample_group
447 else
448 sampclass(xset) <- "."
449 return(xset)
450 }
451 }