comparison lib.r @ 0:6aea0427511e draft default tip

planemo upload commit 24d44ee26b7c23380c2b42fae2f7f6e58472100d
author workflow4metabolomics
date Sun, 24 Nov 2024 21:28:57 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6aea0427511e
1 # lib.r
2
3 # @author G. Le Corguille
4 # solve an issue with batch if arguments are logical TRUE/FALSE
5 parseCommandArgs <- function(...) {
6 args <- batch::parseCommandArgs(...)
7 for (key in names(args)) {
8 if (args[key] %in% c("TRUE", "FALSE")) {
9 args[key] <- as.logical(args[key])
10 }
11 }
12 return(args)
13 }
14
15 # @author G. Le Corguille
16 # This function will
17 # - load the packages
18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) {
20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21
22 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t")
27 }
28 cat("\n")
29 cat("Other loaded packages:\n")
30 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t")
32 }
33 cat("\n")
34 }
35
36 # This function retrieve a xset like object
37 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr
38 getxcmsSetObject <- function(xobject) {
39 # XCMS 1.x
40 if (class(xobject) == "xcmsSet") {
41 return(xobject)
42 }
43 # XCMS 3.x
44 if (class(xobject) == "XCMSnExp") {
45 # Get the legacy xcmsSet object
46 suppressWarnings(xset <- as(xobject, "xcmsSet"))
47 if (is.null(xset@phenoData$sample_group)) {
48 sampclass(xset) <- "."
49 } else {
50 sampclass(xset) <- xset@phenoData$sample_group
51 }
52 if (!is.null(xset@phenoData$sample_name)) {
53 rownames(xset@phenoData) <- xset@phenoData$sample_name
54 }
55 return(xset)
56 }
57 }
58
59 # @author G. Le Corguille
60 # The function create a pdf from the different png generated by diffreport
61 diffreport_png2pdf <- function(filebase) {
62 dir.create("pdf")
63
64 pdfEicOutput <- paste0("pdf/", filebase, "-eic_pdf.pdf")
65 pdfBoxOutput <- paste0("pdf/", filebase, "-box_pdf.pdf")
66
67 system(paste0("gm convert ", filebase, "_eic/*.png ", pdfEicOutput))
68 system(paste0("gm convert ", filebase, "_box/*.png ", pdfBoxOutput))
69 }
70
71 # @author G. Le Corguille
72 # The function create a zip archive from the different png generated by diffreport
73 diffreport_png2zip <- function() {
74 zip("eic.zip", dir(pattern = "_eic"), zip = Sys.which("zip"))
75 zip("box.zip", dir(pattern = "_box"), zip = Sys.which("zip"))
76 }
77
78 # The function create a zip archive from the different tabular generated by diffreport
79 diffreport_tabular2zip <- function() {
80 zip("tabular.zip", dir(pattern = "tabular/*"), zip = Sys.which("zip"))
81 }
82
83 # @author G. Le Corguille
84 # This function convert if it is required the Retention Time in minutes
85 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
86 if (convertRTMinute) {
87 # converting the retention times (seconds) into minutes
88 print("converting the retention times into minutes in the variableMetadata")
89 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
90 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
91 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
92 }
93 return(variableMetadata)
94 }
95
96 # @author G. Le Corguille
97 # This function format ions identifiers
98 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
99 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
100 idsDeco <- sapply(splitDeco, function(x) {
101 deco <- unlist(x)[2]
102 if (is.na(deco)) {
103 return("")
104 } else {
105 return(paste0("_", deco))
106 }
107 })
108 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
109 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
110 return(variableMetadata)
111 }
112
113 # The function annotateDiffreport without the corr function which bugs
114 annotatediff <- function(xset = xset, args = args, variableMetadataOutput = "variableMetadata.tsv") {
115 # Resolve the bug with x11, with the function png
116 options(bitmapType = "cairo")
117
118 # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
119 res <- try(is.null(xset@filled))
120
121 # ------ annot -------
122 args$calcCiS <- as.logical(args$calcCiS)
123 args$calcIso <- as.logical(args$calcIso)
124 args$calcCaS <- as.logical(args$calcCaS)
125
126 # common parameters
127 args4annotate <- list(
128 object = xset,
129 nSlaves = args$nSlaves, sigma = args$sigma, perfwhm = args$perfwhm,
130 maxcharge = args$maxcharge, maxiso = args$maxiso, minfrac = args$minfrac,
131 ppm = args$ppm, mzabs = args$mzabs, quick = args$quick,
132 polarity = args$polarity, max_peaks = args$max_peaks, intval = args$intval
133 )
134
135 if (args$quick == FALSE) {
136 args4annotate <- append(
137 args4annotate,
138 list(
139 graphMethod = args$graphMethod, cor_eic_th = args$cor_eic_th, pval = args$pval,
140 calcCiS = args$calcCiS, calcIso = args$calcIso, calcCaS = args$calcCaS
141 )
142 )
143 # no ruleset
144 if (!is.null(args$multiplier)) {
145 args4annotate <- append(
146 args4annotate,
147 list(multiplier = args$multiplier)
148 )
149 } else { # ruleset
150 rulset <- read.table(args$rules, h = TRUE, sep = ";")
151 if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = "\t")
152 if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = ",")
153 if (ncol(rulset) < 4) {
154 error_message <- "Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation"
155 print(error_message)
156 stop(error_message)
157 }
158
159 args4annotate <- append(
160 args4annotate,
161 list(rules = rulset)
162 )
163 }
164 }
165
166
167 # launch annotate
168 xa <- do.call("annotate", args4annotate)
169 peakList <- getPeaklist(xa, intval = args$intval)
170 peakList <- cbind(groupnames(xa@xcmsSet), peakList)
171 colnames(peakList)[1] <- c("name")
172
173 # --- Multi condition : diffreport ---
174 diffrepOri <- NULL
175 if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) >= 2) {
176 # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
177 res <- try(is.null(xset@filled))
178 classes <- levels(sampclass(xset))
179 for (i in seq_len(length(classes) - 1)) {
180 for (n in seq_len(length(classes))) {
181 if (i + n <= length(classes)) {
182 filebase <- paste(classes[i], class2 = classes[i + n], sep = "-vs-")
183
184 diffrep <- diffreport(
185 object = xset, class1 = classes[i], class2 = classes[i + n],
186 filebase = filebase, eicmax = args$eicmax, eicwidth = args$eicwidth,
187 sortpval = TRUE, value = args$value, h = args$h, w = args$w, mzdec = args$mzdec, missing = 0
188 )
189
190 diffrepOri <- diffrep
191
192 # renamming of the column rtmed to rt to fit with camera peaklist function output
193 colnames(diffrep)[colnames(diffrep) == "rtmed"] <- "rt"
194 colnames(diffrep)[colnames(diffrep) == "mzmed"] <- "mz"
195
196 # combines results and reorder columns
197 diffrep <- merge(peakList, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE)
198 diffrep <- cbind(diffrep[, !(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))], diffrep[, (colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
199
200 diffrep <- RTSecondToMinute(diffrep, args$convertRTMinute)
201 diffrep <- formatIonIdentifiers(diffrep, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ)
202
203 if (args$sortpval) {
204 diffrep <- diffrep[order(diffrep$pvalue), ]
205 }
206
207 dir.create("tabular", showWarnings = FALSE)
208 write.table(diffrep, sep = "\t", quote = FALSE, row.names = FALSE, file = paste("tabular/", filebase, "_tsv.tabular", sep = ""))
209
210 if (args$eicmax != 0) {
211 if (args$png2 == "pdf") {
212 diffreport_png2pdf(filebase)
213 }
214 if (args$png2 == "zip") {
215 diffreport_png2zip()
216 }
217 }
218 }
219 }
220 }
221 if (args$tabular2 == "zip") {
222 diffreport_tabular2zip()
223 }
224 }
225
226 # --- variableMetadata ---
227 variableMetadata <- peakList[, !(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
228 variableMetadata <- RTSecondToMinute(variableMetadata, args$convertRTMinute)
229 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ)
230 # if we have 2 conditions, we keep stat of diffrep
231 if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) == 2) {
232 variableMetadata <- merge(variableMetadata, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE)
233 if (exists("args[[\"sortpval\"]]")) {
234 variableMetadata <- variableMetadata[order(variableMetadata$pvalue), ]
235 }
236 }
237
238 variableMetadataOri <- variableMetadata
239 write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput)
240
241 return(list("xa" = xa, "diffrep" = diffrepOri, "variableMetadata" = variableMetadataOri))
242 }
243
244
245 combinexsAnnos_function <- function(
246 xaP, xaN, diffrepP = NULL, diffrepN = NULL,
247 pos = TRUE, tol = 2, ruleset = NULL, keep_meta = TRUE, convertRTMinute = FALSE, numDigitsMZ = 0,
248 numDigitsRT = 0, variableMetadataOutput = "variableMetadata.tsv") {
249 # Load the two Rdata to extract the xset objects from positive and negative mode
250 cat("\tObject xset from positive mode\n")
251 print(xaP)
252 cat("\n")
253
254 cat("\tObject xset from negative mode\n")
255 print(xaN)
256 cat("\n")
257
258 cat("\n")
259 cat("\tCombining...\n")
260 # Convert the string to numeric for creating matrix
261 row <- as.numeric(strsplit(ruleset, ",")[[1]][1])
262 column <- as.numeric(strsplit(ruleset, ",")[[1]][2])
263 ruleset <- cbind(row, column)
264 # Test if the file comes from an older version tool
265 if ((!is.null(xaP)) && (!is.null(xaN))) {
266 # Launch the combinexsannos function from CAMERA
267 cAnnot <- combinexsAnnos(xaP, xaN, pos = pos, tol = tol, ruleset = ruleset)
268 } else {
269 stop("You must relauch the CAMERA.annotate step with the lastest version.")
270 }
271
272 if (pos) {
273 xa <- xaP
274 mode <- "neg. Mode"
275 } else {
276 xa <- xaN
277 mode <- "pos. Mode"
278 }
279
280 peakList <- getPeaklist(xa)
281 peakList <- cbind(groupnames(xa@xcmsSet), peakList)
282 colnames(peakList)[1] <- c("name")
283 variableMetadata <- cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup", mode)])
284 variableMetadata <- variableMetadata[, !(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]
285
286 # Test if there are more than two classes (conditions)
287 if (nlevels(sampclass(xaP@xcmsSet)) == 2 && (!is.null(diffrepN)) && (!is.null(diffrepP))) {
288 diffrepP <- diffrepP[, c("name", "fold", "tstat", "pvalue")]
289 colnames(diffrepP) <- paste("P.", colnames(diffrepP), sep = "")
290 diffrepN <- diffrepN[, c("name", "fold", "tstat", "pvalue")]
291 colnames(diffrepN) <- paste("N.", colnames(diffrepN), sep = "")
292
293 variableMetadata <- merge(variableMetadata, diffrepP, by.x = "name", by.y = "P.name")
294 variableMetadata <- merge(variableMetadata, diffrepN, by.x = "name", by.y = "N.name")
295 }
296
297 rownames(variableMetadata) <- NULL
298 # TODO: checker colnames(variableMetadata)[1:2] = c("name", "mz/rt");
299
300 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
301 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
302
303 # If the user want to keep only the metabolites which match a difference
304 if (keep_meta) {
305 variableMetadata <- variableMetadata[variableMetadata[, c(mode)] != "", ]
306 }
307
308 # Write the output into a tsv file
309 write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput)
310 return(variableMetadata)
311 }
312
313 # This function get the raw file path from the arguments
314 getRawfilePathFromArguments <- function(singlefile, zipfile, args) {
315 if (!is.null(args$zipfile)) zipfile <- args$zipfile
316 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive
317 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative
318
319 if (!is.null(args$singlefile_galaxyPath)) {
320 singlefile_galaxyPaths <- args$singlefile_galaxyPath
321 singlefile_sampleNames <- args$singlefile_sampleName
322 }
323 if (!is.null(args$singlefile_galaxyPathPositive)) {
324 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive
325 singlefile_sampleNames <- args$singlefile_sampleNamePositive
326 }
327 if (!is.null(args$singlefile_galaxyPathNegative)) {
328 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative
329 singlefile_sampleNames <- args$singlefile_sampleNameNegative
330 }
331 if (exists("singlefile_galaxyPaths")) {
332 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, ","))
333 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, ","))
334
335 singlefile <- NULL
336 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
337 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
338 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
339 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
340 }
341 }
342 for (argument in c(
343 "zipfile", "zipfilePositive", "zipfileNegative",
344 "singlefile_galaxyPath", "singlefile_sampleName",
345 "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive",
346 "singlefile_galaxyPathNegative", "singlefile_sampleNameNegative"
347 )) {
348 args[[argument]] <- NULL
349 }
350 return(list(zipfile = zipfile, singlefile = singlefile, args = args))
351 }
352
353
354 # This function retrieve the raw file in the working directory
355 # - if zipfile: unzip the file with its directory tree
356 # - if singlefiles: set symlink with the good filename
357 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile) {
358 if (!is.null(singlefile) && (length("singlefile") > 0)) {
359 for (singlefile_sampleName in names(singlefile)) {
360 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
361 if (!file.exists(singlefile_galaxyPath)) {
362 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
363 print(error_message)
364 stop(error_message)
365 }
366
367 file.symlink(singlefile_galaxyPath, singlefile_sampleName)
368 }
369 directory <- "."
370 }
371 if (!is.null(zipfile) && (zipfile != "")) {
372 if (!file.exists(zipfile)) {
373 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
374 print(error_message)
375 stop(error_message)
376 }
377
378 # unzip
379 suppressWarnings(unzip(zipfile, unzip = "unzip"))
380
381 # get the directory name
382 filesInZip <- unzip(zipfile, list = TRUE)
383 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
384 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
385 directory <- "."
386 if (length(directories) == 1) directory <- directories
387
388 cat("files_root_directory\t", directory, "\n")
389 }
390 return(directory)
391 }