Mercurial > repos > workflow4metabolomics > camera_groupfwhm
comparison lib.r @ 0:6aea0427511e draft default tip
planemo upload commit 24d44ee26b7c23380c2b42fae2f7f6e58472100d
author | workflow4metabolomics |
---|---|
date | Sun, 24 Nov 2024 21:28:57 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6aea0427511e |
---|---|
1 # lib.r | |
2 | |
3 # @author G. Le Corguille | |
4 # solve an issue with batch if arguments are logical TRUE/FALSE | |
5 parseCommandArgs <- function(...) { | |
6 args <- batch::parseCommandArgs(...) | |
7 for (key in names(args)) { | |
8 if (args[key] %in% c("TRUE", "FALSE")) { | |
9 args[key] <- as.logical(args[key]) | |
10 } | |
11 } | |
12 return(args) | |
13 } | |
14 | |
15 # @author G. Le Corguille | |
16 # This function will | |
17 # - load the packages | |
18 # - display the sessionInfo | |
19 loadAndDisplayPackages <- function(pkgs) { | |
20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) | |
21 | |
22 sessioninfo <- sessionInfo() | |
23 cat(sessioninfo$R.version$version.string, "\n") | |
24 cat("Main packages:\n") | |
25 for (pkg in names(sessioninfo$otherPkgs)) { | |
26 cat(paste(pkg, packageVersion(pkg)), "\t") | |
27 } | |
28 cat("\n") | |
29 cat("Other loaded packages:\n") | |
30 for (pkg in names(sessioninfo$loadedOnly)) { | |
31 cat(paste(pkg, packageVersion(pkg)), "\t") | |
32 } | |
33 cat("\n") | |
34 } | |
35 | |
36 # This function retrieve a xset like object | |
37 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
38 getxcmsSetObject <- function(xobject) { | |
39 # XCMS 1.x | |
40 if (class(xobject) == "xcmsSet") { | |
41 return(xobject) | |
42 } | |
43 # XCMS 3.x | |
44 if (class(xobject) == "XCMSnExp") { | |
45 # Get the legacy xcmsSet object | |
46 suppressWarnings(xset <- as(xobject, "xcmsSet")) | |
47 if (is.null(xset@phenoData$sample_group)) { | |
48 sampclass(xset) <- "." | |
49 } else { | |
50 sampclass(xset) <- xset@phenoData$sample_group | |
51 } | |
52 if (!is.null(xset@phenoData$sample_name)) { | |
53 rownames(xset@phenoData) <- xset@phenoData$sample_name | |
54 } | |
55 return(xset) | |
56 } | |
57 } | |
58 | |
59 # @author G. Le Corguille | |
60 # The function create a pdf from the different png generated by diffreport | |
61 diffreport_png2pdf <- function(filebase) { | |
62 dir.create("pdf") | |
63 | |
64 pdfEicOutput <- paste0("pdf/", filebase, "-eic_pdf.pdf") | |
65 pdfBoxOutput <- paste0("pdf/", filebase, "-box_pdf.pdf") | |
66 | |
67 system(paste0("gm convert ", filebase, "_eic/*.png ", pdfEicOutput)) | |
68 system(paste0("gm convert ", filebase, "_box/*.png ", pdfBoxOutput)) | |
69 } | |
70 | |
71 # @author G. Le Corguille | |
72 # The function create a zip archive from the different png generated by diffreport | |
73 diffreport_png2zip <- function() { | |
74 zip("eic.zip", dir(pattern = "_eic"), zip = Sys.which("zip")) | |
75 zip("box.zip", dir(pattern = "_box"), zip = Sys.which("zip")) | |
76 } | |
77 | |
78 # The function create a zip archive from the different tabular generated by diffreport | |
79 diffreport_tabular2zip <- function() { | |
80 zip("tabular.zip", dir(pattern = "tabular/*"), zip = Sys.which("zip")) | |
81 } | |
82 | |
83 # @author G. Le Corguille | |
84 # This function convert if it is required the Retention Time in minutes | |
85 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
86 if (convertRTMinute) { | |
87 # converting the retention times (seconds) into minutes | |
88 print("converting the retention times into minutes in the variableMetadata") | |
89 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 | |
90 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 | |
91 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 | |
92 } | |
93 return(variableMetadata) | |
94 } | |
95 | |
96 # @author G. Le Corguille | |
97 # This function format ions identifiers | |
98 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { | |
99 splitDeco <- strsplit(as.character(variableMetadata$name), "_") | |
100 idsDeco <- sapply(splitDeco, function(x) { | |
101 deco <- unlist(x)[2] | |
102 if (is.na(deco)) { | |
103 return("") | |
104 } else { | |
105 return(paste0("_", deco)) | |
106 } | |
107 }) | |
108 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) | |
109 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) | |
110 return(variableMetadata) | |
111 } | |
112 | |
113 # The function annotateDiffreport without the corr function which bugs | |
114 annotatediff <- function(xset = xset, args = args, variableMetadataOutput = "variableMetadata.tsv") { | |
115 # Resolve the bug with x11, with the function png | |
116 options(bitmapType = "cairo") | |
117 | |
118 # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | |
119 res <- try(is.null(xset@filled)) | |
120 | |
121 # ------ annot ------- | |
122 args$calcCiS <- as.logical(args$calcCiS) | |
123 args$calcIso <- as.logical(args$calcIso) | |
124 args$calcCaS <- as.logical(args$calcCaS) | |
125 | |
126 # common parameters | |
127 args4annotate <- list( | |
128 object = xset, | |
129 nSlaves = args$nSlaves, sigma = args$sigma, perfwhm = args$perfwhm, | |
130 maxcharge = args$maxcharge, maxiso = args$maxiso, minfrac = args$minfrac, | |
131 ppm = args$ppm, mzabs = args$mzabs, quick = args$quick, | |
132 polarity = args$polarity, max_peaks = args$max_peaks, intval = args$intval | |
133 ) | |
134 | |
135 if (args$quick == FALSE) { | |
136 args4annotate <- append( | |
137 args4annotate, | |
138 list( | |
139 graphMethod = args$graphMethod, cor_eic_th = args$cor_eic_th, pval = args$pval, | |
140 calcCiS = args$calcCiS, calcIso = args$calcIso, calcCaS = args$calcCaS | |
141 ) | |
142 ) | |
143 # no ruleset | |
144 if (!is.null(args$multiplier)) { | |
145 args4annotate <- append( | |
146 args4annotate, | |
147 list(multiplier = args$multiplier) | |
148 ) | |
149 } else { # ruleset | |
150 rulset <- read.table(args$rules, h = TRUE, sep = ";") | |
151 if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = "\t") | |
152 if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = ",") | |
153 if (ncol(rulset) < 4) { | |
154 error_message <- "Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation" | |
155 print(error_message) | |
156 stop(error_message) | |
157 } | |
158 | |
159 args4annotate <- append( | |
160 args4annotate, | |
161 list(rules = rulset) | |
162 ) | |
163 } | |
164 } | |
165 | |
166 | |
167 # launch annotate | |
168 xa <- do.call("annotate", args4annotate) | |
169 peakList <- getPeaklist(xa, intval = args$intval) | |
170 peakList <- cbind(groupnames(xa@xcmsSet), peakList) | |
171 colnames(peakList)[1] <- c("name") | |
172 | |
173 # --- Multi condition : diffreport --- | |
174 diffrepOri <- NULL | |
175 if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) >= 2) { | |
176 # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | |
177 res <- try(is.null(xset@filled)) | |
178 classes <- levels(sampclass(xset)) | |
179 for (i in seq_len(length(classes) - 1)) { | |
180 for (n in seq_len(length(classes))) { | |
181 if (i + n <= length(classes)) { | |
182 filebase <- paste(classes[i], class2 = classes[i + n], sep = "-vs-") | |
183 | |
184 diffrep <- diffreport( | |
185 object = xset, class1 = classes[i], class2 = classes[i + n], | |
186 filebase = filebase, eicmax = args$eicmax, eicwidth = args$eicwidth, | |
187 sortpval = TRUE, value = args$value, h = args$h, w = args$w, mzdec = args$mzdec, missing = 0 | |
188 ) | |
189 | |
190 diffrepOri <- diffrep | |
191 | |
192 # renamming of the column rtmed to rt to fit with camera peaklist function output | |
193 colnames(diffrep)[colnames(diffrep) == "rtmed"] <- "rt" | |
194 colnames(diffrep)[colnames(diffrep) == "mzmed"] <- "mz" | |
195 | |
196 # combines results and reorder columns | |
197 diffrep <- merge(peakList, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE) | |
198 diffrep <- cbind(diffrep[, !(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))], diffrep[, (colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) | |
199 | |
200 diffrep <- RTSecondToMinute(diffrep, args$convertRTMinute) | |
201 diffrep <- formatIonIdentifiers(diffrep, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ) | |
202 | |
203 if (args$sortpval) { | |
204 diffrep <- diffrep[order(diffrep$pvalue), ] | |
205 } | |
206 | |
207 dir.create("tabular", showWarnings = FALSE) | |
208 write.table(diffrep, sep = "\t", quote = FALSE, row.names = FALSE, file = paste("tabular/", filebase, "_tsv.tabular", sep = "")) | |
209 | |
210 if (args$eicmax != 0) { | |
211 if (args$png2 == "pdf") { | |
212 diffreport_png2pdf(filebase) | |
213 } | |
214 if (args$png2 == "zip") { | |
215 diffreport_png2zip() | |
216 } | |
217 } | |
218 } | |
219 } | |
220 } | |
221 if (args$tabular2 == "zip") { | |
222 diffreport_tabular2zip() | |
223 } | |
224 } | |
225 | |
226 # --- variableMetadata --- | |
227 variableMetadata <- peakList[, !(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] | |
228 variableMetadata <- RTSecondToMinute(variableMetadata, args$convertRTMinute) | |
229 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ) | |
230 # if we have 2 conditions, we keep stat of diffrep | |
231 if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) == 2) { | |
232 variableMetadata <- merge(variableMetadata, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE) | |
233 if (exists("args[[\"sortpval\"]]")) { | |
234 variableMetadata <- variableMetadata[order(variableMetadata$pvalue), ] | |
235 } | |
236 } | |
237 | |
238 variableMetadataOri <- variableMetadata | |
239 write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput) | |
240 | |
241 return(list("xa" = xa, "diffrep" = diffrepOri, "variableMetadata" = variableMetadataOri)) | |
242 } | |
243 | |
244 | |
245 combinexsAnnos_function <- function( | |
246 xaP, xaN, diffrepP = NULL, diffrepN = NULL, | |
247 pos = TRUE, tol = 2, ruleset = NULL, keep_meta = TRUE, convertRTMinute = FALSE, numDigitsMZ = 0, | |
248 numDigitsRT = 0, variableMetadataOutput = "variableMetadata.tsv") { | |
249 # Load the two Rdata to extract the xset objects from positive and negative mode | |
250 cat("\tObject xset from positive mode\n") | |
251 print(xaP) | |
252 cat("\n") | |
253 | |
254 cat("\tObject xset from negative mode\n") | |
255 print(xaN) | |
256 cat("\n") | |
257 | |
258 cat("\n") | |
259 cat("\tCombining...\n") | |
260 # Convert the string to numeric for creating matrix | |
261 row <- as.numeric(strsplit(ruleset, ",")[[1]][1]) | |
262 column <- as.numeric(strsplit(ruleset, ",")[[1]][2]) | |
263 ruleset <- cbind(row, column) | |
264 # Test if the file comes from an older version tool | |
265 if ((!is.null(xaP)) && (!is.null(xaN))) { | |
266 # Launch the combinexsannos function from CAMERA | |
267 cAnnot <- combinexsAnnos(xaP, xaN, pos = pos, tol = tol, ruleset = ruleset) | |
268 } else { | |
269 stop("You must relauch the CAMERA.annotate step with the lastest version.") | |
270 } | |
271 | |
272 if (pos) { | |
273 xa <- xaP | |
274 mode <- "neg. Mode" | |
275 } else { | |
276 xa <- xaN | |
277 mode <- "pos. Mode" | |
278 } | |
279 | |
280 peakList <- getPeaklist(xa) | |
281 peakList <- cbind(groupnames(xa@xcmsSet), peakList) | |
282 colnames(peakList)[1] <- c("name") | |
283 variableMetadata <- cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup", mode)]) | |
284 variableMetadata <- variableMetadata[, !(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))] | |
285 | |
286 # Test if there are more than two classes (conditions) | |
287 if (nlevels(sampclass(xaP@xcmsSet)) == 2 && (!is.null(diffrepN)) && (!is.null(diffrepP))) { | |
288 diffrepP <- diffrepP[, c("name", "fold", "tstat", "pvalue")] | |
289 colnames(diffrepP) <- paste("P.", colnames(diffrepP), sep = "") | |
290 diffrepN <- diffrepN[, c("name", "fold", "tstat", "pvalue")] | |
291 colnames(diffrepN) <- paste("N.", colnames(diffrepN), sep = "") | |
292 | |
293 variableMetadata <- merge(variableMetadata, diffrepP, by.x = "name", by.y = "P.name") | |
294 variableMetadata <- merge(variableMetadata, diffrepN, by.x = "name", by.y = "N.name") | |
295 } | |
296 | |
297 rownames(variableMetadata) <- NULL | |
298 # TODO: checker colnames(variableMetadata)[1:2] = c("name", "mz/rt"); | |
299 | |
300 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | |
301 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) | |
302 | |
303 # If the user want to keep only the metabolites which match a difference | |
304 if (keep_meta) { | |
305 variableMetadata <- variableMetadata[variableMetadata[, c(mode)] != "", ] | |
306 } | |
307 | |
308 # Write the output into a tsv file | |
309 write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput) | |
310 return(variableMetadata) | |
311 } | |
312 | |
313 # This function get the raw file path from the arguments | |
314 getRawfilePathFromArguments <- function(singlefile, zipfile, args) { | |
315 if (!is.null(args$zipfile)) zipfile <- args$zipfile | |
316 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive | |
317 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative | |
318 | |
319 if (!is.null(args$singlefile_galaxyPath)) { | |
320 singlefile_galaxyPaths <- args$singlefile_galaxyPath | |
321 singlefile_sampleNames <- args$singlefile_sampleName | |
322 } | |
323 if (!is.null(args$singlefile_galaxyPathPositive)) { | |
324 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive | |
325 singlefile_sampleNames <- args$singlefile_sampleNamePositive | |
326 } | |
327 if (!is.null(args$singlefile_galaxyPathNegative)) { | |
328 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative | |
329 singlefile_sampleNames <- args$singlefile_sampleNameNegative | |
330 } | |
331 if (exists("singlefile_galaxyPaths")) { | |
332 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, ",")) | |
333 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, ",")) | |
334 | |
335 singlefile <- NULL | |
336 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { | |
337 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
338 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
339 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
340 } | |
341 } | |
342 for (argument in c( | |
343 "zipfile", "zipfilePositive", "zipfileNegative", | |
344 "singlefile_galaxyPath", "singlefile_sampleName", | |
345 "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive", | |
346 "singlefile_galaxyPathNegative", "singlefile_sampleNameNegative" | |
347 )) { | |
348 args[[argument]] <- NULL | |
349 } | |
350 return(list(zipfile = zipfile, singlefile = singlefile, args = args)) | |
351 } | |
352 | |
353 | |
354 # This function retrieve the raw file in the working directory | |
355 # - if zipfile: unzip the file with its directory tree | |
356 # - if singlefiles: set symlink with the good filename | |
357 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile) { | |
358 if (!is.null(singlefile) && (length("singlefile") > 0)) { | |
359 for (singlefile_sampleName in names(singlefile)) { | |
360 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | |
361 if (!file.exists(singlefile_galaxyPath)) { | |
362 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") | |
363 print(error_message) | |
364 stop(error_message) | |
365 } | |
366 | |
367 file.symlink(singlefile_galaxyPath, singlefile_sampleName) | |
368 } | |
369 directory <- "." | |
370 } | |
371 if (!is.null(zipfile) && (zipfile != "")) { | |
372 if (!file.exists(zipfile)) { | |
373 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") | |
374 print(error_message) | |
375 stop(error_message) | |
376 } | |
377 | |
378 # unzip | |
379 suppressWarnings(unzip(zipfile, unzip = "unzip")) | |
380 | |
381 # get the directory name | |
382 filesInZip <- unzip(zipfile, list = TRUE) | |
383 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) | |
384 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
385 directory <- "." | |
386 if (length(directories) == 1) directory <- directories | |
387 | |
388 cat("files_root_directory\t", directory, "\n") | |
389 } | |
390 return(directory) | |
391 } |