comparison lib.r @ 3:6c2798bce3eb draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/ commit 95721ced8347c09e79340e6d67ecb41c5cc64163
author workflow4metabolomics
date Mon, 03 Feb 2025 14:46:55 +0000
parents 20f8ebc3a391
children
comparison
equal deleted inserted replaced
2:090b33a9981c 3:6c2798bce3eb
1 #@authors ABiMS TEAM, Y. Guitton 1 # @authors ABiMS TEAM, Y. Guitton
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3 3
4 #@author G. Le Corguille 4 # @author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) { 6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...) 7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) { 8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE", "FALSE")) 9 if (args[key] %in% c("TRUE", "FALSE")) {
10 args[key] <- as.logical(args[key]) 10 args[key] <- as.logical(args[key])
11 } 11 }
12 return(args) 12 }
13 } 13 return(args)
14 14 }
15 #@author G. Le Corguille 15
16 # @author G. Le Corguille
16 # This function will 17 # This function will
17 # - load the packages 18 # - load the packages
18 # - display the sessionInfo 19 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) { 20 loadAndDisplayPackages <- function(pkgs) {
20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) 21 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21 22
22 sessioninfo <- sessionInfo() 23 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string, "\n") 24 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n") 25 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { 26 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t") 27 cat(paste(pkg, packageVersion(pkg)), "\t")
27 } 28 }
28 cat("\n") 29 cat("\n")
29 cat("Other loaded packages:\n") 30 cat("Other loaded packages:\n")
30 for (pkg in names(sessioninfo$loadedOnly)) { 31 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t") 32 cat(paste(pkg, packageVersion(pkg)), "\t")
32 } 33 }
33 cat("\n") 34 cat("\n")
34 } 35 }
35 36
36 #@author G. Le Corguille 37 # @author G. Le Corguille
37 # This function merge several chromBPI or chromTIC into one. 38 # This function merge several chromBPI or chromTIC into one.
38 mergeChrom <- function(chrom_merged, chrom) { 39 mergeChrom <- function(chrom_merged, chrom) {
39 if (is.null(chrom_merged)) return(NULL) 40 if (is.null(chrom_merged)) {
40 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data) 41 return(NULL)
41 return(chrom_merged) 42 }
42 } 43 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data)
43 44 return(chrom_merged)
44 #@author G. Le Corguille 45 }
46
47 # @author G. Le Corguille
45 # This function merge several xdata into one. 48 # This function merge several xdata into one.
46 mergeXData <- function(args) { 49 mergeXData <- function(args) {
47 chromTIC <- NULL 50 chromTIC <- NULL
48 chromBPI <- NULL 51 chromBPI <- NULL
49 chromTIC_adjusted <- NULL 52 chromTIC_adjusted <- NULL
50 chromBPI_adjusted <- NULL 53 chromBPI_adjusted <- NULL
51 md5sumList <- NULL 54 md5sumList <- NULL
52 for (image in args$images) { 55 for (image in args$images) {
53 56 load(image)
54 load(image) 57 # Handle infiles
55 # Handle infiles 58 if (!exists("singlefile")) singlefile <- NULL
56 if (!exists("singlefile")) singlefile <- NULL 59 if (!exists("zipfile")) zipfile <- NULL
57 if (!exists("zipfile")) zipfile <- NULL 60 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args)
58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args) 61 zipfile <- rawFilePath$zipfile
59 zipfile <- rawFilePath$zipfile 62 singlefile <- rawFilePath$singlefile
60 singlefile <- rawFilePath$singlefile 63
61 64 if (exists("raw_data")) xdata <- raw_data
62 if (exists("raw_data")) xdata <- raw_data 65 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") 66
64 67 cat(sampleNamesList$sampleNamesOrigin, "\n")
65 cat(sampleNamesList$sampleNamesOrigin, "\n") 68
66 69 if (!exists("xdata_merged")) {
67 if (!exists("xdata_merged")) { 70 xdata_merged <- xdata
68 xdata_merged <- xdata 71 singlefile_merged <- singlefile
69 singlefile_merged <- singlefile 72 md5sumList_merged <- md5sumList
70 md5sumList_merged <- md5sumList 73 sampleNamesList_merged <- sampleNamesList
71 sampleNamesList_merged <- sampleNamesList 74 chromTIC_merged <- chromTIC
72 chromTIC_merged <- chromTIC 75 chromBPI_merged <- chromBPI
73 chromBPI_merged <- chromBPI 76 chromTIC_adjusted_merged <- chromTIC_adjusted
74 chromTIC_adjusted_merged <- chromTIC_adjusted 77 chromBPI_adjusted_merged <- chromBPI_adjusted
75 chromBPI_adjusted_merged <- chromBPI_adjusted 78 } else {
76 } else { 79 if (is(xdata, "XCMSnExp")) {
77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata) 80 xdata_merged <- c(xdata_merged, xdata)
78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata) 81 } else if (is(xdata, "OnDiskMSnExp")) {
79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") 82 xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata)
80 83 } else {
81 singlefile_merged <- c(singlefile_merged, singlefile) 84 stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin) 85 }
83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin) 86
84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames) 87 singlefile_merged <- c(singlefile_merged, singlefile)
85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) 88 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin)
86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) 89 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin)
87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) 90 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames)
88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) 91 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
89 } 92 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
90 } 93 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
91 rm(image) 94 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
92 xdata <- xdata_merged 95 }
93 rm(xdata_merged) 96 }
94 singlefile <- singlefile_merged 97 rm(image)
95 rm(singlefile_merged) 98 xdata <- xdata_merged
96 md5sumList <- md5sumList_merged 99 rm(xdata_merged)
97 rm(md5sumList_merged) 100 singlefile <- singlefile_merged
98 sampleNamesList <- sampleNamesList_merged 101 rm(singlefile_merged)
99 rm(sampleNamesList_merged) 102 md5sumList <- md5sumList_merged
100 103 rm(md5sumList_merged)
101 if (!is.null(args$sampleMetadata)) { 104 sampleNamesList <- sampleNamesList_merged
102 cat("\tXSET PHENODATA SETTING...\n") 105 rm(sampleNamesList_merged)
103 sampleMetadataFile <- args$sampleMetadata 106
104 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = FALSE) 107 if (!is.null(args$sampleMetadata)) {
105 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)] 108 cat("\tXSET PHENODATA SETTING...\n")
106 109 sampleMetadataFile <- args$sampleMetadata
107 if (any(is.na(pData(xdata)$sample_group))) { 110 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = FALSE)
108 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] 111 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)]
109 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " ")) 112
110 print(error_message) 113 if (any(is.na(pData(xdata)$sample_group))) {
111 stop(error_message) 114 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
112 } 115 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " "))
113 } 116 print(error_message)
114 117 stop(error_message)
115 if (!is.null(chromTIC_merged)) { 118 }
116 chromTIC <- chromTIC_merged 119 }
117 chromTIC@phenoData <- xdata@phenoData 120
118 } 121 if (!is.null(chromTIC_merged)) {
119 if (!is.null(chromBPI_merged)) { 122 chromTIC <- chromTIC_merged
120 chromBPI <- chromBPI_merged 123 chromTIC@phenoData <- xdata@phenoData
121 chromBPI@phenoData <- xdata@phenoData 124 }
122 } 125 if (!is.null(chromBPI_merged)) {
123 if (!is.null(chromTIC_adjusted_merged)) { 126 chromBPI <- chromBPI_merged
124 chromTIC_adjusted <- chromTIC_adjusted_merged 127 chromBPI@phenoData <- xdata@phenoData
125 chromTIC_adjusted@phenoData <- xdata@phenoData 128 }
126 } 129 if (!is.null(chromTIC_adjusted_merged)) {
127 if (!is.null(chromBPI_adjusted_merged)) { 130 chromTIC_adjusted <- chromTIC_adjusted_merged
128 chromBPI_adjusted <- chromBPI_adjusted_merged 131 chromTIC_adjusted@phenoData <- xdata@phenoData
129 chromBPI_adjusted@phenoData <- xdata@phenoData 132 }
130 } 133 if (!is.null(chromBPI_adjusted_merged)) {
131 134 chromBPI_adjusted <- chromBPI_adjusted_merged
132 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted)) 135 chromBPI_adjusted@phenoData <- xdata@phenoData
133 } 136 }
134 137
135 #@author G. Le Corguille 138 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted))
139 }
140
141 # @author G. Le Corguille
136 # This function convert if it is required the Retention Time in minutes 142 # This function convert if it is required the Retention Time in minutes
137 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 143 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
138 if (convertRTMinute) { 144 if (convertRTMinute) {
139 #converting the retention times (seconds) into minutes 145 # converting the retention times (seconds) into minutes
140 print("converting the retention times into minutes in the variableMetadata") 146 print("converting the retention times into minutes in the variableMetadata")
141 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 147 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
142 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 148 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
143 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 149 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
144 } 150 }
145 return(variableMetadata) 151 return(variableMetadata)
146 } 152 }
147 153
148 #@author G. Le Corguille 154 # @author G. Le Corguille
149 # This function format ions identifiers 155 # This function format ions identifiers
150 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { 156 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
151 splitDeco <- strsplit(as.character(variableMetadata$name), "_") 157 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
152 idsDeco <- sapply(splitDeco, 158 idsDeco <- sapply(
153 function(x) { 159 splitDeco,
154 deco <- unlist(x)[2] 160 function(x) {
155 if (is.na(deco)) return("") else return(paste0("_", deco)) 161 deco <- unlist(x)[2]
156 } 162 if (is.na(deco)) {
157 ) 163 return("")
158 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) 164 } else {
159 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) 165 return(paste0("_", deco))
160 return(variableMetadata) 166 }
161 } 167 }
162 168 )
163 #@author G. Le Corguille 169 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
170 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
171 return(variableMetadata)
172 }
173
174 # @author G. Le Corguille
164 # This function convert the remain NA to 0 in the dataMatrix 175 # This function convert the remain NA to 0 in the dataMatrix
165 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { 176 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
166 if (naTOzero) { 177 if (naTOzero) {
167 dataMatrix[is.na(dataMatrix)] <- 0 178 dataMatrix[is.na(dataMatrix)] <- 0
168 } 179 }
169 return(dataMatrix) 180 return(dataMatrix)
170 } 181 }
171 182
172 #@author G. Le Corguille 183 # @author G. Le Corguille
173 # Draw the plotChromPeakDensity 3 per page in a pdf file 184 # Draw the plotChromPeakDensity 3 per page in a pdf file
174 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) { 185 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) {
175 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12) 186 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12)
176 187
177 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) 188 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
178 189
179 if (length(unique(xdata$sample_group)) < 10) { 190 if (length(unique(xdata$sample_group)) < 10) {
180 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 191 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
181 } else { 192 } else {
182 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") 193 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
183 } 194 }
184 names(group_colors) <- unique(xdata$sample_group) 195 names(group_colors) <- unique(xdata$sample_group)
185 col_per_samp <- as.character(xdata$sample_group) 196 col_per_samp <- as.character(xdata$sample_group)
186 for (i in seq_len(length(group_colors))) { 197 for (i in seq_len(length(group_colors))) {
187 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i] 198 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i]
188 } 199 }
189 200
190 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) 201 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
191 for (i in seq_len(nrow(featureDefinitions(xdata)))) { 202 for (i in seq_len(nrow(featureDefinitions(xdata)))) {
192 mzmin <- featureDefinitions(xdata)[i, ]$mzmin 203 mzmin <- featureDefinitions(xdata)[i, ]$mzmin
193 mzmax <- featureDefinitions(xdata)[i, ]$mzmax 204 mzmax <- featureDefinitions(xdata)[i, ]$mzmax
194 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit))) 205 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
195 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) 206 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
196 } 207 }
197 208
198 dev.off() 209 dev.off()
199 } 210 }
200 211
201 #@author G. Le Corguille 212 # @author G. Le Corguille
202 # Draw the plotChromPeakDensity 3 per page in a pdf file 213 # Draw the plotChromPeakDensity 3 per page in a pdf file
203 getPlotAdjustedRtime <- function(xdata) { 214 getPlotAdjustedRtime <- function(xdata) {
204 215 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12)
205 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12) 216
206 217 # Color by group
207 # Color by group 218 if (length(unique(xdata$sample_group)) < 10) {
208 if (length(unique(xdata$sample_group)) < 10) { 219 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
209 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 220 } else {
210 } else { 221 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
211 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") 222 }
212 } 223 if (length(group_colors) > 1) {
213 if (length(group_colors) > 1) { 224 names(group_colors) <- unique(xdata$sample_group)
214 names(group_colors) <- unique(xdata$sample_group) 225 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
215 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 226 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
216 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) 227 }
217 } 228
218 229 # Color by sample
219 # Color by sample 230 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
220 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) 231 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
221 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) 232
222 233 dev.off()
223 dev.off() 234 }
224 } 235
225 236 # @author G. Le Corguille
226 #@author G. Le Corguille
227 # value: intensity values to be used into, maxo or intb 237 # value: intensity values to be used into, maxo or intb
228 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = FALSE, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = TRUE, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { 238 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = FALSE, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = TRUE, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
229 dataMatrix <- featureValues(xdata, method = "medret", value = intval) 239 dataMatrix <- featureValues(xdata, method = "medret", value = intval)
230 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) 240 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
231 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix) 241 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix)
232 variableMetadata <- featureDefinitions(xdata) 242 variableMetadata <- featureDefinitions(xdata)
233 colnames(variableMetadata)[1] <- "mz" 243 colnames(variableMetadata)[1] <- "mz"
234 colnames(variableMetadata)[4] <- "rt" 244 colnames(variableMetadata)[4] <- "rt"
235 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata) 245 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata)
236 246
237 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) 247 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
238 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) 248 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
239 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) 249 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
240 250
241 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export 251 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
242 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") 252 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
243 253
244 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = FALSE, row.names = FALSE) 254 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = FALSE, row.names = FALSE)
245 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = FALSE, row.names = FALSE) 255 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = FALSE, row.names = FALSE)
246 256 }
247 } 257
248 258 # @author G. Le Corguille
249 #@author G. Le Corguille
250 # It allow different of field separators 259 # It allow different of field separators
251 getDataFrameFromFile <- function(filename, header = TRUE) { 260 getDataFrameFromFile <- function(filename, header = TRUE) {
252 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = FALSE) 261 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = FALSE)
253 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = FALSE) 262 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = FALSE)
254 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = FALSE) 263 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = FALSE)
255 if (ncol(myDataFrame) < 2) { 264 if (ncol(myDataFrame) < 2) {
256 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" 265 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
257 print(error_message) 266 print(error_message)
258 stop(error_message) 267 stop(error_message)
259 } 268 }
260 return(myDataFrame) 269 return(myDataFrame)
261 } 270 }
262 271
263 #@author G. Le Corguille 272 # @author G. Le Corguille
264 # Draw the BPI and TIC graphics 273 # Draw the BPI and TIC graphics
265 # colored by sample names or class names 274 # colored by sample names or class names
266 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") { 275 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") {
267 276 if (aggregationFun == "sum") {
268 if (aggregationFun == "sum") 277 type <- "Total Ion Chromatograms"
269 type <- "Total Ion Chromatograms" 278 } else {
270 else 279 type <- "Base Peak Intensity Chromatograms"
271 type <- "Base Peak Intensity Chromatograms" 280 }
272 281
273 adjusted <- "Raw" 282 adjusted <- "Raw"
274 if (hasAdjustedRtime(xdata)) 283 if (hasAdjustedRtime(xdata)) {
275 adjusted <- "Adjusted" 284 adjusted <- "Adjusted"
276 285 }
277 main <- paste(type, ":", adjusted, "data") 286
278 287 main <- paste(type, ":", adjusted, "data")
279 pdf(pdfname, width = 16, height = 10) 288
280 289 pdf(pdfname, width = 16, height = 10)
281 # Color by group 290
282 if (length(unique(xdata$sample_group)) < 10) { 291 # Color by group
283 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 292 if (length(unique(xdata$sample_group)) < 10) {
284 } else { 293 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
285 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") 294 } else {
286 } 295 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
287 if (length(group_colors) > 1) { 296 }
288 names(group_colors) <- unique(xdata$sample_group) 297 if (length(group_colors) > 1) {
289 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none") 298 names(group_colors) <- unique(xdata$sample_group)
290 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) 299 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none")
291 } 300 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
292 301 }
293 # Color by sample 302
294 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none") 303 # Color by sample
295 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) 304 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none")
296 305 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
297 dev.off() 306
307 dev.off()
298 } 308 }
299 309
300 310
301 # Get the polarities from all the samples of a condition 311 # Get the polarities from all the samples of a condition
302 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 312 # @author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
303 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 313 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
304 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") { 314 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") {
305 cat("Creating the sampleMetadata file...\n") 315 cat("Creating the sampleMetadata file...\n")
306 316
307 #Create the sampleMetada dataframe 317 # Create the sampleMetada dataframe
308 sampleMetadata <- xdata@phenoData@data 318 sampleMetadata <- xdata@phenoData@data
309 rownames(sampleMetadata) <- NULL 319 rownames(sampleMetadata) <- NULL
310 colnames(sampleMetadata) <- c("sample_name", "class") 320 colnames(sampleMetadata) <- c("sample_name", "class")
311 321
312 sampleNamesOrigin <- sampleMetadata$sample_name 322 sampleNamesOrigin <- sampleMetadata$sample_name
313 sampleNamesMakeNames <- make.names(sampleNamesOrigin) 323 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
314 324
315 if (any(duplicated(sampleNamesMakeNames))) { 325 if (any(duplicated(sampleNamesMakeNames))) {
316 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 326 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
317 for (sampleName in sampleNamesOrigin) { 327 for (sampleName in sampleNamesOrigin) {
318 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr()) 328 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
319 } 329 }
320 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 330 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
321 } 331 }
322 332
323 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 333 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
324 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 334 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
325 for (sampleName in sampleNamesOrigin) { 335 for (sampleName in sampleNamesOrigin) {
326 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n")) 336 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
327 } 337 }
328 } 338 }
329 339
330 sampleMetadata$sample_name <- sampleNamesMakeNames 340 sampleMetadata$sample_name <- sampleNamesMakeNames
331 341
332 342
333 #For each sample file, the following actions are done 343 # For each sample file, the following actions are done
334 for (fileIdx in seq_len(length(fileNames(xdata)))) { 344 for (fileIdx in seq_len(length(fileNames(xdata)))) {
335 #Check if the file is in the CDF format 345 # Check if the file is in the CDF format
336 if (!mzR:::netCDFIsFile(fileNames(xdata))) { 346 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
337 347 # If the column isn't exist, with add one filled with NA
338 # If the column isn't exist, with add one filled with NA 348 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
339 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA 349
340 350 # Extract the polarity (a list of polarities)
341 #Extract the polarity (a list of polarities) 351 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"]
342 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"] 352 # Verify if all the scans have the same polarity
343 #Verify if all the scans have the same polarity 353 uniq_list <- unique(polarity)
344 uniq_list <- unique(polarity) 354 if (length(uniq_list) > 1) {
345 if (length(uniq_list) > 1) { 355 polarity <- "mixed"
346 polarity <- "mixed" 356 } else {
347 } else { 357 polarity <- as.character(uniq_list)
348 polarity <- as.character(uniq_list) 358 }
349 } 359
350 360 # Set the polarity attribute
351 #Set the polarity attribute 361 sampleMetadata$polarity[fileIdx] <- polarity
352 sampleMetadata$polarity[fileIdx] <- polarity 362 }
353 } 363 }
354 364
355 } 365 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput)
356 366
357 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput) 367 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
358
359 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
360
361 } 368 }
362 369
363 370
364 # This function will compute MD5 checksum to check the data integrity 371 # This function will compute MD5 checksum to check the data integrity
365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 372 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr
366 getMd5sum <- function(files) { 373 getMd5sum <- function(files) {
367 cat("Compute md5 checksum...\n") 374 cat("Compute md5 checksum...\n")
368 library(tools) 375 library(tools)
369 return(as.matrix(md5sum(files))) 376 return(as.matrix(md5sum(files)))
370 } 377 }
371 378
372 # This function retrieve the raw file in the working directory 379 # This function retrieve the raw file in the working directory
373 # - if zipfile: unzip the file with its directory tree 380 # - if zipfile: unzip the file with its directory tree
374 # - if singlefiles: set symlink with the good filename 381 # - if singlefiles: set symlink with the good filename
375 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 382 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr
376 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") { 383 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") {
377 384 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
378 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") 385
379 386 # single - if the file are passed in the command arguments -> refresh singlefile
380 # single - if the file are passed in the command arguments -> refresh singlefile 387 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
381 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { 388 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|"))
382 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|")) 389 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|"))
383 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|")) 390
384 391 singlefile <- NULL
385 singlefile <- NULL 392 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
386 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { 393 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
387 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] 394 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
388 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] 395 # In case, an url is used to import data within Galaxy
389 # In case, an url is used to import data within Galaxy 396 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
390 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) 397 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
391 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath 398 }
392 } 399 }
393 } 400 # zipfile - if the file are passed in the command arguments -> refresh zipfile
394 # zipfile - if the file are passed in the command arguments -> refresh zipfile 401 if (!is.null(args[[paste0("zipfile", prefix)]])) {
395 if (!is.null(args[[paste0("zipfile", prefix)]])) 402 zipfile <- args[[paste0("zipfile", prefix)]]
396 zipfile <- args[[paste0("zipfile", prefix)]] 403 }
397 404
398 # single 405 # single
399 if (!is.null(singlefile) && (length("singlefile") > 0)) { 406 if (!is.null(singlefile) && (length("singlefile") > 0)) {
400 files <- vector() 407 files <- vector()
401 for (singlefile_sampleName in names(singlefile)) { 408 for (singlefile_sampleName in names(singlefile)) {
402 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] 409 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
403 if (!file.exists(singlefile_galaxyPath)) { 410 if (!file.exists(singlefile_galaxyPath)) {
404 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") 411 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
405 print(error_message) 412 print(error_message)
406 stop(error_message) 413 stop(error_message)
407 } 414 }
408 415
409 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = TRUE))) 416 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = TRUE))) {
410 file.copy(singlefile_galaxyPath, singlefile_sampleName) 417 file.copy(singlefile_galaxyPath, singlefile_sampleName)
411 files <- c(files, singlefile_sampleName) 418 }
412 } 419 files <- c(files, singlefile_sampleName)
413 } 420 }
414 # zipfile 421 }
415 if (!is.null(zipfile) && (zipfile != "")) { 422 # zipfile
416 if (!file.exists(zipfile)) { 423 if (!is.null(zipfile) && (zipfile != "")) {
417 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") 424 if (!file.exists(zipfile)) {
418 print(error_message) 425 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
419 stop(error_message) 426 print(error_message)
420 } 427 stop(error_message)
421 suppressWarnings(unzip(zipfile, unzip = "unzip")) 428 }
422 429 suppressWarnings(unzip(zipfile, unzip = "unzip"))
423 #get the directory name 430
424 suppressWarnings(filesInZip <- unzip(zipfile, list = TRUE)) 431 # get the directory name
425 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) 432 suppressWarnings(filesInZip <- unzip(zipfile, list = TRUE))
426 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 433 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
427 directory <- "." 434 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
428 if (length(directories) == 1) directory <- directories 435 directory <- "."
429 436 if (length(directories) == 1) directory <- directories
430 cat("files_root_directory\t", directory, "\n") 437
431 438 cat("files_root_directory\t", directory, "\n")
432 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 439
433 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") 440 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
434 info <- file.info(directory) 441 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
435 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) 442 info <- file.info(directory)
436 files <- c(directory[!info$isdir], listed) 443 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
437 exists <- file.exists(files) 444 files <- c(directory[!info$isdir], listed)
438 files <- files[exists] 445 exists <- file.exists(files)
439 446 files <- files[exists]
440 } 447 }
441 return(list(zipfile = zipfile, singlefile = singlefile, files = files)) 448 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
442 } 449 }
443 450
444 451
445 # This function retrieve a xset like object 452 # This function retrieve a xset like object
446 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 453 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr
447 getxcmsSetObject <- function(xobject) { 454 getxcmsSetObject <- function(xobject) {
448 # XCMS 1.x 455 # XCMS 1.x
449 if (class(xobject) == "xcmsSet") 456 if (class(xobject) == "xcmsSet") {
450 return(xobject) 457 return(xobject)
451 # XCMS 3.x 458 }
452 if (class(xobject) == "XCMSnExp") { 459 # XCMS 3.x
453 # Get the legacy xcmsSet object 460 if (class(xobject) == "XCMSnExp") {
454 suppressWarnings(xset <- as(xobject, "xcmsSet")) 461 # Get the legacy xcmsSet object
455 if (!is.null(xset@phenoData$sample_group)) 462 suppressWarnings(xset <- as(xobject, "xcmsSet"))
456 sampclass(xset) <- xset@phenoData$sample_group 463 if (!is.null(xset@phenoData$sample_group)) {
457 else 464 sampclass(xset) <- xset@phenoData$sample_group
458 sampclass(xset) <- "." 465 } else {
459 return(xset) 466 sampclass(xset) <- "."
460 } 467 }
461 } 468 return(xset)
469 }
470 }