comparison lib.r @ 27:d45a786cbc40 draft

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit dcc90f9cf76e6980c0a7d9698c89fab826e7adae"
author workflow4metabolomics
date Wed, 07 Apr 2021 12:06:33 +0000
parents 0d05d0458376
children 2b676d5eb848
comparison
equal deleted inserted replaced
26:0d05d0458376 27:d45a786cbc40
4 #@author G. Le Corguille 4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) { 6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...) 7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) { 8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE","FALSE")) 9 if (args[key] %in% c("TRUE", "FALSE"))
10 args[key] = as.logical(args[key]) 10 args[key] <- as.logical(args[key])
11 } 11 }
12 return(args) 12 return(args)
13 } 13 }
14 14
15 #@author G. Le Corguille 15 #@author G. Le Corguille
16 # This function will 16 # This function will
17 # - load the packages 17 # - load the packages
18 # - display the sessionInfo 18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) { 19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21 21
22 sessioninfo = sessionInfo() 22 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n") 23 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n") 24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") 25 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t")
27 }
28 cat("\n")
26 cat("Other loaded packages:\n") 29 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") 30 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t")
32 }
33 cat("\n")
28 } 34 }
29 35
30 #@author G. Le Corguille 36 #@author G. Le Corguille
31 # This function merge several chromBPI or chromTIC into one. 37 # This function merge several chromBPI or chromTIC into one.
32 mergeChrom <- function(chrom_merged, chrom) { 38 mergeChrom <- function(chrom_merged, chrom) {
41 chromTIC <- NULL 47 chromTIC <- NULL
42 chromBPI <- NULL 48 chromBPI <- NULL
43 chromTIC_adjusted <- NULL 49 chromTIC_adjusted <- NULL
44 chromBPI_adjusted <- NULL 50 chromBPI_adjusted <- NULL
45 md5sumList <- NULL 51 md5sumList <- NULL
46 for(image in args$images) { 52 for (image in args$images) {
47 53
48 load(image) 54 load(image)
49 # Handle infiles 55 # Handle infiles
50 if (!exists("singlefile")) singlefile <- NULL 56 if (!exists("singlefile")) singlefile <- NULL
51 if (!exists("zipfile")) zipfile <- NULL 57 if (!exists("zipfile")) zipfile <- NULL
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args)
53 zipfile <- rawFilePath$zipfile 59 zipfile <- rawFilePath$zipfile
54 singlefile <- rawFilePath$singlefile 60 singlefile <- rawFilePath$singlefile
55 61
56 if (exists("raw_data")) xdata <- raw_data 62 if (exists("raw_data")) xdata <- raw_data
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
58 64
59 cat(sampleNamesList$sampleNamesOrigin,"\n") 65 cat(sampleNamesList$sampleNamesOrigin, "\n")
60 66
61 if (!exists("xdata_merged")) { 67 if (!exists("xdata_merged")) {
62 xdata_merged <- xdata 68 xdata_merged <- xdata
63 singlefile_merged <- singlefile 69 singlefile_merged <- singlefile
64 md5sumList_merged <- md5sumList 70 md5sumList_merged <- md5sumList
66 chromTIC_merged <- chromTIC 72 chromTIC_merged <- chromTIC
67 chromBPI_merged <- chromBPI 73 chromBPI_merged <- chromBPI
68 chromTIC_adjusted_merged <- chromTIC_adjusted 74 chromTIC_adjusted_merged <- chromTIC_adjusted
69 chromBPI_adjusted_merged <- chromBPI_adjusted 75 chromBPI_adjusted_merged <- chromBPI_adjusted
70 } else { 76 } else {
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata)
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata) 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata)
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
74 80
75 singlefile_merged <- c(singlefile_merged,singlefile) 81 singlefile_merged <- c(singlefile_merged, singlefile)
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin)
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin)
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames)
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
83 } 89 }
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) 95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
90 96
91 if (!is.null(args$sampleMetadata)) { 97 if (!is.null(args$sampleMetadata)) {
92 cat("\tXSET PHENODATA SETTING...\n") 98 cat("\tXSET PHENODATA SETTING...\n")
93 sampleMetadataFile <- args$sampleMetadata 99 sampleMetadataFile <- args$sampleMetadata
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) 100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F)
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] 101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)]
96 102
97 if (any(is.na(pData(xdata)$sample_group))) { 103 if (any(is.na(pData(xdata)$sample_group))) {
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] 104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) 105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " "))
100 print(error_message) 106 print(error_message)
101 stop(error_message) 107 stop(error_message)
102 } 108 }
103 } 109 }
104 110
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } 111 if (!is.null(chromTIC_merged)) {
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } 112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } 113 }
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } 114 if (!is.null(chromBPI_merged)) {
109 115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) 116 }
117 if (!is.null(chromTIC_adjusted_merged)) {
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData
119 }
120 if (!is.null(chromBPI_adjusted_merged)) {
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData
122 }
123
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted))
111 } 125 }
112 126
113 #@author G. Le Corguille 127 #@author G. Le Corguille
114 # This function convert if it is required the Retention Time in minutes 128 # This function convert if it is required the Retention Time in minutes
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
116 if (convertRTMinute){ 130 if (convertRTMinute) {
117 #converting the retention times (seconds) into minutes 131 #converting the retention times (seconds) into minutes
118 print("converting the retention times into minutes in the variableMetadata") 132 print("converting the retention times into minutes in the variableMetadata")
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
122 } 136 }
123 return (variableMetadata) 137 return(variableMetadata)
124 } 138 }
125 139
126 #@author G. Le Corguille 140 #@author G. Le Corguille
127 # This function format ions identifiers 141 # This function format ions identifiers
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { 142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") 143 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) 144 idsDeco <- sapply(splitDeco,
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) 145 function(x) {
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco))
147 }
148 )
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
133 return(variableMetadata) 151 return(variableMetadata)
134 } 152 }
135 153
136 #@author G. Le Corguille 154 #@author G. Le Corguille
137 # This function convert the remain NA to 0 in the dataMatrix 155 # This function convert the remain NA to 0 in the dataMatrix
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { 156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
139 if (naTOzero){ 157 if (naTOzero) {
140 dataMatrix[is.na(dataMatrix)] <- 0 158 dataMatrix[is.na(dataMatrix)] <- 0
141 } 159 }
142 return (dataMatrix) 160 return(dataMatrix)
143 } 161 }
144 162
145 #@author G. Le Corguille 163 #@author G. Le Corguille
146 # Draw the plotChromPeakDensity 3 per page in a pdf file 164 # Draw the plotChromPeakDensity 3 per page in a pdf file
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { 165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) {
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) 166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12)
149 167
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) 168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
151 169
152 if(length(unique(xdata$sample_group))<10){ 170 if (length(unique(xdata$sample_group)) < 10) {
153 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
154 }else{ 172 }else{
155 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3") 173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
156 } 174 }
157 names(group_colors) <- unique(xdata$sample_group) 175 names(group_colors) <- unique(xdata$sample_group)
158 col_per_samp <- as.character(xdata$sample_group) 176 col_per_samp <- as.character(xdata$sample_group)
159 for(i in 1:length(group_colors)){col_per_samp[col_per_samp==(names(group_colors)[i])]<-group_colors[i]} 177 for (i in seq_len(length(group_colors))) {
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i]
179 }
160 180
161 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) 181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
162 for (i in 1:nrow(featureDefinitions(xdata))) { 182 for (i in seq_len(nrow(featureDefinitions(xdata)))) {
163 mzmin = featureDefinitions(xdata)[i,]$mzmin 183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin
164 mzmax = featureDefinitions(xdata)[i,]$mzmax 184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax
165 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=col_per_samp, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) 185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
166 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
167 } 187 }
168 188
169 dev.off() 189 dev.off()
170 } 190 }
171 191
172 #@author G. Le Corguille 192 #@author G. Le Corguille
173 # Draw the plotChromPeakDensity 3 per page in a pdf file 193 # Draw the plotChromPeakDensity 3 per page in a pdf file
174 getPlotAdjustedRtime <- function(xdata) { 194 getPlotAdjustedRtime <- function(xdata) {
175 195
176 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) 196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12)
177 197
178 # Color by group 198 # Color by group
179 if(length(unique(xdata$sample_group))<10){ 199 if (length(unique(xdata$sample_group)) < 10) {
180 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
181 }else{ 201 } else {
182 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3") 202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
183 } 203 }
184 if (length(group_colors) > 1) { 204 if (length(group_colors) > 1) {
185 names(group_colors) <- unique(xdata$sample_group) 205 names(group_colors) <- unique(xdata$sample_group)
186 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
187 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
188 } 208 }
189 209
190 # Color by sample 210 # Color by sample
191 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) 211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
192 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
193 213
194 dev.off() 214 dev.off()
195 } 215 }
196 216
197 #@author G. Le Corguille 217 #@author G. Le Corguille
198 # value: intensity values to be used into, maxo or intb 218 # value: intensity values to be used into, maxo or intb
199 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { 219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
200 dataMatrix <- featureValues(xdata, method="medret", value=intval) 220 dataMatrix <- featureValues(xdata, method = "medret", value = intval)
201 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) 221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
202 dataMatrix = cbind(name=groupnames(xdata), dataMatrix) 222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix)
203 variableMetadata <- featureDefinitions(xdata) 223 variableMetadata <- featureDefinitions(xdata)
204 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" 224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt"
205 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata) 225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata)
206 226
207 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) 227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
208 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
209 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) 229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
210 230
211 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export 231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
212 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") 232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
213 233
214 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) 234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F)
215 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) 235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F)
216 236
217 } 237 }
218 238
219 #@author G. Le Corguille 239 #@author G. Le Corguille
220 # It allow different of field separators 240 # It allow different of field separators
221 getDataFrameFromFile <- function(filename, header=T) { 241 getDataFrameFromFile <- function(filename, header = T) {
222 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) 242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F)
223 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) 243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F)
224 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) 244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F)
225 if (ncol(myDataFrame) < 2) { 245 if (ncol(myDataFrame) < 2) {
226 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" 246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
227 print(error_message) 247 print(error_message)
228 stop(error_message) 248 stop(error_message)
229 } 249 }
230 return(myDataFrame) 250 return(myDataFrame)
231 } 251 }
232 252
233 #@author G. Le Corguille 253 #@author G. Le Corguille
234 # Draw the BPI and TIC graphics 254 # Draw the BPI and TIC graphics
235 # colored by sample names or class names 255 # colored by sample names or class names
236 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { 256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") {
237 257
238 if (aggregationFun == "sum") 258 if (aggregationFun == "sum")
239 type="Total Ion Chromatograms" 259 type <- "Total Ion Chromatograms"
240 else 260 else
241 type="Base Peak Intensity Chromatograms" 261 type <- "Base Peak Intensity Chromatograms"
242 262
243 adjusted="Raw" 263 adjusted <- "Raw"
244 if (hasAdjustedRtime(xdata)) 264 if (hasAdjustedRtime(xdata))
245 adjusted="Adjusted" 265 adjusted <- "Adjusted"
246 266
247 main <- paste(type,":",adjusted,"data") 267 main <- paste(type, ":", adjusted, "data")
248 268
249 pdf(pdfname, width=16, height=10) 269 pdf(pdfname, width = 16, height = 10)
250 270
251 # Color by group 271 # Color by group
252 if(length(unique(xdata$sample_group))<10){ 272 if (length(unique(xdata$sample_group)) < 10) {
253 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
254 }else{ 274 }else{
255 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3") 275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
256 } 276 }
257 if (length(group_colors) > 1) { 277 if (length(group_colors) > 1) {
258 names(group_colors) <- unique(xdata$sample_group) 278 names(group_colors) <- unique(xdata$sample_group)
259 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") 279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none")
260 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
261 } 281 }
262 282
263 # Color by sample 283 # Color by sample
264 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") 284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none")
265 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
266 286
267 dev.off() 287 dev.off()
268 } 288 }
269 289
270 290
271 # Get the polarities from all the samples of a condition 291 # Get the polarities from all the samples of a condition
272 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
273 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
274 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") {
275 cat("Creating the sampleMetadata file...\n") 295 cat("Creating the sampleMetadata file...\n")
276 296
277 #Create the sampleMetada dataframe 297 #Create the sampleMetada dataframe
278 sampleMetadata <- xdata@phenoData@data 298 sampleMetadata <- xdata@phenoData@data
279 rownames(sampleMetadata) <- NULL 299 rownames(sampleMetadata) <- NULL
283 sampleNamesMakeNames <- make.names(sampleNamesOrigin) 303 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
284 304
285 if (any(duplicated(sampleNamesMakeNames))) { 305 if (any(duplicated(sampleNamesMakeNames))) {
286 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
287 for (sampleName in sampleNamesOrigin) { 307 for (sampleName in sampleNamesOrigin) {
288 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
289 } 309 }
290 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
291 } 311 }
292 312
293 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
294 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
295 for (sampleName in sampleNamesOrigin) { 315 for (sampleName in sampleNamesOrigin) {
296 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
297 } 317 }
298 } 318 }
299 319
300 sampleMetadata$sample_name <- sampleNamesMakeNames 320 sampleMetadata$sample_name <- sampleNamesMakeNames
301 321
302 322
303 #For each sample file, the following actions are done 323 #For each sample file, the following actions are done
304 for (fileIdx in 1:length(fileNames(xdata))) { 324 for (fileIdx in seq_len(length(fileNames(xdata)))) {
305 #Check if the file is in the CDF format 325 #Check if the file is in the CDF format
306 if (!mzR:::netCDFIsFile(fileNames(xdata))) { 326 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
307 327
308 # If the column isn't exist, with add one filled with NA 328 # If the column isn't exist, with add one filled with NA
309 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA 329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
310 330
311 #Extract the polarity (a list of polarities) 331 #Extract the polarity (a list of polarities)
312 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] 332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"]
313 #Verify if all the scans have the same polarity 333 #Verify if all the scans have the same polarity
314 uniq_list <- unique(polarity) 334 uniq_list <- unique(polarity)
315 if (length(uniq_list)>1){ 335 if (length(uniq_list) > 1) {
316 polarity <- "mixed" 336 polarity <- "mixed"
317 } else { 337 } else {
318 polarity <- as.character(uniq_list) 338 polarity <- as.character(uniq_list)
319 } 339 }
320 340
322 sampleMetadata$polarity[fileIdx] <- polarity 342 sampleMetadata$polarity[fileIdx] <- polarity
323 } 343 }
324 344
325 } 345 }
326 346
327 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput)
328 348
329 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) 349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
330 350
331 } 351 }
332 352
333 353
334 # This function will compute MD5 checksum to check the data integrity 354 # This function will compute MD5 checksum to check the data integrity
335 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
336 getMd5sum <- function (files) { 356 getMd5sum <- function(files) {
337 cat("Compute md5 checksum...\n") 357 cat("Compute md5 checksum...\n")
338 library(tools) 358 library(tools)
339 return(as.matrix(md5sum(files))) 359 return(as.matrix(md5sum(files)))
340 } 360 }
341 361
342 # This function retrieve the raw file in the working directory 362 # This function retrieve the raw file in the working directory
343 # - if zipfile: unzip the file with its directory tree 363 # - if zipfile: unzip the file with its directory tree
344 # - if singlefiles: set symlink with the good filename 364 # - if singlefiles: set symlink with the good filename
345 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
346 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { 366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") {
347 367
348 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") 368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
349 369
350 # single - if the file are passed in the command arguments -> refresh singlefile 370 # single - if the file are passed in the command arguments -> refresh singlefile
351 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { 371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
352 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) 372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|"))
353 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) 373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|"))
354 374
355 singlefile <- NULL 375 singlefile <- NULL
356 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { 376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
357 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] 377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
358 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] 378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
359 # In case, an url is used to import data within Galaxy 379 # In case, an url is used to import data within Galaxy
360 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) 380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
361 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath 381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
362 } 382 }
363 } 383 }
364 # zipfile - if the file are passed in the command arguments -> refresh zipfile 384 # zipfile - if the file are passed in the command arguments -> refresh zipfile
365 if (!is.null(args[[paste0("zipfile",prefix)]])) 385 if (!is.null(args[[paste0("zipfile", prefix)]]))
366 zipfile <- args[[paste0("zipfile",prefix)]] 386 zipfile <- args[[paste0("zipfile", prefix)]]
367 387
368 # single 388 # single
369 if(!is.null(singlefile) && (length("singlefile")>0)) { 389 if (!is.null(singlefile) && (length("singlefile") > 0)) {
370 files <- vector() 390 files <- vector()
371 for (singlefile_sampleName in names(singlefile)) { 391 for (singlefile_sampleName in names(singlefile)) {
372 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] 392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
373 if(!file.exists(singlefile_galaxyPath)){ 393 if (!file.exists(singlefile_galaxyPath)) {
374 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") 394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
375 print(error_message); stop(error_message) 395 print(error_message); stop(error_message)
376 } 396 }
377 397
378 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) 398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T)))
379 file.copy(singlefile_galaxyPath, singlefile_sampleName) 399 file.copy(singlefile_galaxyPath, singlefile_sampleName)
380 files <- c(files, singlefile_sampleName) 400 files <- c(files, singlefile_sampleName)
381 } 401 }
382 } 402 }
383 # zipfile 403 # zipfile
384 if(!is.null(zipfile) && (zipfile != "")) { 404 if (!is.null(zipfile) && (zipfile != "")) {
385 if(!file.exists(zipfile)){ 405 if (!file.exists(zipfile)) {
386 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
387 print(error_message) 407 print(error_message)
388 stop(error_message) 408 stop(error_message)
389 } 409 }
390 suppressWarnings(unzip(zipfile, unzip="unzip")) 410 suppressWarnings(unzip(zipfile, unzip = "unzip"))
391 411
392 #get the directory name 412 #get the directory name
393 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) 413 suppressWarnings(filesInZip <- unzip(zipfile, list = T))
394 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) 414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
395 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
396 directory <- "." 416 directory <- "."
397 if (length(directories) == 1) directory <- directories 417 if (length(directories) == 1) directory <- directories
398 418
399 cat("files_root_directory\t",directory,"\n") 419 cat("files_root_directory\t", directory, "\n")
400 420
401 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
402 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") 422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
403 info <- file.info(directory) 423 info <- file.info(directory)
404 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) 424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
405 files <- c(directory[!info$isdir], listed) 425 files <- c(directory[!info$isdir], listed)
406 exists <- file.exists(files) 426 exists <- file.exists(files)
407 files <- files[exists] 427 files <- files[exists]
408 428
409 } 429 }
410 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) 430 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
411 431
412 } 432 }
413 433
414 434
415 # This function retrieve a xset like object 435 # This function retrieve a xset like object
416 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
417 getxcmsSetObject <- function(xobject) { 437 getxcmsSetObject <- function(xobject) {
418 # XCMS 1.x 438 # XCMS 1.x
419 if (class(xobject) == "xcmsSet") 439 if (class(xobject) == "xcmsSet")
420 return (xobject) 440 return(xobject)
421 # XCMS 3.x 441 # XCMS 3.x
422 if (class(xobject) == "XCMSnExp") { 442 if (class(xobject) == "XCMSnExp") {
423 # Get the legacy xcmsSet object 443 # Get the legacy xcmsSet object
424 suppressWarnings(xset <- as(xobject, 'xcmsSet')) 444 suppressWarnings(xset <- as(xobject, "xcmsSet"))
425 if (!is.null(xset@phenoData$sample_group)) 445 if (!is.null(xset@phenoData$sample_group))
426 sampclass(xset) <- xset@phenoData$sample_group 446 sampclass(xset) <- xset@phenoData$sample_group
427 else 447 else
428 sampclass(xset) <- "." 448 sampclass(xset) <- "."
429 return (xset) 449 return(xset)
430 } 450 }
431 } 451 }