comparison lib.r @ 32:b02d1992a43a draft

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit dcc90f9cf76e6980c0a7d9698c89fab826e7adae"
author workflow4metabolomics
date Wed, 07 Apr 2021 16:47:14 +0000
parents eb15a3841da4
children f5d51091cf84
comparison
equal deleted inserted replaced
31:eb15a3841da4 32:b02d1992a43a
4 #@author G. Le Corguille 4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) { 6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...) 7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) { 8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE","FALSE")) 9 if (args[key] %in% c("TRUE", "FALSE"))
10 args[key] = as.logical(args[key]) 10 args[key] <- as.logical(args[key])
11 } 11 }
12 return(args) 12 return(args)
13 } 13 }
14 14
15 #@author G. Le Corguille 15 #@author G. Le Corguille
16 # This function will 16 # This function will
17 # - load the packages 17 # - load the packages
18 # - display the sessionInfo 18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) { 19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21 21
22 sessioninfo = sessionInfo() 22 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n") 23 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n") 24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") 25 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t")
27 }
28 cat("\n")
26 cat("Other loaded packages:\n") 29 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") 30 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t")
32 }
33 cat("\n")
28 } 34 }
29 35
30 #@author G. Le Corguille 36 #@author G. Le Corguille
31 # This function merge several chromBPI or chromTIC into one. 37 # This function merge several chromBPI or chromTIC into one.
32 mergeChrom <- function(chrom_merged, chrom) { 38 mergeChrom <- function(chrom_merged, chrom) {
41 chromTIC <- NULL 47 chromTIC <- NULL
42 chromBPI <- NULL 48 chromBPI <- NULL
43 chromTIC_adjusted <- NULL 49 chromTIC_adjusted <- NULL
44 chromBPI_adjusted <- NULL 50 chromBPI_adjusted <- NULL
45 md5sumList <- NULL 51 md5sumList <- NULL
46 for(image in args$images) { 52 for (image in args$images) {
47 53
48 load(image) 54 load(image)
49 # Handle infiles 55 # Handle infiles
50 if (!exists("singlefile")) singlefile <- NULL 56 if (!exists("singlefile")) singlefile <- NULL
51 if (!exists("zipfile")) zipfile <- NULL 57 if (!exists("zipfile")) zipfile <- NULL
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args)
53 zipfile <- rawFilePath$zipfile 59 zipfile <- rawFilePath$zipfile
54 singlefile <- rawFilePath$singlefile 60 singlefile <- rawFilePath$singlefile
55 61
56 if (exists("raw_data")) xdata <- raw_data 62 if (exists("raw_data")) xdata <- raw_data
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
58 64
59 cat(sampleNamesList$sampleNamesOrigin,"\n") 65 cat(sampleNamesList$sampleNamesOrigin, "\n")
60 66
61 if (!exists("xdata_merged")) { 67 if (!exists("xdata_merged")) {
62 xdata_merged <- xdata 68 xdata_merged <- xdata
63 singlefile_merged <- singlefile 69 singlefile_merged <- singlefile
64 md5sumList_merged <- md5sumList 70 md5sumList_merged <- md5sumList
66 chromTIC_merged <- chromTIC 72 chromTIC_merged <- chromTIC
67 chromBPI_merged <- chromBPI 73 chromBPI_merged <- chromBPI
68 chromTIC_adjusted_merged <- chromTIC_adjusted 74 chromTIC_adjusted_merged <- chromTIC_adjusted
69 chromBPI_adjusted_merged <- chromBPI_adjusted 75 chromBPI_adjusted_merged <- chromBPI_adjusted
70 } else { 76 } else {
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata)
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata) 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata)
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
74 80
75 singlefile_merged <- c(singlefile_merged,singlefile) 81 singlefile_merged <- c(singlefile_merged, singlefile)
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin)
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin)
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames)
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
83 } 89 }
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) 95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
90 96
91 if (!is.null(args$sampleMetadata)) { 97 if (!is.null(args$sampleMetadata)) {
92 cat("\tXSET PHENODATA SETTING...\n") 98 cat("\tXSET PHENODATA SETTING...\n")
93 sampleMetadataFile <- args$sampleMetadata 99 sampleMetadataFile <- args$sampleMetadata
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) 100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F)
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] 101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)]
96 102
97 if (any(is.na(pData(xdata)$sample_group))) { 103 if (any(is.na(pData(xdata)$sample_group))) {
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] 104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) 105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " "))
100 print(error_message) 106 print(error_message)
101 stop(error_message) 107 stop(error_message)
102 } 108 }
103 } 109 }
104 110
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } 111 if (!is.null(chromTIC_merged)) {
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } 112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } 113 }
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } 114 if (!is.null(chromBPI_merged)) {
109 115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) 116 }
117 if (!is.null(chromTIC_adjusted_merged)) {
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData
119 }
120 if (!is.null(chromBPI_adjusted_merged)) {
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData
122 }
123
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted))
111 } 125 }
112 126
113 #@author G. Le Corguille 127 #@author G. Le Corguille
114 # This function convert if it is required the Retention Time in minutes 128 # This function convert if it is required the Retention Time in minutes
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
116 if (convertRTMinute){ 130 if (convertRTMinute) {
117 #converting the retention times (seconds) into minutes 131 #converting the retention times (seconds) into minutes
118 print("converting the retention times into minutes in the variableMetadata") 132 print("converting the retention times into minutes in the variableMetadata")
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
122 } 136 }
123 return (variableMetadata) 137 return(variableMetadata)
124 } 138 }
125 139
126 #@author G. Le Corguille 140 #@author G. Le Corguille
127 # This function format ions identifiers 141 # This function format ions identifiers
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { 142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") 143 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) 144 idsDeco <- sapply(splitDeco,
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) 145 function(x) {
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco))
147 }
148 )
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
133 return(variableMetadata) 151 return(variableMetadata)
134 } 152 }
135 153
136 #@author G. Le Corguille 154 #@author G. Le Corguille
137 # This function convert the remain NA to 0 in the dataMatrix 155 # This function convert the remain NA to 0 in the dataMatrix
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { 156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
139 if (naTOzero){ 157 if (naTOzero) {
140 dataMatrix[is.na(dataMatrix)] <- 0 158 dataMatrix[is.na(dataMatrix)] <- 0
141 } 159 }
142 return (dataMatrix) 160 return(dataMatrix)
143 } 161 }
144 162
145 #@author G. Le Corguille 163 #@author G. Le Corguille
146 # Draw the plotChromPeakDensity 3 per page in a pdf file 164 # Draw the plotChromPeakDensity 3 per page in a pdf file
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { 165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) {
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) 166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12)
149 167
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) 168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
151 169
152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 170 if (length(unique(xdata$sample_group)) < 10) {
171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
172 }else{
173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
174 }
153 names(group_colors) <- unique(xdata$sample_group) 175 names(group_colors) <- unique(xdata$sample_group)
154 col_per_samp <- as.character(xdata$sample_group) 176 col_per_samp <- as.character(xdata$sample_group)
155 for(i in 1:length(group_colors)){col_per_samp[col_per_samp==(names(group_colors)[i])]<-group_colors[i]} 177 for (i in seq_len(length(group_colors))) {
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i]
179 }
156 180
157 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) 181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
158 for (i in 1:nrow(featureDefinitions(xdata))) { 182 for (i in seq_len(nrow(featureDefinitions(xdata)))) {
159 mzmin = featureDefinitions(xdata)[i,]$mzmin 183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin
160 mzmax = featureDefinitions(xdata)[i,]$mzmax 184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax
161 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=col_per_samp, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) 185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
162 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
163 } 187 }
164 188
165 dev.off() 189 dev.off()
166 } 190 }
167 191
168 #@author G. Le Corguille 192 #@author G. Le Corguille
169 # Draw the plotChromPeakDensity 3 per page in a pdf file 193 # Draw the plotChromPeakDensity 3 per page in a pdf file
170 getPlotAdjustedRtime <- function(xdata) { 194 getPlotAdjustedRtime <- function(xdata) {
171 195
172 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) 196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12)
173 197
174 # Color by group 198 # Color by group
175 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 199 if (length(unique(xdata$sample_group)) < 10) {
200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
201 } else {
202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
203 }
176 if (length(group_colors) > 1) { 204 if (length(group_colors) > 1) {
177 names(group_colors) <- unique(xdata$sample_group) 205 names(group_colors) <- unique(xdata$sample_group)
178 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
179 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
180 } 208 }
181 209
182 # Color by sample 210 # Color by sample
183 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) 211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
184 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
185 213
186 dev.off() 214 dev.off()
187 } 215 }
188 216
189 #@author G. Le Corguille 217 #@author G. Le Corguille
190 # value: intensity values to be used into, maxo or intb 218 # value: intensity values to be used into, maxo or intb
191 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { 219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
192 dataMatrix <- featureValues(xdata, method="medret", value=intval) 220 dataMatrix <- featureValues(xdata, method = "medret", value = intval)
193 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) 221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
194 dataMatrix = cbind(name=groupnames(xdata), dataMatrix) 222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix)
195 variableMetadata <- featureDefinitions(xdata) 223 variableMetadata <- featureDefinitions(xdata)
196 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" 224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt"
197 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata) 225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata)
198 226
199 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) 227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
200 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
201 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) 229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
202 230
203 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export 231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
204 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") 232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
205 233
206 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) 234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F)
207 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) 235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F)
208 236
209 } 237 }
210 238
211 #@author G. Le Corguille 239 #@author G. Le Corguille
212 # It allow different of field separators 240 # It allow different of field separators
213 getDataFrameFromFile <- function(filename, header=T) { 241 getDataFrameFromFile <- function(filename, header = T) {
214 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) 242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F)
215 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) 243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F)
216 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) 244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F)
217 if (ncol(myDataFrame) < 2) { 245 if (ncol(myDataFrame) < 2) {
218 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" 246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
219 print(error_message) 247 print(error_message)
220 stop(error_message) 248 stop(error_message)
221 } 249 }
222 return(myDataFrame) 250 return(myDataFrame)
223 } 251 }
224 252
225 #@author G. Le Corguille 253 #@author G. Le Corguille
226 # Draw the BPI and TIC graphics 254 # Draw the BPI and TIC graphics
227 # colored by sample names or class names 255 # colored by sample names or class names
228 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { 256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") {
229 257
230 if (aggregationFun == "sum") 258 if (aggregationFun == "sum")
231 type="Total Ion Chromatograms" 259 type <- "Total Ion Chromatograms"
232 else 260 else
233 type="Base Peak Intensity Chromatograms" 261 type <- "Base Peak Intensity Chromatograms"
234 262
235 adjusted="Raw" 263 adjusted <- "Raw"
236 if (hasAdjustedRtime(xdata)) 264 if (hasAdjustedRtime(xdata))
237 adjusted="Adjusted" 265 adjusted <- "Adjusted"
238 266
239 main <- paste(type,":",adjusted,"data") 267 main <- paste(type, ":", adjusted, "data")
240 268
241 pdf(pdfname, width=16, height=10) 269 pdf(pdfname, width = 16, height = 10)
242 270
243 # Color by group 271 # Color by group
244 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 272 if (length(unique(xdata$sample_group)) < 10) {
273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
274 }else{
275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
276 }
245 if (length(group_colors) > 1) { 277 if (length(group_colors) > 1) {
246 names(group_colors) <- unique(xdata$sample_group) 278 names(group_colors) <- unique(xdata$sample_group)
247 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") 279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none")
248 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
249 } 281 }
250 282
251 # Color by sample 283 # Color by sample
252 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") 284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none")
253 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
254 286
255 dev.off() 287 dev.off()
256 } 288 }
257 289
258 290
259 # Get the polarities from all the samples of a condition 291 # Get the polarities from all the samples of a condition
260 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
261 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
262 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") {
263 cat("Creating the sampleMetadata file...\n") 295 cat("Creating the sampleMetadata file...\n")
264 296
265 #Create the sampleMetada dataframe 297 #Create the sampleMetada dataframe
266 sampleMetadata <- xdata@phenoData@data 298 sampleMetadata <- xdata@phenoData@data
267 rownames(sampleMetadata) <- NULL 299 rownames(sampleMetadata) <- NULL
271 sampleNamesMakeNames <- make.names(sampleNamesOrigin) 303 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
272 304
273 if (any(duplicated(sampleNamesMakeNames))) { 305 if (any(duplicated(sampleNamesMakeNames))) {
274 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
275 for (sampleName in sampleNamesOrigin) { 307 for (sampleName in sampleNamesOrigin) {
276 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
277 } 309 }
278 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
279 } 311 }
280 312
281 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
282 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
283 for (sampleName in sampleNamesOrigin) { 315 for (sampleName in sampleNamesOrigin) {
284 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
285 } 317 }
286 } 318 }
287 319
288 sampleMetadata$sample_name <- sampleNamesMakeNames 320 sampleMetadata$sample_name <- sampleNamesMakeNames
289 321
290 322
291 #For each sample file, the following actions are done 323 #For each sample file, the following actions are done
292 for (fileIdx in 1:length(fileNames(xdata))) { 324 for (fileIdx in seq_len(length(fileNames(xdata)))) {
293 #Check if the file is in the CDF format 325 #Check if the file is in the CDF format
294 if (!mzR:::netCDFIsFile(fileNames(xdata))) { 326 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
295 327
296 # If the column isn't exist, with add one filled with NA 328 # If the column isn't exist, with add one filled with NA
297 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA 329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
298 330
299 #Extract the polarity (a list of polarities) 331 #Extract the polarity (a list of polarities)
300 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] 332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"]
301 #Verify if all the scans have the same polarity 333 #Verify if all the scans have the same polarity
302 uniq_list <- unique(polarity) 334 uniq_list <- unique(polarity)
303 if (length(uniq_list)>1){ 335 if (length(uniq_list) > 1) {
304 polarity <- "mixed" 336 polarity <- "mixed"
305 } else { 337 } else {
306 polarity <- as.character(uniq_list) 338 polarity <- as.character(uniq_list)
307 } 339 }
308 340
310 sampleMetadata$polarity[fileIdx] <- polarity 342 sampleMetadata$polarity[fileIdx] <- polarity
311 } 343 }
312 344
313 } 345 }
314 346
315 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput)
316 348
317 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) 349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
318 350
319 } 351 }
320 352
321 353
322 # This function will compute MD5 checksum to check the data integrity 354 # This function will compute MD5 checksum to check the data integrity
323 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
324 getMd5sum <- function (files) { 356 getMd5sum <- function(files) {
325 cat("Compute md5 checksum...\n") 357 cat("Compute md5 checksum...\n")
326 library(tools) 358 library(tools)
327 return(as.matrix(md5sum(files))) 359 return(as.matrix(md5sum(files)))
328 } 360 }
329 361
330 # This function retrieve the raw file in the working directory 362 # This function retrieve the raw file in the working directory
331 # - if zipfile: unzip the file with its directory tree 363 # - if zipfile: unzip the file with its directory tree
332 # - if singlefiles: set symlink with the good filename 364 # - if singlefiles: set symlink with the good filename
333 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
334 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { 366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") {
335 367
336 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") 368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
337 369
338 # single - if the file are passed in the command arguments -> refresh singlefile 370 # single - if the file are passed in the command arguments -> refresh singlefile
339 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { 371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
340 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) 372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|"))
341 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) 373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|"))
342 374
343 singlefile <- NULL 375 singlefile <- NULL
344 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { 376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
345 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] 377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
346 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] 378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
347 # In case, an url is used to import data within Galaxy 379 # In case, an url is used to import data within Galaxy
348 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) 380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
349 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath 381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
350 } 382 }
351 } 383 }
352 # zipfile - if the file are passed in the command arguments -> refresh zipfile 384 # zipfile - if the file are passed in the command arguments -> refresh zipfile
353 if (!is.null(args[[paste0("zipfile",prefix)]])) 385 if (!is.null(args[[paste0("zipfile", prefix)]]))
354 zipfile <- args[[paste0("zipfile",prefix)]] 386 zipfile <- args[[paste0("zipfile", prefix)]]
355 387
356 # single 388 # single
357 if(!is.null(singlefile) && (length("singlefile")>0)) { 389 if (!is.null(singlefile) && (length("singlefile") > 0)) {
358 files <- vector() 390 files <- vector()
359 for (singlefile_sampleName in names(singlefile)) { 391 for (singlefile_sampleName in names(singlefile)) {
360 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] 392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
361 if(!file.exists(singlefile_galaxyPath)){ 393 if (!file.exists(singlefile_galaxyPath)) {
362 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") 394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
363 print(error_message); stop(error_message) 395 print(error_message); stop(error_message)
364 } 396 }
365 397
366 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) 398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T)))
367 file.copy(singlefile_galaxyPath, singlefile_sampleName) 399 file.copy(singlefile_galaxyPath, singlefile_sampleName)
368 files <- c(files, singlefile_sampleName) 400 files <- c(files, singlefile_sampleName)
369 } 401 }
370 } 402 }
371 # zipfile 403 # zipfile
372 if(!is.null(zipfile) && (zipfile != "")) { 404 if (!is.null(zipfile) && (zipfile != "")) {
373 if(!file.exists(zipfile)){ 405 if (!file.exists(zipfile)) {
374 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
375 print(error_message) 407 print(error_message)
376 stop(error_message) 408 stop(error_message)
377 } 409 }
378 suppressWarnings(unzip(zipfile, unzip="unzip")) 410 suppressWarnings(unzip(zipfile, unzip = "unzip"))
379 411
380 #get the directory name 412 #get the directory name
381 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) 413 suppressWarnings(filesInZip <- unzip(zipfile, list = T))
382 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) 414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
383 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
384 directory <- "." 416 directory <- "."
385 if (length(directories) == 1) directory <- directories 417 if (length(directories) == 1) directory <- directories
386 418
387 cat("files_root_directory\t",directory,"\n") 419 cat("files_root_directory\t", directory, "\n")
388 420
389 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
390 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") 422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
391 info <- file.info(directory) 423 info <- file.info(directory)
392 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) 424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
393 files <- c(directory[!info$isdir], listed) 425 files <- c(directory[!info$isdir], listed)
394 exists <- file.exists(files) 426 exists <- file.exists(files)
395 files <- files[exists] 427 files <- files[exists]
396 428
397 } 429 }
398 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) 430 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
399 431
400 } 432 }
401 433
402 434
403 # This function retrieve a xset like object 435 # This function retrieve a xset like object
404 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
405 getxcmsSetObject <- function(xobject) { 437 getxcmsSetObject <- function(xobject) {
406 # XCMS 1.x 438 # XCMS 1.x
407 if (class(xobject) == "xcmsSet") 439 if (class(xobject) == "xcmsSet")
408 return (xobject) 440 return(xobject)
409 # XCMS 3.x 441 # XCMS 3.x
410 if (class(xobject) == "XCMSnExp") { 442 if (class(xobject) == "XCMSnExp") {
411 # Get the legacy xcmsSet object 443 # Get the legacy xcmsSet object
412 suppressWarnings(xset <- as(xobject, 'xcmsSet')) 444 suppressWarnings(xset <- as(xobject, "xcmsSet"))
413 if (!is.null(xset@phenoData$sample_group)) 445 if (!is.null(xset@phenoData$sample_group))
414 sampclass(xset) <- xset@phenoData$sample_group 446 sampclass(xset) <- xset@phenoData$sample_group
415 else 447 else
416 sampclass(xset) <- "." 448 sampclass(xset) <- "."
417 return (xset) 449 return(xset)
418 } 450 }
419 } 451 }