Mercurial > repos > lecorguille > xcms_xcmsset
comparison lib.r @ 32:b02d1992a43a draft
"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit dcc90f9cf76e6980c0a7d9698c89fab826e7adae"
author | workflow4metabolomics |
---|---|
date | Wed, 07 Apr 2021 16:47:14 +0000 |
parents | eb15a3841da4 |
children | f5d51091cf84 |
comparison
equal
deleted
inserted
replaced
31:eb15a3841da4 | 32:b02d1992a43a |
---|---|
4 #@author G. Le Corguille | 4 #@author G. Le Corguille |
5 # solve an issue with batch if arguments are logical TRUE/FALSE | 5 # solve an issue with batch if arguments are logical TRUE/FALSE |
6 parseCommandArgs <- function(...) { | 6 parseCommandArgs <- function(...) { |
7 args <- batch::parseCommandArgs(...) | 7 args <- batch::parseCommandArgs(...) |
8 for (key in names(args)) { | 8 for (key in names(args)) { |
9 if (args[key] %in% c("TRUE","FALSE")) | 9 if (args[key] %in% c("TRUE", "FALSE")) |
10 args[key] = as.logical(args[key]) | 10 args[key] <- as.logical(args[key]) |
11 } | 11 } |
12 return(args) | 12 return(args) |
13 } | 13 } |
14 | 14 |
15 #@author G. Le Corguille | 15 #@author G. Le Corguille |
16 # This function will | 16 # This function will |
17 # - load the packages | 17 # - load the packages |
18 # - display the sessionInfo | 18 # - display the sessionInfo |
19 loadAndDisplayPackages <- function(pkgs) { | 19 loadAndDisplayPackages <- function(pkgs) { |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) |
21 | 21 |
22 sessioninfo = sessionInfo() | 22 sessioninfo <- sessionInfo() |
23 cat(sessioninfo$R.version$version.string,"\n") | 23 cat(sessioninfo$R.version$version.string, "\n") |
24 cat("Main packages:\n") | 24 cat("Main packages:\n") |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 25 for (pkg in names(sessioninfo$otherPkgs)) { |
26 cat(paste(pkg, packageVersion(pkg)), "\t") | |
27 } | |
28 cat("\n") | |
26 cat("Other loaded packages:\n") | 29 cat("Other loaded packages:\n") |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 30 for (pkg in names(sessioninfo$loadedOnly)) { |
31 cat(paste(pkg, packageVersion(pkg)), "\t") | |
32 } | |
33 cat("\n") | |
28 } | 34 } |
29 | 35 |
30 #@author G. Le Corguille | 36 #@author G. Le Corguille |
31 # This function merge several chromBPI or chromTIC into one. | 37 # This function merge several chromBPI or chromTIC into one. |
32 mergeChrom <- function(chrom_merged, chrom) { | 38 mergeChrom <- function(chrom_merged, chrom) { |
41 chromTIC <- NULL | 47 chromTIC <- NULL |
42 chromBPI <- NULL | 48 chromBPI <- NULL |
43 chromTIC_adjusted <- NULL | 49 chromTIC_adjusted <- NULL |
44 chromBPI_adjusted <- NULL | 50 chromBPI_adjusted <- NULL |
45 md5sumList <- NULL | 51 md5sumList <- NULL |
46 for(image in args$images) { | 52 for (image in args$images) { |
47 | 53 |
48 load(image) | 54 load(image) |
49 # Handle infiles | 55 # Handle infiles |
50 if (!exists("singlefile")) singlefile <- NULL | 56 if (!exists("singlefile")) singlefile <- NULL |
51 if (!exists("zipfile")) zipfile <- NULL | 57 if (!exists("zipfile")) zipfile <- NULL |
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) | 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args) |
53 zipfile <- rawFilePath$zipfile | 59 zipfile <- rawFilePath$zipfile |
54 singlefile <- rawFilePath$singlefile | 60 singlefile <- rawFilePath$singlefile |
55 | 61 |
56 if (exists("raw_data")) xdata <- raw_data | 62 if (exists("raw_data")) xdata <- raw_data |
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") |
58 | 64 |
59 cat(sampleNamesList$sampleNamesOrigin,"\n") | 65 cat(sampleNamesList$sampleNamesOrigin, "\n") |
60 | 66 |
61 if (!exists("xdata_merged")) { | 67 if (!exists("xdata_merged")) { |
62 xdata_merged <- xdata | 68 xdata_merged <- xdata |
63 singlefile_merged <- singlefile | 69 singlefile_merged <- singlefile |
64 md5sumList_merged <- md5sumList | 70 md5sumList_merged <- md5sumList |
66 chromTIC_merged <- chromTIC | 72 chromTIC_merged <- chromTIC |
67 chromBPI_merged <- chromBPI | 73 chromBPI_merged <- chromBPI |
68 chromTIC_adjusted_merged <- chromTIC_adjusted | 74 chromTIC_adjusted_merged <- chromTIC_adjusted |
69 chromBPI_adjusted_merged <- chromBPI_adjusted | 75 chromBPI_adjusted_merged <- chromBPI_adjusted |
70 } else { | 76 } else { |
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) | 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata) |
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata) | 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata) |
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") | 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") |
74 | 80 |
75 singlefile_merged <- c(singlefile_merged,singlefile) | 81 singlefile_merged <- c(singlefile_merged, singlefile) |
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) | 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin) |
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) | 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin) |
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) | 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames) |
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) | 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) |
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) | 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) |
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) | 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) |
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) | 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) |
83 } | 89 } |
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) | 95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) |
90 | 96 |
91 if (!is.null(args$sampleMetadata)) { | 97 if (!is.null(args$sampleMetadata)) { |
92 cat("\tXSET PHENODATA SETTING...\n") | 98 cat("\tXSET PHENODATA SETTING...\n") |
93 sampleMetadataFile <- args$sampleMetadata | 99 sampleMetadataFile <- args$sampleMetadata |
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) | 100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F) |
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] | 101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)] |
96 | 102 |
97 if (any(is.na(pData(xdata)$sample_group))) { | 103 if (any(is.na(pData(xdata)$sample_group))) { |
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] | 104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] |
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) | 105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " ")) |
100 print(error_message) | 106 print(error_message) |
101 stop(error_message) | 107 stop(error_message) |
102 } | 108 } |
103 } | 109 } |
104 | 110 |
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } | 111 if (!is.null(chromTIC_merged)) { |
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } | 112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData |
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } | 113 } |
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } | 114 if (!is.null(chromBPI_merged)) { |
109 | 115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData |
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) | 116 } |
117 if (!is.null(chromTIC_adjusted_merged)) { | |
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData | |
119 } | |
120 if (!is.null(chromBPI_adjusted_merged)) { | |
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData | |
122 } | |
123 | |
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted)) | |
111 } | 125 } |
112 | 126 |
113 #@author G. Le Corguille | 127 #@author G. Le Corguille |
114 # This function convert if it is required the Retention Time in minutes | 128 # This function convert if it is required the Retention Time in minutes |
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | 129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { |
116 if (convertRTMinute){ | 130 if (convertRTMinute) { |
117 #converting the retention times (seconds) into minutes | 131 #converting the retention times (seconds) into minutes |
118 print("converting the retention times into minutes in the variableMetadata") | 132 print("converting the retention times into minutes in the variableMetadata") |
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 | 133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 |
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 | 134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 |
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 | 135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 |
122 } | 136 } |
123 return (variableMetadata) | 137 return(variableMetadata) |
124 } | 138 } |
125 | 139 |
126 #@author G. Le Corguille | 140 #@author G. Le Corguille |
127 # This function format ions identifiers | 141 # This function format ions identifiers |
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | 142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { |
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") | 143 splitDeco <- strsplit(as.character(variableMetadata$name), "_") |
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | 144 idsDeco <- sapply(splitDeco, |
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | 145 function(x) { |
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | 146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco)) |
147 } | |
148 ) | |
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) | |
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) | |
133 return(variableMetadata) | 151 return(variableMetadata) |
134 } | 152 } |
135 | 153 |
136 #@author G. Le Corguille | 154 #@author G. Le Corguille |
137 # This function convert the remain NA to 0 in the dataMatrix | 155 # This function convert the remain NA to 0 in the dataMatrix |
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { | 156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { |
139 if (naTOzero){ | 157 if (naTOzero) { |
140 dataMatrix[is.na(dataMatrix)] <- 0 | 158 dataMatrix[is.na(dataMatrix)] <- 0 |
141 } | 159 } |
142 return (dataMatrix) | 160 return(dataMatrix) |
143 } | 161 } |
144 | 162 |
145 #@author G. Le Corguille | 163 #@author G. Le Corguille |
146 # Draw the plotChromPeakDensity 3 per page in a pdf file | 164 # Draw the plotChromPeakDensity 3 per page in a pdf file |
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { | 165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) { |
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | 166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12) |
149 | 167 |
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | 168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) |
151 | 169 |
152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 170 if (length(unique(xdata$sample_group)) < 10) { |
171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | |
172 }else{ | |
173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") | |
174 } | |
153 names(group_colors) <- unique(xdata$sample_group) | 175 names(group_colors) <- unique(xdata$sample_group) |
154 col_per_samp <- as.character(xdata$sample_group) | 176 col_per_samp <- as.character(xdata$sample_group) |
155 for(i in 1:length(group_colors)){col_per_samp[col_per_samp==(names(group_colors)[i])]<-group_colors[i]} | 177 for (i in seq_len(length(group_colors))) { |
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i] | |
179 } | |
156 | 180 |
157 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | 181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) |
158 for (i in 1:nrow(featureDefinitions(xdata))) { | 182 for (i in seq_len(nrow(featureDefinitions(xdata)))) { |
159 mzmin = featureDefinitions(xdata)[i,]$mzmin | 183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin |
160 mzmax = featureDefinitions(xdata)[i,]$mzmax | 184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax |
161 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=col_per_samp, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | 185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit))) |
162 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
163 } | 187 } |
164 | 188 |
165 dev.off() | 189 dev.off() |
166 } | 190 } |
167 | 191 |
168 #@author G. Le Corguille | 192 #@author G. Le Corguille |
169 # Draw the plotChromPeakDensity 3 per page in a pdf file | 193 # Draw the plotChromPeakDensity 3 per page in a pdf file |
170 getPlotAdjustedRtime <- function(xdata) { | 194 getPlotAdjustedRtime <- function(xdata) { |
171 | 195 |
172 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | 196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12) |
173 | 197 |
174 # Color by group | 198 # Color by group |
175 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 199 if (length(unique(xdata$sample_group)) < 10) { |
200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | |
201 } else { | |
202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") | |
203 } | |
176 if (length(group_colors) > 1) { | 204 if (length(group_colors) > 1) { |
177 names(group_colors) <- unique(xdata$sample_group) | 205 names(group_colors) <- unique(xdata$sample_group) |
178 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | 206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) |
179 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
180 } | 208 } |
181 | 209 |
182 # Color by sample | 210 # Color by sample |
183 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | 211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) |
184 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) |
185 | 213 |
186 dev.off() | 214 dev.off() |
187 } | 215 } |
188 | 216 |
189 #@author G. Le Corguille | 217 #@author G. Le Corguille |
190 # value: intensity values to be used into, maxo or intb | 218 # value: intensity values to be used into, maxo or intb |
191 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { | 219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { |
192 dataMatrix <- featureValues(xdata, method="medret", value=intval) | 220 dataMatrix <- featureValues(xdata, method = "medret", value = intval) |
193 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) | 221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) |
194 dataMatrix = cbind(name=groupnames(xdata), dataMatrix) | 222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix) |
195 variableMetadata <- featureDefinitions(xdata) | 223 variableMetadata <- featureDefinitions(xdata) |
196 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" | 224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt" |
197 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata) | 225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata) |
198 | 226 |
199 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | 227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) |
200 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) |
201 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) | 229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) |
202 | 230 |
203 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export | 231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export |
204 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") | 232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") |
205 | 233 |
206 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | 234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F) |
207 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | 235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F) |
208 | 236 |
209 } | 237 } |
210 | 238 |
211 #@author G. Le Corguille | 239 #@author G. Le Corguille |
212 # It allow different of field separators | 240 # It allow different of field separators |
213 getDataFrameFromFile <- function(filename, header=T) { | 241 getDataFrameFromFile <- function(filename, header = T) { |
214 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) | 242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F) |
215 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) | 243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F) |
216 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) | 244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F) |
217 if (ncol(myDataFrame) < 2) { | 245 if (ncol(myDataFrame) < 2) { |
218 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" | 246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" |
219 print(error_message) | 247 print(error_message) |
220 stop(error_message) | 248 stop(error_message) |
221 } | 249 } |
222 return(myDataFrame) | 250 return(myDataFrame) |
223 } | 251 } |
224 | 252 |
225 #@author G. Le Corguille | 253 #@author G. Le Corguille |
226 # Draw the BPI and TIC graphics | 254 # Draw the BPI and TIC graphics |
227 # colored by sample names or class names | 255 # colored by sample names or class names |
228 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { | 256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") { |
229 | 257 |
230 if (aggregationFun == "sum") | 258 if (aggregationFun == "sum") |
231 type="Total Ion Chromatograms" | 259 type <- "Total Ion Chromatograms" |
232 else | 260 else |
233 type="Base Peak Intensity Chromatograms" | 261 type <- "Base Peak Intensity Chromatograms" |
234 | 262 |
235 adjusted="Raw" | 263 adjusted <- "Raw" |
236 if (hasAdjustedRtime(xdata)) | 264 if (hasAdjustedRtime(xdata)) |
237 adjusted="Adjusted" | 265 adjusted <- "Adjusted" |
238 | 266 |
239 main <- paste(type,":",adjusted,"data") | 267 main <- paste(type, ":", adjusted, "data") |
240 | 268 |
241 pdf(pdfname, width=16, height=10) | 269 pdf(pdfname, width = 16, height = 10) |
242 | 270 |
243 # Color by group | 271 # Color by group |
244 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 272 if (length(unique(xdata$sample_group)) < 10) { |
273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | |
274 }else{ | |
275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") | |
276 } | |
245 if (length(group_colors) > 1) { | 277 if (length(group_colors) > 1) { |
246 names(group_colors) <- unique(xdata$sample_group) | 278 names(group_colors) <- unique(xdata$sample_group) |
247 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") | 279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none") |
248 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
249 } | 281 } |
250 | 282 |
251 # Color by sample | 283 # Color by sample |
252 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") | 284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none") |
253 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) |
254 | 286 |
255 dev.off() | 287 dev.off() |
256 } | 288 } |
257 | 289 |
258 | 290 |
259 # Get the polarities from all the samples of a condition | 291 # Get the polarities from all the samples of a condition |
260 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
261 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
262 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") { |
263 cat("Creating the sampleMetadata file...\n") | 295 cat("Creating the sampleMetadata file...\n") |
264 | 296 |
265 #Create the sampleMetada dataframe | 297 #Create the sampleMetada dataframe |
266 sampleMetadata <- xdata@phenoData@data | 298 sampleMetadata <- xdata@phenoData@data |
267 rownames(sampleMetadata) <- NULL | 299 rownames(sampleMetadata) <- NULL |
271 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | 303 sampleNamesMakeNames <- make.names(sampleNamesOrigin) |
272 | 304 |
273 if (any(duplicated(sampleNamesMakeNames))) { | 305 if (any(duplicated(sampleNamesMakeNames))) { |
274 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
275 for (sampleName in sampleNamesOrigin) { | 307 for (sampleName in sampleNamesOrigin) { |
276 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr()) |
277 } | 309 } |
278 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
279 } | 311 } |
280 | 312 |
281 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | 313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { |
282 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | 314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") |
283 for (sampleName in sampleNamesOrigin) { | 315 for (sampleName in sampleNamesOrigin) { |
284 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n")) |
285 } | 317 } |
286 } | 318 } |
287 | 319 |
288 sampleMetadata$sample_name <- sampleNamesMakeNames | 320 sampleMetadata$sample_name <- sampleNamesMakeNames |
289 | 321 |
290 | 322 |
291 #For each sample file, the following actions are done | 323 #For each sample file, the following actions are done |
292 for (fileIdx in 1:length(fileNames(xdata))) { | 324 for (fileIdx in seq_len(length(fileNames(xdata)))) { |
293 #Check if the file is in the CDF format | 325 #Check if the file is in the CDF format |
294 if (!mzR:::netCDFIsFile(fileNames(xdata))) { | 326 if (!mzR:::netCDFIsFile(fileNames(xdata))) { |
295 | 327 |
296 # If the column isn't exist, with add one filled with NA | 328 # If the column isn't exist, with add one filled with NA |
297 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA | 329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA |
298 | 330 |
299 #Extract the polarity (a list of polarities) | 331 #Extract the polarity (a list of polarities) |
300 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] | 332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"] |
301 #Verify if all the scans have the same polarity | 333 #Verify if all the scans have the same polarity |
302 uniq_list <- unique(polarity) | 334 uniq_list <- unique(polarity) |
303 if (length(uniq_list)>1){ | 335 if (length(uniq_list) > 1) { |
304 polarity <- "mixed" | 336 polarity <- "mixed" |
305 } else { | 337 } else { |
306 polarity <- as.character(uniq_list) | 338 polarity <- as.character(uniq_list) |
307 } | 339 } |
308 | 340 |
310 sampleMetadata$polarity[fileIdx] <- polarity | 342 sampleMetadata$polarity[fileIdx] <- polarity |
311 } | 343 } |
312 | 344 |
313 } | 345 } |
314 | 346 |
315 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput) |
316 | 348 |
317 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | 349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames)) |
318 | 350 |
319 } | 351 } |
320 | 352 |
321 | 353 |
322 # This function will compute MD5 checksum to check the data integrity | 354 # This function will compute MD5 checksum to check the data integrity |
323 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
324 getMd5sum <- function (files) { | 356 getMd5sum <- function(files) { |
325 cat("Compute md5 checksum...\n") | 357 cat("Compute md5 checksum...\n") |
326 library(tools) | 358 library(tools) |
327 return(as.matrix(md5sum(files))) | 359 return(as.matrix(md5sum(files))) |
328 } | 360 } |
329 | 361 |
330 # This function retrieve the raw file in the working directory | 362 # This function retrieve the raw file in the working directory |
331 # - if zipfile: unzip the file with its directory tree | 363 # - if zipfile: unzip the file with its directory tree |
332 # - if singlefiles: set symlink with the good filename | 364 # - if singlefiles: set symlink with the good filename |
333 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
334 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { | 366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") { |
335 | 367 |
336 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | 368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") |
337 | 369 |
338 # single - if the file are passed in the command arguments -> refresh singlefile | 370 # single - if the file are passed in the command arguments -> refresh singlefile |
339 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | 371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { |
340 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) | 372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|")) |
341 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) | 373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|")) |
342 | 374 |
343 singlefile <- NULL | 375 singlefile <- NULL |
344 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { |
345 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | 377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
346 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | 378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
347 # In case, an url is used to import data within Galaxy | 379 # In case, an url is used to import data within Galaxy |
348 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | 380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) |
349 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | 381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
350 } | 382 } |
351 } | 383 } |
352 # zipfile - if the file are passed in the command arguments -> refresh zipfile | 384 # zipfile - if the file are passed in the command arguments -> refresh zipfile |
353 if (!is.null(args[[paste0("zipfile",prefix)]])) | 385 if (!is.null(args[[paste0("zipfile", prefix)]])) |
354 zipfile <- args[[paste0("zipfile",prefix)]] | 386 zipfile <- args[[paste0("zipfile", prefix)]] |
355 | 387 |
356 # single | 388 # single |
357 if(!is.null(singlefile) && (length("singlefile")>0)) { | 389 if (!is.null(singlefile) && (length("singlefile") > 0)) { |
358 files <- vector() | 390 files <- vector() |
359 for (singlefile_sampleName in names(singlefile)) { | 391 for (singlefile_sampleName in names(singlefile)) { |
360 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
361 if(!file.exists(singlefile_galaxyPath)){ | 393 if (!file.exists(singlefile_galaxyPath)) { |
362 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") |
363 print(error_message); stop(error_message) | 395 print(error_message); stop(error_message) |
364 } | 396 } |
365 | 397 |
366 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T))) |
367 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 399 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
368 files <- c(files, singlefile_sampleName) | 400 files <- c(files, singlefile_sampleName) |
369 } | 401 } |
370 } | 402 } |
371 # zipfile | 403 # zipfile |
372 if(!is.null(zipfile) && (zipfile != "")) { | 404 if (!is.null(zipfile) && (zipfile != "")) { |
373 if(!file.exists(zipfile)){ | 405 if (!file.exists(zipfile)) { |
374 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") |
375 print(error_message) | 407 print(error_message) |
376 stop(error_message) | 408 stop(error_message) |
377 } | 409 } |
378 suppressWarnings(unzip(zipfile, unzip="unzip")) | 410 suppressWarnings(unzip(zipfile, unzip = "unzip")) |
379 | 411 |
380 #get the directory name | 412 #get the directory name |
381 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 413 suppressWarnings(filesInZip <- unzip(zipfile, list = T)) |
382 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) |
383 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
384 directory <- "." | 416 directory <- "." |
385 if (length(directories) == 1) directory <- directories | 417 if (length(directories) == 1) directory <- directories |
386 | 418 |
387 cat("files_root_directory\t",directory,"\n") | 419 cat("files_root_directory\t", directory, "\n") |
388 | 420 |
389 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
390 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | 422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") |
391 info <- file.info(directory) | 423 info <- file.info(directory) |
392 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | 424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) |
393 files <- c(directory[!info$isdir], listed) | 425 files <- c(directory[!info$isdir], listed) |
394 exists <- file.exists(files) | 426 exists <- file.exists(files) |
395 files <- files[exists] | 427 files <- files[exists] |
396 | 428 |
397 } | 429 } |
398 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) | 430 return(list(zipfile = zipfile, singlefile = singlefile, files = files)) |
399 | 431 |
400 } | 432 } |
401 | 433 |
402 | 434 |
403 # This function retrieve a xset like object | 435 # This function retrieve a xset like object |
404 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
405 getxcmsSetObject <- function(xobject) { | 437 getxcmsSetObject <- function(xobject) { |
406 # XCMS 1.x | 438 # XCMS 1.x |
407 if (class(xobject) == "xcmsSet") | 439 if (class(xobject) == "xcmsSet") |
408 return (xobject) | 440 return(xobject) |
409 # XCMS 3.x | 441 # XCMS 3.x |
410 if (class(xobject) == "XCMSnExp") { | 442 if (class(xobject) == "XCMSnExp") { |
411 # Get the legacy xcmsSet object | 443 # Get the legacy xcmsSet object |
412 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 444 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
413 if (!is.null(xset@phenoData$sample_group)) | 445 if (!is.null(xset@phenoData$sample_group)) |
414 sampclass(xset) <- xset@phenoData$sample_group | 446 sampclass(xset) <- xset@phenoData$sample_group |
415 else | 447 else |
416 sampclass(xset) <- "." | 448 sampclass(xset) <- "." |
417 return (xset) | 449 return(xset) |
418 } | 450 } |
419 } | 451 } |