Mercurial > repos > lecorguille > xcms_group
comparison lib.r @ 27:d45a786cbc40 draft
"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit dcc90f9cf76e6980c0a7d9698c89fab826e7adae"
author | workflow4metabolomics |
---|---|
date | Wed, 07 Apr 2021 12:06:33 +0000 |
parents | 0d05d0458376 |
children | 2b676d5eb848 |
comparison
equal
deleted
inserted
replaced
26:0d05d0458376 | 27:d45a786cbc40 |
---|---|
4 #@author G. Le Corguille | 4 #@author G. Le Corguille |
5 # solve an issue with batch if arguments are logical TRUE/FALSE | 5 # solve an issue with batch if arguments are logical TRUE/FALSE |
6 parseCommandArgs <- function(...) { | 6 parseCommandArgs <- function(...) { |
7 args <- batch::parseCommandArgs(...) | 7 args <- batch::parseCommandArgs(...) |
8 for (key in names(args)) { | 8 for (key in names(args)) { |
9 if (args[key] %in% c("TRUE","FALSE")) | 9 if (args[key] %in% c("TRUE", "FALSE")) |
10 args[key] = as.logical(args[key]) | 10 args[key] <- as.logical(args[key]) |
11 } | 11 } |
12 return(args) | 12 return(args) |
13 } | 13 } |
14 | 14 |
15 #@author G. Le Corguille | 15 #@author G. Le Corguille |
16 # This function will | 16 # This function will |
17 # - load the packages | 17 # - load the packages |
18 # - display the sessionInfo | 18 # - display the sessionInfo |
19 loadAndDisplayPackages <- function(pkgs) { | 19 loadAndDisplayPackages <- function(pkgs) { |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) |
21 | 21 |
22 sessioninfo = sessionInfo() | 22 sessioninfo <- sessionInfo() |
23 cat(sessioninfo$R.version$version.string,"\n") | 23 cat(sessioninfo$R.version$version.string, "\n") |
24 cat("Main packages:\n") | 24 cat("Main packages:\n") |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 25 for (pkg in names(sessioninfo$otherPkgs)) { |
26 cat(paste(pkg, packageVersion(pkg)), "\t") | |
27 } | |
28 cat("\n") | |
26 cat("Other loaded packages:\n") | 29 cat("Other loaded packages:\n") |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 30 for (pkg in names(sessioninfo$loadedOnly)) { |
31 cat(paste(pkg, packageVersion(pkg)), "\t") | |
32 } | |
33 cat("\n") | |
28 } | 34 } |
29 | 35 |
30 #@author G. Le Corguille | 36 #@author G. Le Corguille |
31 # This function merge several chromBPI or chromTIC into one. | 37 # This function merge several chromBPI or chromTIC into one. |
32 mergeChrom <- function(chrom_merged, chrom) { | 38 mergeChrom <- function(chrom_merged, chrom) { |
41 chromTIC <- NULL | 47 chromTIC <- NULL |
42 chromBPI <- NULL | 48 chromBPI <- NULL |
43 chromTIC_adjusted <- NULL | 49 chromTIC_adjusted <- NULL |
44 chromBPI_adjusted <- NULL | 50 chromBPI_adjusted <- NULL |
45 md5sumList <- NULL | 51 md5sumList <- NULL |
46 for(image in args$images) { | 52 for (image in args$images) { |
47 | 53 |
48 load(image) | 54 load(image) |
49 # Handle infiles | 55 # Handle infiles |
50 if (!exists("singlefile")) singlefile <- NULL | 56 if (!exists("singlefile")) singlefile <- NULL |
51 if (!exists("zipfile")) zipfile <- NULL | 57 if (!exists("zipfile")) zipfile <- NULL |
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) | 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args) |
53 zipfile <- rawFilePath$zipfile | 59 zipfile <- rawFilePath$zipfile |
54 singlefile <- rawFilePath$singlefile | 60 singlefile <- rawFilePath$singlefile |
55 | 61 |
56 if (exists("raw_data")) xdata <- raw_data | 62 if (exists("raw_data")) xdata <- raw_data |
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") |
58 | 64 |
59 cat(sampleNamesList$sampleNamesOrigin,"\n") | 65 cat(sampleNamesList$sampleNamesOrigin, "\n") |
60 | 66 |
61 if (!exists("xdata_merged")) { | 67 if (!exists("xdata_merged")) { |
62 xdata_merged <- xdata | 68 xdata_merged <- xdata |
63 singlefile_merged <- singlefile | 69 singlefile_merged <- singlefile |
64 md5sumList_merged <- md5sumList | 70 md5sumList_merged <- md5sumList |
66 chromTIC_merged <- chromTIC | 72 chromTIC_merged <- chromTIC |
67 chromBPI_merged <- chromBPI | 73 chromBPI_merged <- chromBPI |
68 chromTIC_adjusted_merged <- chromTIC_adjusted | 74 chromTIC_adjusted_merged <- chromTIC_adjusted |
69 chromBPI_adjusted_merged <- chromBPI_adjusted | 75 chromBPI_adjusted_merged <- chromBPI_adjusted |
70 } else { | 76 } else { |
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) | 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata) |
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata) | 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata) |
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") | 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") |
74 | 80 |
75 singlefile_merged <- c(singlefile_merged,singlefile) | 81 singlefile_merged <- c(singlefile_merged, singlefile) |
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) | 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin) |
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) | 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin) |
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) | 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames) |
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) | 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) |
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) | 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) |
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) | 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) |
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) | 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) |
83 } | 89 } |
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) | 95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) |
90 | 96 |
91 if (!is.null(args$sampleMetadata)) { | 97 if (!is.null(args$sampleMetadata)) { |
92 cat("\tXSET PHENODATA SETTING...\n") | 98 cat("\tXSET PHENODATA SETTING...\n") |
93 sampleMetadataFile <- args$sampleMetadata | 99 sampleMetadataFile <- args$sampleMetadata |
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) | 100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F) |
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] | 101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)] |
96 | 102 |
97 if (any(is.na(pData(xdata)$sample_group))) { | 103 if (any(is.na(pData(xdata)$sample_group))) { |
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] | 104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] |
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) | 105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " ")) |
100 print(error_message) | 106 print(error_message) |
101 stop(error_message) | 107 stop(error_message) |
102 } | 108 } |
103 } | 109 } |
104 | 110 |
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } | 111 if (!is.null(chromTIC_merged)) { |
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } | 112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData |
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } | 113 } |
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } | 114 if (!is.null(chromBPI_merged)) { |
109 | 115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData |
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) | 116 } |
117 if (!is.null(chromTIC_adjusted_merged)) { | |
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData | |
119 } | |
120 if (!is.null(chromBPI_adjusted_merged)) { | |
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData | |
122 } | |
123 | |
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted)) | |
111 } | 125 } |
112 | 126 |
113 #@author G. Le Corguille | 127 #@author G. Le Corguille |
114 # This function convert if it is required the Retention Time in minutes | 128 # This function convert if it is required the Retention Time in minutes |
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | 129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { |
116 if (convertRTMinute){ | 130 if (convertRTMinute) { |
117 #converting the retention times (seconds) into minutes | 131 #converting the retention times (seconds) into minutes |
118 print("converting the retention times into minutes in the variableMetadata") | 132 print("converting the retention times into minutes in the variableMetadata") |
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 | 133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 |
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 | 134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 |
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 | 135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 |
122 } | 136 } |
123 return (variableMetadata) | 137 return(variableMetadata) |
124 } | 138 } |
125 | 139 |
126 #@author G. Le Corguille | 140 #@author G. Le Corguille |
127 # This function format ions identifiers | 141 # This function format ions identifiers |
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | 142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { |
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") | 143 splitDeco <- strsplit(as.character(variableMetadata$name), "_") |
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | 144 idsDeco <- sapply(splitDeco, |
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | 145 function(x) { |
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | 146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco)) |
147 } | |
148 ) | |
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) | |
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) | |
133 return(variableMetadata) | 151 return(variableMetadata) |
134 } | 152 } |
135 | 153 |
136 #@author G. Le Corguille | 154 #@author G. Le Corguille |
137 # This function convert the remain NA to 0 in the dataMatrix | 155 # This function convert the remain NA to 0 in the dataMatrix |
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { | 156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { |
139 if (naTOzero){ | 157 if (naTOzero) { |
140 dataMatrix[is.na(dataMatrix)] <- 0 | 158 dataMatrix[is.na(dataMatrix)] <- 0 |
141 } | 159 } |
142 return (dataMatrix) | 160 return(dataMatrix) |
143 } | 161 } |
144 | 162 |
145 #@author G. Le Corguille | 163 #@author G. Le Corguille |
146 # Draw the plotChromPeakDensity 3 per page in a pdf file | 164 # Draw the plotChromPeakDensity 3 per page in a pdf file |
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { | 165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) { |
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | 166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12) |
149 | 167 |
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | 168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) |
151 | 169 |
152 if(length(unique(xdata$sample_group))<10){ | 170 if (length(unique(xdata$sample_group)) < 10) { |
153 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
154 }else{ | 172 }else{ |
155 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3") | 173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") |
156 } | 174 } |
157 names(group_colors) <- unique(xdata$sample_group) | 175 names(group_colors) <- unique(xdata$sample_group) |
158 col_per_samp <- as.character(xdata$sample_group) | 176 col_per_samp <- as.character(xdata$sample_group) |
159 for(i in 1:length(group_colors)){col_per_samp[col_per_samp==(names(group_colors)[i])]<-group_colors[i]} | 177 for (i in seq_len(length(group_colors))) { |
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i] | |
179 } | |
160 | 180 |
161 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | 181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) |
162 for (i in 1:nrow(featureDefinitions(xdata))) { | 182 for (i in seq_len(nrow(featureDefinitions(xdata)))) { |
163 mzmin = featureDefinitions(xdata)[i,]$mzmin | 183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin |
164 mzmax = featureDefinitions(xdata)[i,]$mzmax | 184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax |
165 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=col_per_samp, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | 185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit))) |
166 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
167 } | 187 } |
168 | 188 |
169 dev.off() | 189 dev.off() |
170 } | 190 } |
171 | 191 |
172 #@author G. Le Corguille | 192 #@author G. Le Corguille |
173 # Draw the plotChromPeakDensity 3 per page in a pdf file | 193 # Draw the plotChromPeakDensity 3 per page in a pdf file |
174 getPlotAdjustedRtime <- function(xdata) { | 194 getPlotAdjustedRtime <- function(xdata) { |
175 | 195 |
176 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | 196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12) |
177 | 197 |
178 # Color by group | 198 # Color by group |
179 if(length(unique(xdata$sample_group))<10){ | 199 if (length(unique(xdata$sample_group)) < 10) { |
180 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
181 }else{ | 201 } else { |
182 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3") | 202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") |
183 } | 203 } |
184 if (length(group_colors) > 1) { | 204 if (length(group_colors) > 1) { |
185 names(group_colors) <- unique(xdata$sample_group) | 205 names(group_colors) <- unique(xdata$sample_group) |
186 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | 206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) |
187 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
188 } | 208 } |
189 | 209 |
190 # Color by sample | 210 # Color by sample |
191 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | 211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) |
192 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) |
193 | 213 |
194 dev.off() | 214 dev.off() |
195 } | 215 } |
196 | 216 |
197 #@author G. Le Corguille | 217 #@author G. Le Corguille |
198 # value: intensity values to be used into, maxo or intb | 218 # value: intensity values to be used into, maxo or intb |
199 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { | 219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { |
200 dataMatrix <- featureValues(xdata, method="medret", value=intval) | 220 dataMatrix <- featureValues(xdata, method = "medret", value = intval) |
201 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) | 221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) |
202 dataMatrix = cbind(name=groupnames(xdata), dataMatrix) | 222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix) |
203 variableMetadata <- featureDefinitions(xdata) | 223 variableMetadata <- featureDefinitions(xdata) |
204 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" | 224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt" |
205 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata) | 225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata) |
206 | 226 |
207 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | 227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) |
208 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) |
209 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) | 229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) |
210 | 230 |
211 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export | 231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export |
212 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") | 232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") |
213 | 233 |
214 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | 234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F) |
215 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | 235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F) |
216 | 236 |
217 } | 237 } |
218 | 238 |
219 #@author G. Le Corguille | 239 #@author G. Le Corguille |
220 # It allow different of field separators | 240 # It allow different of field separators |
221 getDataFrameFromFile <- function(filename, header=T) { | 241 getDataFrameFromFile <- function(filename, header = T) { |
222 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) | 242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F) |
223 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) | 243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F) |
224 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) | 244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F) |
225 if (ncol(myDataFrame) < 2) { | 245 if (ncol(myDataFrame) < 2) { |
226 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" | 246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" |
227 print(error_message) | 247 print(error_message) |
228 stop(error_message) | 248 stop(error_message) |
229 } | 249 } |
230 return(myDataFrame) | 250 return(myDataFrame) |
231 } | 251 } |
232 | 252 |
233 #@author G. Le Corguille | 253 #@author G. Le Corguille |
234 # Draw the BPI and TIC graphics | 254 # Draw the BPI and TIC graphics |
235 # colored by sample names or class names | 255 # colored by sample names or class names |
236 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { | 256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") { |
237 | 257 |
238 if (aggregationFun == "sum") | 258 if (aggregationFun == "sum") |
239 type="Total Ion Chromatograms" | 259 type <- "Total Ion Chromatograms" |
240 else | 260 else |
241 type="Base Peak Intensity Chromatograms" | 261 type <- "Base Peak Intensity Chromatograms" |
242 | 262 |
243 adjusted="Raw" | 263 adjusted <- "Raw" |
244 if (hasAdjustedRtime(xdata)) | 264 if (hasAdjustedRtime(xdata)) |
245 adjusted="Adjusted" | 265 adjusted <- "Adjusted" |
246 | 266 |
247 main <- paste(type,":",adjusted,"data") | 267 main <- paste(type, ":", adjusted, "data") |
248 | 268 |
249 pdf(pdfname, width=16, height=10) | 269 pdf(pdfname, width = 16, height = 10) |
250 | 270 |
251 # Color by group | 271 # Color by group |
252 if(length(unique(xdata$sample_group))<10){ | 272 if (length(unique(xdata$sample_group)) < 10) { |
253 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") |
254 }else{ | 274 }else{ |
255 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3") | 275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") |
256 } | 276 } |
257 if (length(group_colors) > 1) { | 277 if (length(group_colors) > 1) { |
258 names(group_colors) <- unique(xdata$sample_group) | 278 names(group_colors) <- unique(xdata$sample_group) |
259 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") | 279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none") |
260 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
261 } | 281 } |
262 | 282 |
263 # Color by sample | 283 # Color by sample |
264 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") | 284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none") |
265 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) |
266 | 286 |
267 dev.off() | 287 dev.off() |
268 } | 288 } |
269 | 289 |
270 | 290 |
271 # Get the polarities from all the samples of a condition | 291 # Get the polarities from all the samples of a condition |
272 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
273 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
274 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") { |
275 cat("Creating the sampleMetadata file...\n") | 295 cat("Creating the sampleMetadata file...\n") |
276 | 296 |
277 #Create the sampleMetada dataframe | 297 #Create the sampleMetada dataframe |
278 sampleMetadata <- xdata@phenoData@data | 298 sampleMetadata <- xdata@phenoData@data |
279 rownames(sampleMetadata) <- NULL | 299 rownames(sampleMetadata) <- NULL |
283 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | 303 sampleNamesMakeNames <- make.names(sampleNamesOrigin) |
284 | 304 |
285 if (any(duplicated(sampleNamesMakeNames))) { | 305 if (any(duplicated(sampleNamesMakeNames))) { |
286 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
287 for (sampleName in sampleNamesOrigin) { | 307 for (sampleName in sampleNamesOrigin) { |
288 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr()) |
289 } | 309 } |
290 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
291 } | 311 } |
292 | 312 |
293 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | 313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { |
294 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | 314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") |
295 for (sampleName in sampleNamesOrigin) { | 315 for (sampleName in sampleNamesOrigin) { |
296 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n")) |
297 } | 317 } |
298 } | 318 } |
299 | 319 |
300 sampleMetadata$sample_name <- sampleNamesMakeNames | 320 sampleMetadata$sample_name <- sampleNamesMakeNames |
301 | 321 |
302 | 322 |
303 #For each sample file, the following actions are done | 323 #For each sample file, the following actions are done |
304 for (fileIdx in 1:length(fileNames(xdata))) { | 324 for (fileIdx in seq_len(length(fileNames(xdata)))) { |
305 #Check if the file is in the CDF format | 325 #Check if the file is in the CDF format |
306 if (!mzR:::netCDFIsFile(fileNames(xdata))) { | 326 if (!mzR:::netCDFIsFile(fileNames(xdata))) { |
307 | 327 |
308 # If the column isn't exist, with add one filled with NA | 328 # If the column isn't exist, with add one filled with NA |
309 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA | 329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA |
310 | 330 |
311 #Extract the polarity (a list of polarities) | 331 #Extract the polarity (a list of polarities) |
312 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] | 332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"] |
313 #Verify if all the scans have the same polarity | 333 #Verify if all the scans have the same polarity |
314 uniq_list <- unique(polarity) | 334 uniq_list <- unique(polarity) |
315 if (length(uniq_list)>1){ | 335 if (length(uniq_list) > 1) { |
316 polarity <- "mixed" | 336 polarity <- "mixed" |
317 } else { | 337 } else { |
318 polarity <- as.character(uniq_list) | 338 polarity <- as.character(uniq_list) |
319 } | 339 } |
320 | 340 |
322 sampleMetadata$polarity[fileIdx] <- polarity | 342 sampleMetadata$polarity[fileIdx] <- polarity |
323 } | 343 } |
324 | 344 |
325 } | 345 } |
326 | 346 |
327 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput) |
328 | 348 |
329 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | 349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames)) |
330 | 350 |
331 } | 351 } |
332 | 352 |
333 | 353 |
334 # This function will compute MD5 checksum to check the data integrity | 354 # This function will compute MD5 checksum to check the data integrity |
335 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
336 getMd5sum <- function (files) { | 356 getMd5sum <- function(files) { |
337 cat("Compute md5 checksum...\n") | 357 cat("Compute md5 checksum...\n") |
338 library(tools) | 358 library(tools) |
339 return(as.matrix(md5sum(files))) | 359 return(as.matrix(md5sum(files))) |
340 } | 360 } |
341 | 361 |
342 # This function retrieve the raw file in the working directory | 362 # This function retrieve the raw file in the working directory |
343 # - if zipfile: unzip the file with its directory tree | 363 # - if zipfile: unzip the file with its directory tree |
344 # - if singlefiles: set symlink with the good filename | 364 # - if singlefiles: set symlink with the good filename |
345 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
346 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { | 366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") { |
347 | 367 |
348 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | 368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") |
349 | 369 |
350 # single - if the file are passed in the command arguments -> refresh singlefile | 370 # single - if the file are passed in the command arguments -> refresh singlefile |
351 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | 371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { |
352 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) | 372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|")) |
353 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) | 373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|")) |
354 | 374 |
355 singlefile <- NULL | 375 singlefile <- NULL |
356 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { |
357 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | 377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
358 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | 378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
359 # In case, an url is used to import data within Galaxy | 379 # In case, an url is used to import data within Galaxy |
360 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | 380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) |
361 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | 381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
362 } | 382 } |
363 } | 383 } |
364 # zipfile - if the file are passed in the command arguments -> refresh zipfile | 384 # zipfile - if the file are passed in the command arguments -> refresh zipfile |
365 if (!is.null(args[[paste0("zipfile",prefix)]])) | 385 if (!is.null(args[[paste0("zipfile", prefix)]])) |
366 zipfile <- args[[paste0("zipfile",prefix)]] | 386 zipfile <- args[[paste0("zipfile", prefix)]] |
367 | 387 |
368 # single | 388 # single |
369 if(!is.null(singlefile) && (length("singlefile")>0)) { | 389 if (!is.null(singlefile) && (length("singlefile") > 0)) { |
370 files <- vector() | 390 files <- vector() |
371 for (singlefile_sampleName in names(singlefile)) { | 391 for (singlefile_sampleName in names(singlefile)) { |
372 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
373 if(!file.exists(singlefile_galaxyPath)){ | 393 if (!file.exists(singlefile_galaxyPath)) { |
374 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") |
375 print(error_message); stop(error_message) | 395 print(error_message); stop(error_message) |
376 } | 396 } |
377 | 397 |
378 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T))) |
379 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 399 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
380 files <- c(files, singlefile_sampleName) | 400 files <- c(files, singlefile_sampleName) |
381 } | 401 } |
382 } | 402 } |
383 # zipfile | 403 # zipfile |
384 if(!is.null(zipfile) && (zipfile != "")) { | 404 if (!is.null(zipfile) && (zipfile != "")) { |
385 if(!file.exists(zipfile)){ | 405 if (!file.exists(zipfile)) { |
386 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") |
387 print(error_message) | 407 print(error_message) |
388 stop(error_message) | 408 stop(error_message) |
389 } | 409 } |
390 suppressWarnings(unzip(zipfile, unzip="unzip")) | 410 suppressWarnings(unzip(zipfile, unzip = "unzip")) |
391 | 411 |
392 #get the directory name | 412 #get the directory name |
393 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 413 suppressWarnings(filesInZip <- unzip(zipfile, list = T)) |
394 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) |
395 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
396 directory <- "." | 416 directory <- "." |
397 if (length(directories) == 1) directory <- directories | 417 if (length(directories) == 1) directory <- directories |
398 | 418 |
399 cat("files_root_directory\t",directory,"\n") | 419 cat("files_root_directory\t", directory, "\n") |
400 | 420 |
401 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
402 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | 422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") |
403 info <- file.info(directory) | 423 info <- file.info(directory) |
404 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | 424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) |
405 files <- c(directory[!info$isdir], listed) | 425 files <- c(directory[!info$isdir], listed) |
406 exists <- file.exists(files) | 426 exists <- file.exists(files) |
407 files <- files[exists] | 427 files <- files[exists] |
408 | 428 |
409 } | 429 } |
410 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) | 430 return(list(zipfile = zipfile, singlefile = singlefile, files = files)) |
411 | 431 |
412 } | 432 } |
413 | 433 |
414 | 434 |
415 # This function retrieve a xset like object | 435 # This function retrieve a xset like object |
416 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
417 getxcmsSetObject <- function(xobject) { | 437 getxcmsSetObject <- function(xobject) { |
418 # XCMS 1.x | 438 # XCMS 1.x |
419 if (class(xobject) == "xcmsSet") | 439 if (class(xobject) == "xcmsSet") |
420 return (xobject) | 440 return(xobject) |
421 # XCMS 3.x | 441 # XCMS 3.x |
422 if (class(xobject) == "XCMSnExp") { | 442 if (class(xobject) == "XCMSnExp") { |
423 # Get the legacy xcmsSet object | 443 # Get the legacy xcmsSet object |
424 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 444 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
425 if (!is.null(xset@phenoData$sample_group)) | 445 if (!is.null(xset@phenoData$sample_group)) |
426 sampclass(xset) <- xset@phenoData$sample_group | 446 sampclass(xset) <- xset@phenoData$sample_group |
427 else | 447 else |
428 sampclass(xset) <- "." | 448 sampclass(xset) <- "." |
429 return (xset) | 449 return(xset) |
430 } | 450 } |
431 } | 451 } |