xcms_xcmsset: lib.r comparison

comparison lib.r @ 15:b62808a2a008 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 9f72e947d9c241d11221cad561f3525d27231857

author	lecorguille
date	Tue, 18 Sep 2018 16:07:36 -0400
parents	15646e937936
children	517d4375db53

comparison

equal deleted inserted replaced

-:363cce459fff
+:b62808a2a008
-#Authors ABiMS TEAM
+#@authors ABiMS TEAM, Y. Guitton
-#Lib.r for Galaxy Workflow4Metabolomics xcms tools
+# lib.r for Galaxy Workflow4Metabolomics xcms tools
-#
-#version 2.4: lecorguille
+#@author G. Le Corguille
-#   add getPeaklistW4M
+# Work around an issue with batch::parseCommandArgs: arguments given as "TRUE"/"FALSE" are converted to logical values
-#version 2.3: yguitton
+parseCommandArgs <- function(...) {
-#   correction for empty PDF when only 1 class
+args <- batch::parseCommandArgs(...)
-#version 2.2
+for (key in names(args)) {
-#   correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
+if (args[key] %in% c("TRUE","FALSE"))
-#   Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
+args[key] = as.logical(args[key])
-#version 2.1: yguitton
+}
-#   Modifications made by Guitton Yann
+return(args)
+}
 #@author G. Le Corguille
-#This function convert if it is required the Retention Time in minutes
+# This function will
+# - load the packages named in pkgs (the run stops if one of them cannot be loaded)
+# - display the sessionInfo: R version string, then the attached packages and the other loaded namespaces with their versions
+loadAndDisplayPackages <- function(pkgs) {
+for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) # logical.return=TRUE: library() returns TRUE/FALSE, so stopifnot() aborts when a package is missing
+sessioninfo = sessionInfo()
+cat(sessioninfo$R.version$version.string,"\n")
+cat("Main packages:\n")
+for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") # otherPkgs: packages attached to the search path; printed tab-separated on one line
+cat("Other loaded packages:\n")
+for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") # loadedOnly: namespaces loaded but not attached
+}
+#@author G. Le Corguille
+# This function merges several chromBPI or chromTIC objects into one: the columns of chrom@.Data are appended to chrom_merged@.Data. It returns NULL when chrom_merged is NULL.
+mergeChrom <- function(chrom_merged, chrom) {
+if (is.null(chrom_merged)) return(NULL) # nothing accumulated so far (chrom itself is not inspected): the result stays NULL
+chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data) # column-bind the new data onto the accumulated data
+return(chrom_merged)
+}
+#@author G. Le Corguille
+# This function merges several xdata into one: it loads each RData image listed in args$images, concatenates the xdata objects and their companion lists/chromatograms, optionally sets the sample groups from args$sampleMetadata, and returns everything in a named list.
+mergeXData <- function(args) {
+chromTIC <- NULL # defaults used when the loaded images do not provide these chromatogram objects
+chromBPI <- NULL
+chromTIC_adjusted <- NULL
+chromBPI_adjusted <- NULL
+for(image in args$images) {
+load(image) # restores the objects saved in the image into this function's environment
+# Handle infiles
+if (!exists("singlefile")) singlefile <- NULL # NOTE(review): exists() also sees objects left by a previously loaded image or defined in enclosing environments - confirm this is intended
+if (!exists("zipfile")) zipfile <- NULL
+rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
+zipfile <- rawFilePath$zipfile
+singlefile <- rawFilePath$singlefile
+retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) # helper defined outside this view
+if (exists("raw_data")) xdata <- raw_data # an image holding raw_data (see the OnDiskMSnExp branch below) is merged through the same xdata variable
+if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
+cat(sampleNamesList$sampleNamesOrigin,"\n") # sampleNamesList and md5sumList are expected to come from the loaded image
+if (!exists("xdata_merged")) { # first image: initialise the accumulators
+xdata_merged <- xdata
+singlefile_merged <- singlefile
+md5sumList_merged <- md5sumList
+sampleNamesList_merged <- sampleNamesList
+chromTIC_merged <- chromTIC
+chromBPI_merged <- chromBPI
+chromTIC_adjusted_merged <- chromTIC_adjusted
+chromBPI_adjusted_merged <- chromBPI_adjusted
+} else { # following images: append to the accumulators
+if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata)
+else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) # helper not defined in this view
+else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
+singlefile_merged <- c(singlefile_merged,singlefile)
+md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin)
+sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin)
+sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames)
+chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) # mergeChrom keeps NULL when the accumulator is NULL, so a chromatogram absent from the first image stays absent
+chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
+chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
+chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
+}
+}
+rm(image)
+xdata <- xdata_merged; rm(xdata_merged) # from here on the merged objects replace the per-image ones
+singlefile <- singlefile_merged; rm(singlefile_merged)
+md5sumList <- md5sumList_merged; rm(md5sumList_merged)
+sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
+if (!is.null(args$sampleMetadata)) { # optional file: column V1 is matched to sample_name, column V2 gives the group
+cat("\tXSET PHENODATA SETTING...\n")
+sampleMetadataFile <- args$sampleMetadata
+sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F)
+xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)]
+if (any(is.na(pData(xdata)$sample_group))) { # a sample without a match in the sampleMetadata gets NA: report the names and stop
+sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
+error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" "))
+print(error_message)
+stop(error_message)
+}
+}
+if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } # merged chromatograms take the phenoData of the merged xdata
+if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData }
+if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData }
+if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData }
+return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted))
+}
+#@author G. Le Corguille
+# This function converts the retention times (rt, rtmin, rtmax) from seconds to minutes when it is required
 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
 if (convertRTMinute){
 #converting the retention times (seconds) into minutes
 print("converting the retention times into minutes in the variableMetadata")
-variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
+variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60
-variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
+variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60
-variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
+variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60
 }
 return (variableMetadata)
 }
 #@author G. Le Corguille
-#This function format ions identifiers
+# This function formats the ion identifiers
 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
-splitDeco = strsplit(as.character(variableMetadata$name),"_")
+splitDeco <- strsplit(as.character(variableMetadata$name),"_")
-idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
+idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
-namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
+namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
-variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
+variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
 return(variableMetadata)
 }
 #@author G. Le Corguille
+# This function converts the remaining NA values to 0 in the dataMatrix when naTOzero is TRUE; otherwise the dataMatrix is returned unchanged
+naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
+if (naTOzero){
+dataMatrix[is.na(dataMatrix)] <- 0 # every NA cell becomes 0
+}
+return (dataMatrix)
+}
+#@author G. Le Corguille
+# Draw the plotChromPeakDensity of every feature of xdata, 3 per page, in the pdf file plotChromPeakDensity.pdf; mzdigit is the number of digits used to round the m/z bounds shown in each plot title
+getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
+pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
+par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) # 3 rows x 1 column: three plots per page
+group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] # one colour per sample group; NOTE(review): only 3 colours are requested, so a 4th group would get NA - confirm
+names(group_colors) <- unique(xdata$sample_group)
+xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) # same retention time axis for all the plots
+for (i in 1:nrow(featureDefinitions(xdata))) { # one plot per feature; NOTE(review): 1:nrow() iterates over c(1, 0) when there is no feature - confirm this cannot happen
+mzmin = featureDefinitions(xdata)[i,]$mzmin
+mzmax = featureDefinitions(xdata)[i,]$mzmax
+plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit)))
+legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+}
+dev.off() # close the pdf device
+}
+#@author G. Le Corguille
+# Draw the plotAdjustedRtime graphics in the pdf file raw_vs_adjusted_rt.pdf: one colored by sample group (only when there is more than one group), then one colored by sample
+getPlotAdjustedRtime <- function(xdata) {
+pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
+# Color by group
+group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] # NOTE(review): only 3 colours are requested, so a 4th group would get NA - confirm
+if (length(group_colors) > 1) { # the per-group plot is skipped when there is a single group
+names(group_colors) <- unique(xdata$sample_group)
+plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) # presumably looks up each sample's colour by its group name - assumes sample_group is character, TODO confirm
+legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+}
+# Color by sample
+plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
+legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
+dev.off() # close the pdf device
+}
+#@author G. Le Corguille
 # intval: intensity value to be used: into, maxo or intb
-getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
+getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) {
-variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval)
+dataMatrix <- featureValues(xdata, method="medret", value=intval)
-variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix)
+colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
+dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix)
-dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))]
+variableMetadata <- featureDefinitions(xdata)
+colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt"
-variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))]
+variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata)
-variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute)
-variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
+variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
-}
+}
-#@author Y. Guitton
-getBPC <- function(file,rtcor=NULL, ...) {
+#@author G. Le Corguille
-object <- xcmsRaw(file)
+# Read a tabular file into a data frame; several field separators are allowed (; tabulation and ,)
-sel <- profRange(object, ...)
+getDataFrameFromFile <- function(filename, header=T) {
-cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
+myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F)
-#plotChrom(xcmsRaw(file), base=T)
+if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F)
-}
+if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F)
+if (ncol(myDataFrame) < 2) {
-#@author Y. Guitton
+error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
-getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) {
+print(error_message)
-cat("Creating BIC pdf...\n")
+stop(error_message)
+}
-if (is.null(xcmsSet)) {
+return(myDataFrame)
-cat("Enter an xcmsSet \n")
+}
-stop()
-} else {
+#@author G. Le Corguille
-files <- filepaths(xcmsSet)
+# Draw the BPI and TIC graphics
-}
+# colored by sample names or class names
+getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {
-phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
+if (aggregationFun == "sum")
-classnames<-vector("list",length(phenoDataClass))
+type="Total Ion Chromatograms"
-for (i in 1:length(phenoDataClass)){
+else
-classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
+type="Base Peak Intensity Chromatograms"
-}
+adjusted="Raw"
-N <- dim(phenoData(xcmsSet))[1]
+if (hasAdjustedRtime(xdata))
+adjusted="Adjusted"
-TIC <- vector("list",N)
+main <- paste(type,":",adjusted,"data")
-for (j in 1:N) {
+pdf(pdfname, width=16, height=10)
-TIC[[j]] <- getBPC(files[j])
+# Color by group
-#good for raw
+group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
-# seems strange for corrected
+if (length(group_colors) > 1) {
-#errors if scanrange used in xcmsSetgeneration
+names(group_colors) <- unique(xdata$sample_group)
-if (!is.null(xcmsSet) && rt == "corrected")
+plot(chrom, col = group_colors[chrom$sample_group], main=main)
-rtcor <- xcmsSet@rt$corrected[[j]]
+legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
-else
+}
-rtcor <- NULL
+# Color by sample
-TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
+plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
-# TIC[[j]][,1]<-rtcor
+legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
-}
+dev.off()
+}
-pdf(pdfname,w=16,h=10)
-cols <- rainbow(N)
+# Get the polarities from all the samples of a condition
-lty = 1:N
-pch = 1:N
-#search for max x and max y in BPCs
-xlim = range(sapply(TIC, function(x) range(x[,1])))
-ylim = range(sapply(TIC, function(x) range(x[,2])))
-ylim = c(-ylim[2], ylim[2])
-##plot start
-if (length(phenoDataClass)>2){
-for (k in 1:(length(phenoDataClass)-1)){
-for (l in (k+1):length(phenoDataClass)){
-#print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
-plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
-colvect<-NULL
-for (j in 1:length(classnames[[k]])) {
-tic <- TIC[[classnames[[k]][j]]]
-# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
-points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[k]][j]])
-}
-for (j in 1:length(classnames[[l]])) {
-# i=class2names[j]
-tic <- TIC[[classnames[[l]][j]]]
-points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[l]][j]])
-}
-legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
-}
-}
-}#end if length >2
-if (length(phenoDataClass)==2){
-k=1
-l=2
-colvect<-NULL
-plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
-for (j in 1:length(classnames[[k]])) {
-tic <- TIC[[classnames[[k]][j]]]
-# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
-points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[k]][j]])
-}
-for (j in 1:length(classnames[[l]])) {
-# i=class2names[j]
-tic <- TIC[[classnames[[l]][j]]]
-points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[l]][j]])
-}
-legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
-}#end length ==2
-#case where only one class
-if (length(phenoDataClass)==1){
-k=1
-ylim = range(sapply(TIC, function(x) range(x[,2])))
-colvect<-NULL
-plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
-for (j in 1:length(classnames[[k]])) {
-tic <- TIC[[classnames[[k]][j]]]
-# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
-points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[k]][j]])
-}
-legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
-}#end length ==1
-dev.off() #pdf(pdfname,w=16,h=10)
-invisible(TIC)
-}
-#@author Y. Guitton
-getTIC <- function(file,rtcor=NULL) {
-object <- xcmsRaw(file)
-cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
-}
-##
-##  overlay TIC from all files in current folder or from xcmsSet, create pdf
-##
-#@author Y. Guitton
-getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
-cat("Creating TIC pdf...\n")
-if (is.null(xcmsSet)) {
-filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
-if (is.null(files))
-files <- getwd()
-info <- file.info(files)
-listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
-files <- c(files[!info$isdir], listed)
-} else {
-files <- filepaths(xcmsSet)
-}
-phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
-classnames<-vector("list",length(phenoDataClass))
-for (i in 1:length(phenoDataClass)){
-classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
-}
-N <- length(files)
-TIC <- vector("list",N)
-for (i in 1:N) {
-if (!is.null(xcmsSet) && rt == "corrected")
-rtcor <- xcmsSet@rt$corrected[[i]] else
-rtcor <- NULL
-TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
-}
-pdf(pdfname,w=16,h=10)
-cols <- rainbow(N)
-lty = 1:N
-pch = 1:N
-#search for max x and max y in TICs
-xlim = range(sapply(TIC, function(x) range(x[,1])))
-ylim = range(sapply(TIC, function(x) range(x[,2])))
-ylim = c(-ylim[2], ylim[2])
-##plot start
-if (length(phenoDataClass)>2){
-for (k in 1:(length(phenoDataClass)-1)){
-for (l in (k+1):length(phenoDataClass)){
-#print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
-plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
-colvect<-NULL
-for (j in 1:length(classnames[[k]])) {
-tic <- TIC[[classnames[[k]][j]]]
-# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
-points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[k]][j]])
-}
-for (j in 1:length(classnames[[l]])) {
-# i=class2names[j]
-tic <- TIC[[classnames[[l]][j]]]
-points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[l]][j]])
-}
-legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
-}
-}
-}#end if length >2
-if (length(phenoDataClass)==2){
-k=1
-l=2
-plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
-colvect<-NULL
-for (j in 1:length(classnames[[k]])) {
-tic <- TIC[[classnames[[k]][j]]]
-# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
-points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[k]][j]])
-}
-for (j in 1:length(classnames[[l]])) {
-# i=class2names[j]
-tic <- TIC[[classnames[[l]][j]]]
-points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[l]][j]])
-}
-legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
-}#end length ==2
-#case where only one class
-if (length(phenoDataClass)==1){
-k=1
-ylim = range(sapply(TIC, function(x) range(x[,2])))
-plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
-colvect<-NULL
-for (j in 1:length(classnames[[k]])) {
-tic <- TIC[[classnames[[k]][j]]]
-# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
-points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
-colvect<-append(colvect,cols[classnames[[k]][j]])
-}
-legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
-}#end length ==1
-dev.off() #pdf(pdfname,w=16,h=10)
-invisible(TIC)
-}
-##
-##  Get the polarities from all the samples of a condition
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
+getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
 cat("Creating the sampleMetadata file...\n")
 #Create the sampleMetadata dataframe
-sampleMetadata=xset@phenoData
+sampleMetadata <- xdata@phenoData@data
-sampleNamesOrigin=rownames(sampleMetadata)
+rownames(sampleMetadata) <- NULL
-sampleNamesMakeNames=make.names(sampleNamesOrigin)
+colnames(sampleMetadata) <-  c("sampleMetadata", "class")
+sampleNamesOrigin <- sampleMetadata$sampleMetadata
+sampleNamesMakeNames <- make.names(sampleNamesOrigin)
 if (any(duplicated(sampleNamesMakeNames))) {
 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
 for (sampleName in sampleNamesOrigin) {
 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
 for (sampleName in sampleNamesOrigin) {
 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
 }
 }
-sampleMetadata$sampleMetadata=sampleNamesMakeNames
+sampleMetadata$sampleMetadata <- sampleNamesMakeNames
-sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns
-rownames(sampleMetadata)=NULL
-#Create a list of files name in the current directory
-list_files=xset@filepaths
 #For each sample file, the following actions are done
-for (file in list_files){
+for (fileIdx in 1:length(fileNames(xdata))) {
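 #Check if the file is in the CDF format (NOTE(review): the inserted check passes the whole fileNames(xdata) vector instead of fileNames(xdata)[fileIdx] - confirm)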
-if (!mzR:::netCDFIsFile(file)){
+if (!mzR:::netCDFIsFile(fileNames(xdata))) {
 # If the column doesn't exist, we add one filled with NA
-if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA
+if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
-#Create a simple xcmsRaw object for each sample
-xcmsRaw=xcmsRaw(file)
 #Extract the polarity (a list of polarities)
-polarity=xcmsRaw@polarity
+polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"]
 #Verify if all the scans have the same polarity
-uniq_list=unique(polarity)
+uniq_list <- unique(polarity)
 if (length(uniq_list)>1){
-polarity="mixed"
+polarity <- "mixed"
 } else {
-polarity=as.character(uniq_list)
+polarity <- as.character(uniq_list)
 }
-#Transforms the character to obtain only the sample name
-filename=basename(file)
-library(tools)
-samplename=file_path_sans_ext(filename)
 #Set the polarity attribute
-sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
+sampleMetadata$polarity[fileIdx] <- polarity
-#Delete xcmsRaw object because it creates a bug for the fillpeaks step
-rm(xcmsRaw)
 }
 }
 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
-return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames))
+return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))
 }
-##
+# This function checks whether xcms will find all the files
-## This function check if xcms will found all the files
-##
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
 checkFilesCompatibilityWithXcms <- function(directory) {
 cat("Checking files filenames compatibilities with xmcs...\n")
 # WHAT XCMS WILL FIND
 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
+filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
 info <- file.info(directory)
-listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
+listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
 files <- c(directory[!info$isdir], listed)
 files_abs <- file.path(getwd(), files)
 exists <- file.exists(files_abs)
 files[exists] <- files_abs[exists]
 files[exists] <- sub("//","/",files[exists])
 # WHAT IS ON THE FILESYSTEM
-filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
+filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
-filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
+filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
 # COMPARISON
 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
 }
 }
+# This function lists the compatible files within the directory, as xcms does
-##
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-## This function check if XML contains special caracters. It also checks integrity and completness.
+getMSFiles <- function (directory) {
-##
+filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
+info <- file.info(directory)
+listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
+files <- c(directory[!info$isdir], listed)
+exists <- file.exists(files)
+files <- files[exists]
+return(files)
+}
+# This function checks if the XML files contain special characters. It also checks their integrity and completeness.
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
 checkXmlStructure <- function (directory) {
 cat("Checking XML structure...\n")
-cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
+cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
-capture=system(cmd,intern=TRUE)
+capture <- system(cmd, intern=TRUE)
 if (length(capture)>0){
 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
 write(capture, stderr())
 }
 }
-##
+# This function checks if the XML files contain non-ASCII characters and removes them
-## This function check if XML contain special characters
-##
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
 deleteXmlBadCharacters<- function (directory) {
 cat("Checking Non ASCII characters in the XML...\n")
-processed=F
+processed <- F
-l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
+l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
 for (i in l){
-cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
+cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
-capture=suppressWarnings(system(cmd,intern=TRUE))
+capture <- suppressWarnings(system(cmd, intern=TRUE))
 if (length(capture)>0){
-cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
+cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
-c=system(cmd,intern=TRUE)
+c <- system(cmd, intern=TRUE)
-capture=""
+capture <- ""
-processed=T
+processed <- T
 }
 }
 if (processed) cat("\n\n")
 return(processed)
 }
-##
+# This function will compute MD5 checksum to check the data integrity
-## This function will compute MD5 checksum to check the data integrity
-##
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
 getMd5sum <- function (directory) {
 cat("Compute md5 checksum...\n")
 # WHAT XCMS WILL FIND
 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
+filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
 info <- file.info(directory)
-listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
+listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
 files <- c(directory[!info$isdir], listed)
 exists <- file.exists(files)
 files <- files[exists]
 library(tools)
 return(as.matrix(md5sum(files)))
 }
 # This function gets the raw file paths from the arguments
-getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) {
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-if (!is.null(listArguments[["zipfile"]]))           zipfile = listArguments[["zipfile"]]
+getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
-if (!is.null(listArguments[["zipfilePositive"]]))   zipfile = listArguments[["zipfilePositive"]]
+if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
-if (!is.null(listArguments[["zipfileNegative"]]))   zipfile = listArguments[["zipfileNegative"]]
+if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
-if (!is.null(listArguments[["singlefile_galaxyPath"]])) {
-singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]];
+if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
-singlefile_sampleNames = listArguments[["singlefile_sampleName"]]
+singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
-}
+singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
-if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) {
+}
-singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]];
+if (exists("singlefile_galaxyPaths")){
-singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]]
+singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
-}
+singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
-if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) {
-singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]];
+singlefile <- NULL
-singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]]
+for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
-}
+singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
-if (exists("singlefile_galaxyPaths")){
+singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
-singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,","))
+# In case, an url is used to import data within Galaxy
-singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,","))
+singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
+singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
-singlefile=NULL
+}
-for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
+}
-singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i]
+return(list(zipfile=zipfile, singlefile=singlefile))
-singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i]
+}
-singlefile[[singlefile_sampleName]] = singlefile_galaxyPath
-}
-}
-for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
-listArguments[[argument]]=NULL
-}
-return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments))
-}
 # This function retrieve the raw file in the working directory
 #   - if zipfile: unzip the file with its directory tree
 #   - if singlefiles: set symlink with the good filename
 #     (the "+" lines replace file.symlink() by file.link(), with file.copy() as fallback)
 # @param singlefile NULL, or a named collection: each name is the file name created in
 #   the working directory, each value is the path of the source file
 # @param zipfile NULL, "" or the path of a zip archive
 # @return "." or, when the archive lists exactly one top-level directory other than
 #   "__MACOSX", the name of that directory
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
 # NOTE(review): length("singlefile") is the length of a string literal, i.e. always 1;
 # length(singlefile) was presumably intended
 if(!is.null(singlefile) && (length("singlefile")>0)) {
 for (singlefile_sampleName in names(singlefile)) {
-singlefile_galaxyPath = singlefile[[singlefile_sampleName]]
+singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
 if(!file.exists(singlefile_galaxyPath)){
-error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
+error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
 print(error_message); stop(error_message)
 }
-file.symlink(singlefile_galaxyPath,singlefile_sampleName)
 # New revision: try a link first and copy the file when the link is reported as failed
+if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
-}
+file.copy(singlefile_galaxyPath, singlefile_sampleName)
-directory = "."
+}
-}
+directory <- "."
-if(!is.null(zipfile) && (zipfile!="")) {
+}
 # NOTE(review): directory is only assigned inside the two if blocks; when both are
 # skipped, return(directory) relies on a variable defined outside this function -
 # confirm the callers always provide singlefile or zipfile
+if(!is.null(zipfile) && (zipfile != "")) {
 if(!file.exists(zipfile)){
-error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
+error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
 print(error_message)
 stop(error_message)
 }
 #list all file in the zip file
-#zip_files=unzip(zipfile,list=T)[,"Name"]
+#zip_files <- unzip(zipfile,list=T)[,"Name"]
 #unzip
 # No exdir is given, so the archive is extracted with the "unzip" program at the default location
 suppressWarnings(unzip(zipfile, unzip="unzip"))
 #get the directory name
-filesInZip=unzip(zipfile, list=T);
+suppressWarnings(filesInZip <- unzip(zipfile, list=T))
 # Keep the first path component of every archive entry
-directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
+directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
 # Discard "__MACOSX" and the entries that file.info() does not report as directories;
 # the names are relative, so they are resolved against the current working directory
-directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
+directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
 # Fall back to "." unless exactly one top-level directory remains
-directory = "."
+directory <- "."
-if (length(directories) == 1) directory = directories
+if (length(directories) == 1) directory <- directories
 cat("files_root_directory\t",directory,"\n")
 }
 return (directory)
 }
+# This function retrieve a xset like object
 # @param xobject an object of class "xcmsSet" or "XCMSnExp"
 # @return xobject itself when it is an "xcmsSet"; for an "XCMSnExp", the object
 #   converted with as(xobject, 'xcmsSet') after its sample classes have been set
 # NOTE(review): there is no branch for any other class, so the function then ends on a
 #   non-matching if and yields NULL without raising an error
 # NOTE(review): class(xobject) == "..." assumes class() returns a single string;
 #   inherits()/is() would be the usual test - confirm before changing
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+getxcmsSetObject <- function(xobject) {
+# XCMS 1.x
+if (class(xobject) == "xcmsSet")
+return (xobject)
+# XCMS 3.x
+if (class(xobject) == "XCMSnExp") {
+# Get the legacy xcmsSet object
+suppressWarnings(xset <- as(xobject, 'xcmsSet'))
 # Use the sample_group column of phenoData as sample classes when it is present,
 # otherwise give every sample the class "."
+if (!is.null(xset@phenoData$sample_group))
+sampclass(xset) <- xset@phenoData$sample_group
+else
+sampclass(xset) <- "."
+return (xset)
+}
+}

Mercurial > repos > lecorguille > xcms_xcmsset

comparison lib.r @ 15:b62808a2a008 draft