Mercurial > repos > lecorguille > xcms_fillpeaks
changeset 4:2edfa5e1f719 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 83b80dcd96b379518c2e4ace992affc889d32ca6
| author | lecorguille | 
|---|---|
| date | Fri, 08 Apr 2016 10:39:59 -0400 | 
| parents | 5a54397dbc97 | 
| children | cd351af95d14 | 
| files | README.rst abims_xcms_fillPeaks.xml lib.r macros.xml planemo.sh planemo_test.sh repository_dependencies.xml test-data/log.txt test-data/sacuri.zip test-data/sacuri_dir_root.zip test-data/xset.group.retcor.group.RData test-data/xset.group.retcor.group.fillPeaks.RData tool_dependencies.xml xcms.r | 
| diffstat | 14 files changed, 788 insertions(+), 77 deletions(-) [+] | 
line wrap: on
 line diff
--- a/README.rst Mon Feb 22 16:40:19 2016 -0500 +++ b/README.rst Fri Apr 08 10:39:59 2016 -0400 @@ -2,6 +2,11 @@ Changelog/News -------------- +**Version 2.0.6 - 04/04/2016** + +- TEST: refactoring to pass planemo test using conda dependencies + + **Version 2.0.5 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed @@ -17,3 +22,14 @@ - IMPROVEMENT: parameter labels have changed to facilitate their reading. + +Test Status +----------- + +Planemo test using conda: passed + +Planemo test using source env.sh: passed + +Planemo shed_test : passed + +
--- a/abims_xcms_fillPeaks.xml Mon Feb 22 16:40:19 2016 -0500 +++ b/abims_xcms_fillPeaks.xml Fri Apr 08 10:39:59 2016 -0400 @@ -1,20 +1,16 @@ -<tool id="abims_xcms_fillPeaks" name="xcms.fillPeaks" version="2.0.5"> +<tool id="abims_xcms_fillPeaks" name="xcms.fillPeaks" version="2.0.6"> <description>Integrate the signal in the region of that peak group not represented and create a new peak</description> - <requirements> - <requirement type="package" version="3.1.2">R</requirement> - <requirement type="binary">Rscript</requirement> - <requirement type="package" version="1.44.0">xcms</requirement> - <requirement type="package" version="2.2.0">xcms_w4m_script</requirement> - </requirements> - - <stdio> - <exit_code range="1:" level="fatal" /> - </stdio> + <macros> + <import>macros.xml</import> + </macros> + + <expand macro="requirements"/> + <expand macro="stdio"/> <command><![CDATA[ - xcms.r + @COMMAND_XCMS_SCRIPT@ xfunction fillPeaks image $image @@ -22,14 +18,10 @@ method $method - ###if $zip_file: - ## zipfile $zip_file - ###end if - ; - return=\$?; - mv log.txt $log; - cat $log; - sh -c "exit \$return" + #if $zip_file: + zipfile $zip_file + #end if + @COMMAND_LOG_EXIT@ ]]></command> @@ -40,7 +32,7 @@ <option value="MSW" >MSW</option> </param> <!-- To pass planemo test --> - <!--<param name="zip_file" type="hidden_data" format="no_unzip.zip" label="Zip file" />--> + <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" help="Use only if you get a message which say that your original zip file have been deleted on the server." /> </inputs> <outputs> @@ -52,15 +44,14 @@ <test> <param name="image" value="xset.group.retcor.group.RData"/> <param name="method" value="chrom"/> - <param name="zip_file" value="sacuri.zip"/> - <!--<output name="xsetRData" file="xset.group.retcor.group.fillPeaks.RData" />--> + <param name="zip_file" value="sacuri_dir_root.zip" ftype="zip" /> <output name="log"> <assert_contents> - <has_text text="object with 9 samples" /> - <has_text text="Time range: 0.7-1139.9 seconds (0-19 minutes)" /> - <has_text text="Mass range: 50.0019-999.9863 m/z" /> - <has_text text="Peaks: 157780 (about 17531 per sample)" /> - <has_text text="Peak Groups: 6761" /> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" /> + <has_text text="Mass range: 50.0021-999.9863 m/z" /> + <has_text text="Peaks: 199718 (about 49930 per sample)" /> + <has_text text="Peak Groups: 48958" /> <has_text text="Sample classes: bio, blank" /> </assert_contents> </output> @@ -68,20 +59,8 @@ </tests> <help><![CDATA[ - -.. class:: infomark - -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu - -.. class:: infomark - -**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M] - - | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. - - ---------------------------------------------------- +@HELP_AUTHORS@ ============== Xcms.fillPeaks @@ -207,6 +186,11 @@ Changelog/News -------------- +**Version 2.0.6 - 04/04/2016** + +- TEST: refactoring to pass planemo test using conda dependencies + + **Version 2.0.5 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed @@ -225,10 +209,8 @@ ]]></help> - <citations> - <citation type="doi">10.1021/ac051437y</citation> - <citation type="doi">10.1093/bioinformatics/btu813</citation> - </citations> + + <expand macro="citation" /> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib.r Fri Apr 08 10:39:59 2016 -0400 @@ -0,0 +1,400 @@ +# lib.r version="2.0.1" +#Authors ABiMS TEAM +#Lib.r for Galaxy Workflow4Metabo +#version 2.2 +#Based on lib.r 2.1 +#Modifications made by Guitton Yann +#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet +#Note if scanrange is used a warning is prompted in R console but do not stop PDF generation + + + + +#@author Y. Guitton +getBPC <- function(file,rtcor=NULL, ...) { + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + #plotChrom(xcmsRaw(file), base=T) +} + +#@author Y. Guitton +getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { + cat("Creating BIC pdf...\n") + + if (is.null(xcmsSet)) { + cat("Enter an xcmsSet \n") + stop() + } else { + files <- filepaths(xcmsSet) + } + + class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) + } + + N <- dim(phenoData(xcmsSet))[1] + + TIC <- vector("list",N) + + + for (j in 1:N) { + + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] else + rtcor <- NULL + + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + # TIC[[j]][,1]<-rtcor + } + + + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + #print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(class)==2){ + k=1 + l=2 + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + + dev.off() #pdf(pdfname,w=16,h=10) + + invisible(TIC) +} + + + +#@author Y. Guitton +getTIC <- function(file,rtcor=NULL) { + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) +} + +## +## overlay TIC from all files in current folder or from xcmsSet, create pdf +## +#@author Y. Guitton +getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { + cat("Creating TIC pdf...\n") + + if (is.null(xcmsSet)) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + if (is.null(files)) + files <- getwd() + info <- file.info(files) + listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) + files <- c(files[!info$isdir], listed) + } else { + files <- filepaths(xcmsSet) + } + + class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) + } + + N <- length(files) + TIC <- vector("list",N) + + for (i in 1:N) { + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + #print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(class)==2){ + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + dev.off() #pdf(pdfname,w=16,h=10) + + invisible(TIC) +} + + + +## +## Get the polarities from all the samples of a condition +#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM +getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { + cat("Creating the sampleMetadata file...\n") + + #Create the sampleMetada dataframe + sampleMetadata=xset@phenoData + sampleNamesOrigin=rownames(sampleMetadata) + sampleNamesMakeNames=make.names(sampleNamesOrigin) + + if (any(duplicated(sampleNamesMakeNames))) { + write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) + for (sampleName in sampleNamesOrigin) { + write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) + } + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } + + if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { + cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") + for (sampleName in sampleNamesOrigin) { + cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) + } + } + + sampleMetadata$sampleMetadata=sampleNamesMakeNames + sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns + rownames(sampleMetadata)=NULL + + #Create a list of files name in the current directory + list_files=xset@filepaths + #For each sample file, the following actions are done + for (file in list_files){ + #Check if the file is in the CDF format + if (!mzR:::netCDFIsFile(file)){ + + # If the column isn't exist, with add one filled with NA + if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA + + #Create a simple xcmsRaw object for each sample + xcmsRaw=xcmsRaw(file) + #Extract the polarity (a list of polarities) + polarity=xcmsRaw@polarity + #Verify if all the scans have the same polarity + uniq_list=unique(polarity) + if (length(uniq_list)>1){ + polarity="mixed" + } else { + polarity=as.character(uniq_list) + } + #Transforms the character to obtain only the sample name + filename=basename(file) + library(tools) + samplename=file_path_sans_ext(filename) + + #Set the polarity attribute + sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity + + #Delete xcmsRaw object because it creates a bug for the fillpeaks step + rm(xcmsRaw) + } + + } + + write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + + return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) + +} + + +## +## This function check if xcms will found all the files +## +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM +checkFilesCompatibilityWithXcms <- function(directory) { + cat("Checking files filenames compatibilities with xmcs...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + files_abs <- file.path(getwd(), files) + exists <- file.exists(files_abs) + files[exists] <- files_abs[exists] + files[exists] <- sub("//","/",files[exists]) + + # WHAT IS ON THE FILESYSTEM + filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] + + # COMPARISON + if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { + write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) + write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + + } +} + + + +## +## This function check if XML contains special caracters. It also checks integrity and completness. +## +#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM +checkXmlStructure <- function (directory) { + cat("Checking XML structure...\n") + + cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture=system(cmd,intern=TRUE) + + if (length(capture)>0){ + #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) + write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) + write(capture, stderr()) + stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") + } + +} + + +## +## This function check if XML contain special characters +## +#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM +deleteXmlBadCharacters<- function (directory) { + cat("Checking Non ASCII characters in the XML...\n") + + processed=F + l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) + for (i in l){ + cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") + capture=suppressWarnings(system(cmd,intern=TRUE)) + if (length(capture)>0){ + cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) + print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) + c=system(cmd,intern=TRUE) + capture="" + processed=T + } + } + if (processed) cat("\n\n") + return(processed) +} + + +## +## This function will compute MD5 checksum to check the data integrity +## +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +getMd5sum <- function (directory) { + cat("Compute md5 checksum...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] + + library(tools) + + #cat("\n\n") + + return(as.matrix(md5sum(files))) +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Apr 08 10:39:59 2016 -0400 @@ -0,0 +1,51 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="3.1.2">R</requirement> + <requirement type="package" version="0.4_1">r-snow</requirement> + <requirement type="package" version="1.44.0">bioconductor-xcms</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1" level="fatal" /> + </stdio> + </xml> + + <token name="@COMMAND_XCMS_SCRIPT@"> + LANG=C Rscript $__tool_directory__/xcms.r + </token> + + <token name="@COMMAND_LOG_EXIT@"> + ; + return=\$?; + mv log.txt $log; + cat $log; + sh -c "exit \$return" + </token> + + <token name="@HELP_AUTHORS@"> +.. class:: infomark + +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + +.. class:: infomark + +**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] + + | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. + +--------------------------------------------------- + + </token> + + + <xml name="citation"> + <citations> + <citation type="doi">10.1021/ac051437y</citation> + <citation type="doi">10.1093/bioinformatics/btu813</citation> + </citations> + </xml> +</macros>
--- a/planemo.sh Mon Feb 22 16:40:19 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -planemo shed_init -f --name=xcms_fillpeaks --owner=lecorguille --description="[W4M][GC-MS] XCMS R Package - Preprocessing - Integrate areas of missing peaks" --homepage_url="http://workflow4metabolomics.org" --long_description="Part of the W4M project: http://workflow4metabolomics.org\n\nXCMS: http://www.bioconductor.org/packages/release/bioc/html/xcms.html\n\nFor each sample, identify peak groups where that sample is not represented. For each of those peak groups, integrate the signal in the region of that peak group and create a new peak.\n\nBEWARE: this tool don't come with its script. You will need to install the dedicated package_xcms_w4m_script too" --category="Metabolomics"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo_test.sh Fri Apr 08 10:39:59 2016 -0400 @@ -0,0 +1,21 @@ +planemo conda_init +planemo conda_install . +planemo test --install_galaxy --conda_dependency_resolution --galaxy_branch "dev" + +#All 1 test(s) executed passed. +#abims_xcms_fillPeaks[0]: passed + + +source /w/galaxy/dev/shed_tools_tool_dependency_dir/R/3.1.2/iuc/package_r_3_1_2/1ca39eb16186/env.sh +source /w/galaxy/dev/shed_tools_tool_dependency_dir/bioconductor-xcms/1.44.0/lecorguille/package_bioconductor_xcms_1_44_0/0c38f7d43e08/env.sh +planemo test --install_galaxy --galaxy_branch "dev" + +#All 1 test(s) executed passed. +#abims_xcms_fillPeaks[0]: passed + + + + +planemo shed_test --install_galaxy --galaxy_branch "dev" -t testtoolshed +#All 1 test(s) executed passed. +#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_fillpeaks/abims_xcms_fillPeaks/2.0.6[0]: passed
--- a/repository_dependencies.xml Mon Feb 22 16:40:19 2016 -0500 +++ b/repository_dependencies.xml Fri Apr 08 10:39:59 2016 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0"?> <repositories> <repository changeset_revision="7800ba9a4c1e" name="no_unzip_datatype" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> - <repository changeset_revision="d64562a4ebb3" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="d64562a4ebb3" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> </repositories>
--- a/test-data/log.txt Mon Feb 22 16:40:19 2016 -0500 +++ b/test-data/log.txt Fri Apr 08 10:39:59 2016 -0400 @@ -1,55 +1,54 @@ PACKAGE INFO -parallel 3.1.2 -BiocGenerics 0.14.0 -Biobase 2.28.0 -Rcpp 0.12.0 -mzR 2.2.2 +parallel 3.2.2 +BiocGenerics 0.16.1 +Biobase 2.30.0 +Rcpp 0.12.2 +mzR 2.4.1 xcms 1.44.0 -snow 0.3.13 +snow 0.4.1 batch 1.1.4 ARGUMENTS INFO xfunction fillPeaks -image test-data/xset.group.retcor.group.RData -xsetRdataOutput test-data/xset.group.retcor.group.fillPeaks.RData +image /tmp/tmpk97EMC/files/000/dataset_1.dat +xsetRdataOutput /tmp/tmpk97EMC/files/000/dataset_3.dat method chrom -zipfile test-data/sacuri.zip +zipfile /tmp/tmpk97EMC/files/000/dataset_2.dat INFILE PROCESSING INFO ARGUMENTS PROCESSING INFO +files_root_directory sacuri +Compute md5 checksum... MAIN PROCESSING INFO -./sacuri/bio/HU_neg_028.mzXML -./sacuri/bio/HU_neg_060.mzXML -./sacuri/bio/HU_neg_051.mzXML -./sacuri/bio/HU_neg_017.mzXML -./sacuri/bio/HU_neg_034.mzXML -./sacuri/blank/Blanc09.mzXML -./sacuri/blank/Blanc06.mzXML -./sacuri/blank/Blanc12.mzXML -./sacuri/blank/Blanc04.mzXML +sacuri/bio/HU_neg_017.mzXML +sacuri/bio/HU_neg_028.mzXML +sacuri/blank/Blanc04.mzXML +sacuri/blank/Blanc06.mzXML + + null device 1 XSET OBJECT INFO -An "xcmsSet" object with 9 samples +An "xcmsSet" object with 4 samples -Time range: 0.7-1139.9 seconds (0-19 minutes) -Mass range: 50.0019-999.9863 m/z -Peaks: 157780 (about 17531 per sample) -Peak Groups: 6761 +Time range: 0.2-1140.1 seconds (0-19 minutes) +Mass range: 50.0021-999.9863 m/z +Peaks: 199718 (about 49930 per sample) +Peak Groups: 48958 Sample classes: bio, blank Profile settings: method = bin step = 0.01 -Memory usage: 17.4 MB +Memory usage: 27.3 MB DONE
--- a/tool_dependencies.xml Mon Feb 22 16:40:19 2016 -0500 +++ b/tool_dependencies.xml Fri Apr 08 10:39:59 2016 -0400 @@ -1,12 +1,9 @@ <?xml version="1.0"?> <tool_dependency> <package name="R" version="3.1.2"> - <repository changeset_revision="c987143177d4" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - <package name="xcms" version="1.44.0"> - <repository changeset_revision="4443617bdd85" name="package_r_xcms_1_44_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="xcms_w4m_script" version="2.2.0"> - <repository changeset_revision="115cf2b43a3c" name="package_xcms_w4m_script_2_2_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="bioconductor-xcms" version="1.44.0"> + <repository changeset_revision="58ebb405a3d6" name="package_bioconductor_xcms_1_44_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms.r Fri Apr 08 10:39:59 2016 -0400 @@ -0,0 +1,246 @@ +#!/usr/bin/env Rscript +# xcms.r version="2.2.0" +#Authors ABIMS TEAM +#BPC Addition from Y.guitton + + +# ----- LOG FILE ----- +log_file=file("log.txt", open = "wt") +sink(log_file) +sink(log_file, type = "output") + + +# ----- PACKAGE ----- +cat("\tPACKAGE INFO\n") +#pkgs=c("xcms","batch") +pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") +for(pkg in pkgs) { + suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") +} +source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } +cat("\n\n"); + + + + + +# ----- ARGUMENTS ----- +cat("\tARGUMENTS INFO\n") +listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects +write.table(as.matrix(listArguments), col.names=F, quote=F, sep='\t') + +cat("\n\n"); + + +# ----- ARGUMENTS PROCESSING ----- +cat("\tINFILE PROCESSING INFO\n") + +#image is an .RData file necessary to use xset variable given by previous tools +if (!is.null(listArguments[["image"]])){ + load(listArguments[["image"]]); listArguments[["image"]]=NULL +} + +#Import the different functions +source_local("lib.r") + +cat("\n\n") + +#Import the different functions + +# ----- PROCESSING INFILE ----- +cat("\tARGUMENTS PROCESSING INFO\n") + +# Save arguments to generate a report +if (!exists("listOFlistArguments")) listOFlistArguments=list() +listOFlistArguments[[paste(format(Sys.time(), "%y%m%d-%H:%M:%S_"),listArguments[["xfunction"]],sep="")]] = listArguments + + +#saving the commun parameters +thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments + +xsetRdataOutput = paste(thefunction,"RData",sep=".") +if (!is.null(listArguments[["xsetRdataOutput"]])){ + xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL +} + +rplotspdf = "Rplots.pdf" +if (!is.null(listArguments[["rplotspdf"]])){ + rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL +} + +sampleMetadataOutput = "sampleMetadata.tsv" +if (!is.null(listArguments[["sampleMetadataOutput"]])){ + sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL +} + + + + +if (thefunction %in% c("xcmsSet","retcor")) { + ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL + bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL +} + +#necessary to unzip .zip file uploaded to Galaxy +#thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories + + +if (!is.null(listArguments[["zipfile"]])){ + zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL +} + +if (!is.null(listArguments[["library"]])){ + directory=listArguments[["library"]]; listArguments[["library"]]=NULL + if(!file.exists(directory)){ + error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") + print(error_message) + stop(error_message) + } +} + +# We unzip automatically the chromatograms from the zip files. +if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { + if(exists("zipfile") && (zipfile!="")) { + if(!file.exists(zipfile)){ + error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") + print(error_message) + stop(error_message) + } + + #list all file in the zip file + #zip_files=unzip(zipfile,list=T)[,"Name"] + + + #unzip + suppressWarnings(unzip(zipfile, unzip="unzip")) + + #get the directory name + filesInZip=unzip(zipfile, list=T); + directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); + directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory = "." + if (length(directories) == 1) directory = directories + + cat("files_root_directory\t",directory,"\n") + + # + md5sumList=list("origin"=getMd5sum(directory)) + + # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. + # Remove because can create issue with some clean files + #@TODO: fix me + #if (deleteXmlBadCharacters(directory)) { + # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) + #} + + } +} + +#addition of the directory to the list of arguments in the first position +if (thefunction == "xcmsSet") { + checkXmlStructure(directory) + checkFilesCompatibilityWithXcms(directory) + listArguments=append(directory, listArguments) +} + + +#addition of xset object to the list of arguments in the first position +if (exists("xset")){ + listArguments=append(list(xset), listArguments) +} + +cat("\n\n") + + + + + + +# ----- MAIN PROCESSING INFO ----- +cat("\tMAIN PROCESSING INFO\n") + + +#Verification of a group step before doing the fillpeaks job. + +if (thefunction == "fillPeaks") { + res=try(is.null(groupnames(xset))) + if (class(res) == "try-error"){ + error<-geterrmessage() + write(error, stderr()) + stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") + } + +} + +#change the default display settings +#dev.new(file="Rplots.pdf", width=16, height=12) +pdf(file=rplotspdf, width=16, height=12) +if (thefunction == "group") { + par(mfrow=c(2,2)) +} +#else if (thefunction == "retcor") { +#try to change the legend display +# par(xpd=NA) +# par(xpd=T, mar=par()$mar+c(0,0,0,4)) +#} + + +#execution of the function "thefunction" with the parameters given in "listArguments" +xset = do.call(thefunction, listArguments) + + +cat("\n\n") + +dev.off() #dev.new(file="Rplots.pdf", width=16, height=12) + +if (thefunction == "xcmsSet") { + + #transform the files absolute pathways into relative pathways + xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) + + if(exists("zipfile") && (zipfile!="")) { + + #Modify the samples names (erase the path) + for(i in 1:length(sampnames(xset))){ + + sample_name=unlist(strsplit(sampnames(xset)[i], "/")) + sample_name=sample_name[length(sample_name)] + sample_name= unlist(strsplit(sample_name,"[.]"))[1] + sampnames(xset)[i]=sample_name + + } + + } + +} + +# -- TIC -- +if (thefunction == "xcmsSet") { + sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") + getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) +} else if (thefunction == "retcor") { + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") + getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) +} + +cat("\n\n") + + +# ----- EXPORT ----- + +cat("\tXSET OBJECT INFO\n") +print(xset) +#delete the parameters to avoid the passage to the next tool in .RData image + + +#saving R data in .Rdata file to save the variables used in the present tool +objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") +save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) + +cat("\n\n") + + +cat("\tDONE\n") +
