Mercurial > repos > pieterlukasse > prims_metabolomics
diff xcms_get_alignment_eic.r @ 49:f772a5caa86a
Added more options and better documentation.
Added MsClust support for parsing XCMS alignment results.
Improved output reports for XCMS wrappers.
New tools.
author | pieter.lukasse@wur.nl |
---|---|
date | Wed, 10 Dec 2014 22:03:27 +0100 |
parents | |
children | 35f506f30ae4 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_get_alignment_eic.r Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,153 @@ +# shows all alignment results in a rt region +## read args: +args <- commandArgs(TRUE) +# xset data: +args.xsetData <- args[1] + +args.rtStart <- strtoi(args[2]) +args.rtEnd <- strtoi(args[3]) + +# limit max diff to 600 and minNrSamples to at least 2 : +if (args.rtEnd - args.rtStart > 600) + stop("The RT window should be <= 600") + +args.minNrSamples <- strtoi(args[4]) #min nr samples +if (args.minNrSamples < 2) + stop("Set 'Minimum number of samples' to 2 or higher") + + +args.sampleNames <- strsplit(args[5], ",")[[1]] +# trim leading and trailing spaces: +args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames) + +## report files +args.htmlReportFile <- args[6] +args.htmlReportFile.files_path <- args[7] + + +if (length(args) == 8) +{ + args.outLogFile <- args[8] + # suppress messages: + # Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 + msg <- file(args.outLogFile, open="wt") + sink(msg, type="message") + sink(msg, type="output") +} + + + +tryCatch( + { + library(metaMS) + + # load the xset data : + xsetData <- readRDS(args.xsetData) + # if here to support both scenarios: + if ("xcmsSet" %in% slotNames(xsetData) ) + { + xsetData <- xsetData@xcmsSet + } + + # report + dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE) + message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path)) + + write(paste("<html><body><h1>Extracted Ion Chromatograms (EIC) of alignments with peaks in ",args.minNrSamples, " or more samples</h1>"), + file=args.htmlReportFile) + + gt <- groups(xsetData) + message("\nParsed groups... ") + groupidx1 <- which(gt[,"rtmed"] > args.rtStart & gt[,"rtmed"] < args.rtEnd & gt[,"npeaks"] >= args.minNrSamples) # this should be only on samples selected + + if (length(groupidx1) > 0) + { + message("\nGetting EIC... ") + eiccor <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames) + #eicraw <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames, rt = "raw") + + #sampleNamesIdx <- which(sampnames(LC$xset@xcmsSet) %in% args.sampleNames, arr.ind = TRUE) + #or (from bioconductor code for getEIC): NB: this is assuming sample indexes used in data are ordered after the order of sample names!! + sampNames <- sampnames(xsetData) + sampleNamesIdx <- match( args.sampleNames, sampNames) + message(paste("Samples: ", sampleNamesIdx)) + + #TODO html <- paste(html, "<table><tbody>") + message(paste("\nPlotting figures... ")) + + #get the mz list (interestingly, this [,"mz"] is relatively slow): + peakMzList <- xsetData@peaks[,"mz"] + peakSampleList <- xsetData@peaks[,"sample"] + #signal to noise list: + peakSnList <- xsetData@peaks[,"sn"] + + message(paste("Total nr of peaks: ", length(peakMzList))) + + for (i in 1:length(groupidx1)) + { + groupMembers <- xsetData@groupidx[[groupidx1[i]]] + + groupMzList <- "" + groupSampleList <- "" + finalGroupSize <- 0 + + for (j in 1:length(groupMembers)) + { + #get only the peaks from the selected samples: + memberSample <- peakSampleList[groupMembers[j]] + memberSn <- peakSnList[groupMembers[j]] + if (!is.na(memberSn) && memberSample %in% sampleNamesIdx) + { + message(paste("Group: ", groupidx1[i], " / Member sample: ", memberSample)) + memberMz <- peakMzList[groupMembers[j]] + + + if (finalGroupSize == 0) + { + groupMzList <- memberMz + groupSampleList <- sampNames[memberSample] + } else { + groupMzList <- paste(groupMzList,",",memberMz, sep="") + groupSampleList <- paste(groupSampleList,",",sampNames[memberSample], sep="") + } + + finalGroupSize <- finalGroupSize +1 + } + } + # here we do the real check on group size and only the groups that have enough + # members with signal to noise > 0 will be plotted here: + if (finalGroupSize >= args.minNrSamples) + { + message(paste("Plotting figure ",i, " of max ", length(groupidx1)," figures... ")) + + figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="") + write(paste("<img src='", "figure", i,".png' />", sep="") , + file=args.htmlReportFile, append=TRUE) + + png( figureName, type="cairo" ) + plot(eiccor, xsetData, groupidx = i) + devname = dev.off() + + write(paste("<p>Alignment id: [", groupidx1[i], "]. m/z list of peaks in alignment with signal/noise <> NA:<br/>", groupMzList,"</p>", sep="") , + file=args.htmlReportFile, append=TRUE) + write(paste("<p>m/z values found in the following samples respectively: <br/>", groupSampleList,"</p>", sep="") , + file=args.htmlReportFile, append=TRUE) + } + } + + } + + write("</body><html>", + file=args.htmlReportFile, append=TRUE) + message("finished generating report") + # unlink(args.htmlReportFile) + cat("\nWarnings================:\n") + str( warnings() ) + }, + error=function(cond) { + sink(NULL, type="message") # default setting + sink(stderr(), type="output") + message("\nERROR: ===========\n") + print(cond) + } + )