diff diffbind.R @ 11:4c7ab9995f9e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit cc4c1c4131518b9cbf986a1f252767ff73ca938e
author iuc
date Sat, 07 Apr 2018 15:45:41 -0400
parents d7725c5596ab
children 1de83981d43c
line wrap: on
line diff
--- a/diffbind.R	Tue Mar 20 04:51:25 2018 -0400
+++ b/diffbind.R	Sat Apr 07 15:45:41 2018 -0400
@@ -4,8 +4,9 @@
 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-	library('getopt')
-	library('DiffBind')
+    library('getopt')
+    library('DiffBind')
+    library('rjson')
 })
 
 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
@@ -14,15 +15,19 @@
 #get options, using the spec as defined by the enclosed list.
 #we read the options from the default: commandArgs(TRUE).
 spec = matrix(c(
-    'verbose', 'v', 2, "integer",
-    'help' , 'h', 0, "logical",
+    'infile' , 'i', 1, "character", # handed as-is to the rjson parser's addData()
     'outfile' , 'o', 1, "character",
+    'scorecol', 'n', 1, "integer", # forwarded to dba(scoreCol=)
+    'lowerbetter', 'l', 1, "logical", # forwarded to dba(bLowerScoreBetter=)
+    'summits', 's', 1, "integer", # when given, forwarded to dba.count(summits=)
+    'th', 't', 1, "double", # threshold forwarded to dba.report() and the plot calls
+    'format', 'f', 1, "character",
     'plots' , 'p', 2, "character",
-    'infile' , 'i', 1, "character",
-    'format', 'f', 1, "character",
-    'th', 't', 1, "double",
     'bmatrix', 'b', 0, "logical",
-    "rdaOpt", "r", 0, "logical"
+    "rdaOpt", "r", 0, "logical", # when set, the workspace is saved to DiffBind_analysis.RData
+    'infoOpt' , 'a', 0, "logical", # when set, analysis info goes to DiffBind_analysis_info.txt
+    'verbose', 'v', 2, "integer",
+    'help' , 'h', 0, "logical"
 ), byrow=TRUE, ncol=4);
 
 opt = getopt(spec);
@@ -34,31 +39,81 @@
     q(status=1);
 }
 
-if ( !is.null(opt$plots) ) {
-    pdf(opt$plots)
+parser <- newJSONParser()
+parser$addData(opt$infile) # the --infile value is handed to the parser directly
+factorList <- parser$getObject()
+filenamesIn <- unname(unlist(factorList[[1]][[2]])) # NOTE(review): assumes the file names sit at [[1]][[2]] of the parsed JSON - confirm against the wrapper
+peaks <- filenamesIn[grepl("peaks.bed", filenamesIn, fixed=TRUE)] # fixed=TRUE: match the suffix literally, "." is not a wildcard
+bams <- filenamesIn[grepl("bamreads.bam", filenamesIn, fixed=TRUE)]
+ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn, fixed=TRUE)]
+
+# get the group and sample id from the peaks filenames (1st and 2nd "-"-separated fields); vapply always yields a character vector
+groups <- vapply(strsplit(peaks, "-", fixed=TRUE), `[`, character(1), 1)
+samples <- vapply(strsplit(peaks, "-", fixed=TRUE), `[`, character(1), 2)
+stopifnot(length(peaks) > 0, length(bams) == length(peaks), length(ctrls) %in% c(0L, length(peaks))) # fail early rather than let data.frame() recycle mismatched columns
+if ( length(ctrls) != 0 ) {
+    sampleTable <- data.frame(SampleID=samples,
+                        Condition=groups,
+                        bamReads=bams,
+                        bamControl=ctrls,
+                        Peaks=peaks,
+                        Tissue=samples, # using "Tissue" column to display ids as labels in PCA plot
+                        stringsAsFactors=FALSE)
+} else {
+    sampleTable <- data.frame(SampleID=samples,
+                        Replicate=samples, # NOTE(review): Replicate is only set in this no-control branch - confirm that is intended
+                        Condition=groups,
+                        bamReads=bams,
+                        Peaks=peaks,
+                        Tissue=samples,
+                        stringsAsFactors=FALSE)
 }
 
-sample = dba(sampleSheet=opt$infile, peakFormat='bed')
-sample_count = dba.count(sample)
+sample <- dba(sampleSheet=sampleTable, peakFormat='bed',
+              scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter)
+sample_count <- if ( is.null(opt$summits) ) {
+    dba.count(sample) # no --summits given: count with DiffBind's defaults
+} else {
+    dba.count(sample, summits=opt$summits)
+}
+
 sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2)
 sample_analyze = dba.analyze(sample_contrast)
-diff_bind = dba.report(sample_analyze)
-orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE)
-dev.off()
+diff_bind = dba.report(sample_analyze, th=opt$th) # report filtered by the --th threshold; plotting now happens separately and only when --plots is given
 
+# Generate plots: only when --plots is given; every figure below goes into that one PDF
+if ( !is.null(opt$plots) ) {
+    pdf(opt$plots)
+    orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th)
+    dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3) # labels come from the Tissue column, which the sample sheet fills with sample ids
+    dba.plotMA(sample_analyze, th=opt$th)
+    dba.plotVolcano(sample_analyze, th=opt$th)
+    dba.plotBox(sample_analyze, th=opt$th)
+    dev.off() # close the PDF device opened above
+}
+
+# Output differential binding sites, sorted by FDR, with a header row (no append: appending a header makes write.table warn)
 resSorted <- diff_bind[order(diff_bind$FDR),]
-write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE)
+write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, row.names = FALSE)
 
 # Output binding affinity scores
 if (!is.null(opt$bmatrix)) {
     bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
-    write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
+    write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE) # col.names=FALSE dropped, so a header row is written
 }
 
-## Output RData file
-
+# Output RData file: save the whole workspace (save.image) when the rdaOpt flag is set
 if (!is.null(opt$rdaOpt)) {
     save.image(file = "DiffBind_analysis.RData")
 }
 
-sessionInfo()
+# Output analysis info: printed summaries of the counted and analyzed DBA objects plus the R session, appended to one text file
+if (!is.null(opt$infoOpt)) {
+    info <- "DiffBind_analysis_info.txt"
+    cat("dba.count Info\n\n", file=info, append = TRUE)
+    capture.output(sample_count, file=info, append=TRUE) # the dba.count() result, matching the section heading (was the pre-count `sample`)
+    cat("\ndba.analyze Info\n\n", file=info, append = TRUE)
+    capture.output(sample_analyze, file=info, append=TRUE)
+    cat("\nSessionInfo\n\n", file=info, append = TRUE)
+    capture.output(sessionInfo(), file=info, append=TRUE)
+}
\ No newline at end of file