diff diffbind.R @ 16:163688bb8f73 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 77828512472bf9815bdda725a58a2321f7803661"
author iuc
date Wed, 18 Nov 2020 12:54:07 +0000
parents 194e3f2c1d86
children 2605cbdaa7d8
line wrap: on
line diff
--- a/diffbind.R	Tue Jul 09 18:46:09 2019 -0400
+++ b/diffbind.R	Wed Nov 18 12:54:07 2020 +0000
@@ -1,12 +1,14 @@
 ## Setup R error handling to go to stderr
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = FALSE, error = function() {
+    cat(geterrmessage(), file = stderr()); q("no", 1, FALSE)  # save = "no", status = 1, runLast = FALSE
+})
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-    library('getopt')
-    library('DiffBind')
-    library('rjson')
+    library("getopt")
+    library("DiffBind")
+    library("rjson")
 })
 
 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
@@ -14,128 +16,128 @@
 
 #get options, using the spec as defined by the enclosed list.
 #we read the options from the default: commandArgs(TRUE).
-spec = matrix(c(
-    'infile' , 'i', 1, "character",
-    'outfile' , 'o', 1, "character",
-    'scorecol', 'n', 1, "integer",
-    'lowerbetter', 'l', 1, "logical",
-    'summits', 's', 1, "integer",
-    'th', 't', 1, "double",
-    'format', 'f', 1, "character",
-    'plots' , 'p', 2, "character",
-    'bmatrix', 'b', 0, "logical",
+spec <- matrix(c(
+    "infile", "i", 1, "character",
+    "outfile", "o", 1, "character",
+    "scorecol", "n", 1, "integer",
+    "lowerbetter", "l", 1, "logical",
+    "summits", "s", 1, "integer",
+    "th", "t", 1, "double",
+    "format", "f", 1, "character",
+    "plots", "p", 2, "character",
+    "bmatrix", "b", 0, "logical",
     "rdaOpt", "r", 0, "logical",
-    'infoOpt' , 'a', 0, "logical",
-    'verbose', 'v', 2, "integer",
-    'help' , 'h', 0, "logical"
-), byrow=TRUE, ncol=4);
+    "infoOpt", "a", 0, "logical",
+    "verbose", "v", 2, "integer",
+    "help", "h", 0, "logical"
+), byrow = TRUE, ncol = 4);
 
-opt = getopt(spec);
+opt <- getopt(spec);
 
 # if help was asked for print a friendly message
 # and exit with a non-zero error code
-if ( !is.null(opt$help) ) {
-    cat(getopt(spec, usage=TRUE));
-    q(status=1);
+if (!is.null(opt$help)) {
+    cat(getopt(spec, usage = TRUE));
+    q(status = 1);
 }
 
 parser <- newJSONParser()
 parser$addData(opt$infile)
-factorList <- parser$getObject()
-filenamesIn <- unname(unlist(factorList[[1]][[2]]))
-peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)]
-bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)]
-ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)]
+factor_list <- parser$getObject()
+filenames_in <- unname(unlist(factor_list[[1]][[2]]))
+peaks <- filenames_in[grepl("peaks.bed", filenames_in)]
+bams <- filenames_in[grepl("bamreads.bam", filenames_in)]
+ctrls <- filenames_in[grepl("bamcontrol.bam", filenames_in)]
 
 # get the group and sample id from the peaks filenames
-groups <- sapply(strsplit(peaks,"-"), `[`, 1)
-samples <- sapply(strsplit(peaks,"-"), `[`, 2)
+groups <- vapply(strsplit(peaks, "-"), `[`, character(1), 1)  # text before the first "-"; always a character vector
+samples <- vapply(strsplit(peaks, "-"), `[`, character(1), 2)  # text between the first and second "-" (NA if absent)
 
-if ( length(ctrls) != 0 ) {
-    sampleTable <- data.frame(SampleID=samples,
-                        Condition=groups,
-                        bamReads=bams,
-                        bamControl=ctrls,
-                        Peaks=peaks,
-                        Tissue=samples) # using "Tissue" column to display ids as labels in PCA plot
+if (length(ctrls) != 0) {
+    sample_table <- data.frame(SampleID = samples,
+                        Condition = groups,
+                        bamReads = bams,
+                        bamControl = ctrls,
+                        Peaks = peaks,
+                        Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot
 } else {
 
-    sampleTable <- data.frame(SampleID=samples,
-                        Replicate=samples,
-                        Condition=groups,
-                        bamReads=bams,
-                        Peaks=peaks,
-                        Tissue=samples)
+    sample_table <- data.frame(SampleID = samples,
+                        Replicate = samples,
+                        Condition = groups,
+                        bamReads = bams,
+                        Peaks = peaks,
+                        Tissue = samples)
 }
 
-sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter)
+sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter)
 
-if ( !is.null(opt$summits) ) {
-    sample_count = dba.count(sample, summits=opt$summits)
+if (!is.null(opt$summits)) {
+    sample_count <- dba.count(sample, summits = opt$summits)
 } else {
-    sample_count = dba.count(sample)
+    sample_count <- dba.count(sample)
 }
 
-sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2)
-sample_analyze = dba.analyze(sample_contrast)
-diff_bind = dba.report(sample_analyze, th=opt$th)
+sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
+sample_analyze <- dba.analyze(sample_contrast)
+diff_bind <- dba.report(sample_analyze, th = opt$th)
 
 # Generate plots
-if ( !is.null(opt$plots) ) {
+if (!is.null(opt$plots)) {
     pdf(opt$plots)
-    orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th)
-    dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3)
-    dba.plotMA(sample_analyze, th=opt$th)
-    dba.plotVolcano(sample_analyze, th=opt$th)
-    dba.plotBox(sample_analyze, th=opt$th)
+    orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
+    dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
+    dba.plotMA(sample_analyze, th = opt$th)
+    dba.plotVolcano(sample_analyze, th = opt$th)
+    dba.plotBox(sample_analyze, th = opt$th)
     dev.off()
 }
 
 # Output differential binding sites
-resSorted <- diff_bind[order(diff_bind$FDR),]
+res_sorted <- diff_bind[order(diff_bind$FDR), ]
 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
 if (opt$format == "bed") {
-    resSorted  <- data.frame(Chrom=seqnames(resSorted),
-        Start=start(resSorted) - 1,
-        End=end(resSorted),
-        Name=rep("DiffBind", length(resSorted)),
-        Score=rep("0", length(resSorted)),
-        Strand=gsub("\\*", ".", strand(resSorted)))
+    res_sorted  <- data.frame(Chrom = seqnames(res_sorted),
+        Start = start(res_sorted) - 1,
+        End = end(res_sorted),
+        Name = rep("DiffBind", length(res_sorted)),
+        Score = rep("0", length(res_sorted)),
+        Strand = gsub("\\*", ".", strand(res_sorted)))
 } else if (opt$format == "interval") {
      # Output as interval
-    df <- as.data.frame(resSorted)
+    df <- as.data.frame(res_sorted)
     extrainfo <- NULL
-    for (i in 1:nrow(df)) {
-        extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse="|")
+    for (i in seq_len(nrow(df))) {
+        extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
     }
-    resSorted  <- data.frame(Chrom=seqnames(resSorted),
-        Start=start(resSorted) - 1,
-        End=end(resSorted),
-        Name=rep("DiffBind", length(resSorted)),
-        Score=rep("0", length(resSorted)),
-        Strand=gsub("\\*", ".", strand(resSorted)),
-        Comment=extrainfo)
+    res_sorted  <- data.frame(Chrom = seqnames(res_sorted),
+        Start = start(res_sorted) - 1,
+        End = end(res_sorted),
+        Name = rep("DiffBind", length(res_sorted)),
+        Score = rep("0", length(res_sorted)),
+        Strand = gsub("\\*", ".", strand(res_sorted)),
+        Comment = extrainfo)
 } else {
     # Output as 0-based tabular
-    resSorted <- data.frame(Chrom=seqnames(resSorted),
-        Start=start(resSorted) - 1,
-        End=end(resSorted),
-        Name=rep("DiffBind", length(resSorted)),
-        Score=rep("0", length(resSorted)),
-        Strand=gsub("\\*", ".", strand(resSorted)),
-        mcols(resSorted))
+    res_sorted <- data.frame(Chrom = seqnames(res_sorted),
+        Start = start(res_sorted) - 1,
+        End = end(res_sorted),
+        Name = rep("DiffBind", length(res_sorted)),
+        Score = rep("0", length(res_sorted)),
+        Strand = gsub("\\*", ".", strand(res_sorted)),
+        mcols(res_sorted))
 }
-write.table(resSorted, file = opt$outfile, sep="\t", quote = FALSE, row.names = FALSE)
+write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)
 
 # Output binding affinity scores
 if (!is.null(opt$bmatrix)) {
-    bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
+    bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
     # Output as 0-based tabular
-    bmat <- data.frame(Chrom=bmat[, 1],
-        Start=bmat[, 2] - 1,
-        End=bmat[, 3],
+    bmat <- data.frame(Chrom = bmat[, 1],
+        Start = bmat[, 2] - 1,
+        End = bmat[, 3],
         bmat[, 4:ncol(bmat)])
-    write.table(bmat, file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE)
+    write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
 }
 
 # Output RData file
@@ -146,10 +148,10 @@
 # Output analysis info
 if (!is.null(opt$infoOpt)) {
     info <- "DiffBind_analysis_info.txt"
-    cat("dba.count Info\n\n", file=info, append = TRUE)
-    capture.output(sample, file=info, append=TRUE)
-    cat("\ndba.analyze Info\n\n", file=info, append = TRUE)
-    capture.output(sample_analyze, file=info, append=TRUE)
-    cat("\nSessionInfo\n\n", file=info, append = TRUE)
-    capture.output(sessionInfo(), file=info, append=TRUE)
-}
\ No newline at end of file
+    cat("dba.count Info\n\n", file = info, append = TRUE)
+    capture.output(sample, file = info, append = TRUE)
+    cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
+    capture.output(sample_analyze, file = info, append = TRUE)
+    cat("\nSessionInfo\n\n", file = info, append = TRUE)
+    capture.output(sessionInfo(), file = info, append = TRUE)
+}