changeset 17:2605cbdaa7d8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 3da34ac6e5b18fd5deacaf31b757aca6bae82251
author iuc
date Fri, 15 Dec 2023 19:39:14 +0000
parents 163688bb8f73
children f907216064f6
files diffbind.R diffbind.xml
diffstat 2 files changed, 129 insertions(+), 97 deletions(-) [+]
line wrap: on
line diff
--- a/diffbind.R	Wed Nov 18 12:54:07 2020 +0000
+++ b/diffbind.R	Fri Dec 15 19:39:14 2023 +0000
@@ -1,14 +1,15 @@
 ## Setup R error handling to go to stderr
-options(show.error.messages = F, error = function() {
-    cat(geterrmessage(), file = stderr()); q("no", 1, F)
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
 })
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-    library("getopt")
-    library("DiffBind")
-    library("rjson")
+  library("getopt")
+  library("DiffBind")
+  library("rjson")
 })
 
 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
@@ -17,28 +18,28 @@
 #get options, using the spec as defined by the enclosed list.
 #we read the options from the default: commandArgs(TRUE).
 spec <- matrix(c(
-    "infile", "i", 1, "character",
-    "outfile", "o", 1, "character",
-    "scorecol", "n", 1, "integer",
-    "lowerbetter", "l", 1, "logical",
-    "summits", "s", 1, "integer",
-    "th", "t", 1, "double",
-    "format", "f", 1, "character",
-    "plots", "p", 2, "character",
-    "bmatrix", "b", 0, "logical",
-    "rdaOpt", "r", 0, "logical",
-    "infoOpt", "a", 0, "logical",
-    "verbose", "v", 2, "integer",
-    "help", "h", 0, "logical"
-), byrow = TRUE, ncol = 4);
+  "infile", "i", 1, "character",
+  "outfile", "o", 1, "character",
+  "scorecol", "n", 1, "integer",
+  "lowerbetter", "l", 1, "logical",
+  "summits", "s", 1, "integer",
+  "th", "t", 1, "double",
+  "format", "f", 1, "character",
+  "plots", "p", 2, "character",
+  "bmatrix", "b", 0, "logical",
+  "rdaOpt", "r", 0, "logical",
+  "infoOpt", "a", 0, "logical",
+  "verbose", "v", 2, "integer",
+  "help", "h", 0, "logical"
+), byrow = TRUE, ncol = 4)
 
-opt <- getopt(spec);
+opt <- getopt(spec)
 
 # if help was asked for print a friendly message
 # and exit with a non-zero error code
 if (!is.null(opt$help)) {
-    cat(getopt(spec, usage = TRUE));
-    q(status = 1);
+  cat(getopt(spec, usage = TRUE))
+  q(status = 1)
 }
 
 parser <- newJSONParser()
@@ -54,28 +55,31 @@
 samples <- sapply(strsplit(peaks, "-"), `[`, 2)
 
 if (length(ctrls) != 0) {
-    sample_table <- data.frame(SampleID = samples,
-                        Condition = groups,
-                        bamReads = bams,
-                        bamControl = ctrls,
-                        Peaks = peaks,
-                        Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot
+  sample_table <- data.frame(
+    SampleID = samples,
+    Condition = groups,
+    bamReads = bams,
+    bamControl = ctrls,
+    Peaks = peaks,
+    Tissue = samples
+  ) # using "Tissue" column to display ids as labels in PCA plot
 } else {
-
-    sample_table <- data.frame(SampleID = samples,
-                        Replicate = samples,
-                        Condition = groups,
-                        bamReads = bams,
-                        Peaks = peaks,
-                        Tissue = samples)
+  sample_table <- data.frame(
+    SampleID = samples,
+    Replicate = samples,
+    Condition = groups,
+    bamReads = bams,
+    Peaks = peaks,
+    Tissue = samples
+  )
 }
 
 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter)
 
 if (!is.null(opt$summits)) {
-    sample_count <- dba.count(sample, summits = opt$summits)
+  sample_count <- dba.count(sample, summits = opt$summits)
 } else {
-    sample_count <- dba.count(sample)
+  sample_count <- dba.count(sample)
 }
 
 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
@@ -84,74 +88,82 @@
 
 # Generate plots
 if (!is.null(opt$plots)) {
-    pdf(opt$plots)
-    orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
-    dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
-    dba.plotMA(sample_analyze, th = opt$th)
-    dba.plotVolcano(sample_analyze, th = opt$th)
-    dba.plotBox(sample_analyze, th = opt$th)
-    dev.off()
+  pdf(opt$plots)
+  orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
+  dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
+  dba.plotMA(sample_analyze, th = opt$th)
+  dba.plotVolcano(sample_analyze, th = opt$th)
+  dba.plotBox(sample_analyze, th = opt$th)
+  dev.off()
 }
 
 # Output differential binding sites
 res_sorted <- diff_bind[order(diff_bind$FDR), ]
 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
 if (opt$format == "bed") {
-    res_sorted  <- data.frame(Chrom = seqnames(res_sorted),
-        Start = start(res_sorted) - 1,
-        End = end(res_sorted),
-        Name = rep("DiffBind", length(res_sorted)),
-        Score = rep("0", length(res_sorted)),
-        Strand = gsub("\\*", ".", strand(res_sorted)))
+  res_sorted  <- data.frame(
+    Chrom = seqnames(res_sorted),
+    Start = start(res_sorted) - 1,
+    End = end(res_sorted),
+    Name = rep("DiffBind", length(res_sorted)),
+    Score = rep("0", length(res_sorted)),
+    Strand = gsub("\\*", ".", strand(res_sorted))
+  )
 } else if (opt$format == "interval") {
-     # Output as interval
-    df <- as.data.frame(res_sorted)
-    extrainfo <- NULL
-    for (i in seq_len(nrow(df))) {
-        extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
-    }
-    res_sorted  <- data.frame(Chrom = seqnames(res_sorted),
-        Start = start(res_sorted) - 1,
-        End = end(res_sorted),
-        Name = rep("DiffBind", length(res_sorted)),
-        Score = rep("0", length(res_sorted)),
-        Strand = gsub("\\*", ".", strand(res_sorted)),
-        Comment = extrainfo)
+  # Output as interval
+  df <- as.data.frame(res_sorted)
+  extrainfo <- NULL
+  for (i in seq_len(nrow(df))) {
+    extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
+  }
+  res_sorted  <- data.frame(
+    Chrom = seqnames(res_sorted),
+    Start = start(res_sorted) - 1,
+    End = end(res_sorted),
+    Name = rep("DiffBind", length(res_sorted)),
+    Score = rep("0", length(res_sorted)),
+    Strand = gsub("\\*", ".", strand(res_sorted)),
+    Comment = extrainfo
+  )
 } else {
-    # Output as 0-based tabular
-    res_sorted <- data.frame(Chrom = seqnames(res_sorted),
-        Start = start(res_sorted) - 1,
-        End = end(res_sorted),
-        Name = rep("DiffBind", length(res_sorted)),
-        Score = rep("0", length(res_sorted)),
-        Strand = gsub("\\*", ".", strand(res_sorted)),
-        mcols(res_sorted))
+  # Output as 0-based tabular
+  res_sorted <- data.frame(
+    Chrom = seqnames(res_sorted),
+    Start = start(res_sorted) - 1,
+    End = end(res_sorted),
+    Name = rep("DiffBind", length(res_sorted)),
+    Score = rep("0", length(res_sorted)),
+    Strand = gsub("\\*", ".", strand(res_sorted)),
+    mcols(res_sorted)
+  )
 }
 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)
 
 # Output binding affinity scores
 if (!is.null(opt$bmatrix)) {
-    bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
-    # Output as 0-based tabular
-    bmat <- data.frame(Chrom = bmat[, 1],
-        Start = bmat[, 2] - 1,
-        End = bmat[, 3],
-        bmat[, 4:ncol(bmat)])
-    write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
+  bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
+  # Output as 0-based tabular
+  bmat <- data.frame(
+    Chrom = bmat[, 1],
+    Start = bmat[, 2] - 1,
+    End = bmat[, 3],
+    bmat[, 4:ncol(bmat)]
+  )
+  write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
 }
 
 # Output RData file
 if (!is.null(opt$rdaOpt)) {
-    save.image(file = "DiffBind_analysis.RData")
+  save.image(file = "DiffBind_analysis.RData")
 }
 
 # Output analysis info
 if (!is.null(opt$infoOpt)) {
-    info <- "DiffBind_analysis_info.txt"
-    cat("dba.count Info\n\n", file = info, append = TRUE)
-    capture.output(sample, file = info, append = TRUE)
-    cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
-    capture.output(sample_analyze, file = info, append = TRUE)
-    cat("\nSessionInfo\n\n", file = info, append = TRUE)
-    capture.output(sessionInfo(), file = info, append = TRUE)
+  info <- "DiffBind_analysis_info.txt"
+  cat("dba.count Info\n\n", file = info, append = TRUE)
+  capture.output(sample, file = info, append = TRUE)
+  cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
+  capture.output(sample_analyze, file = info, append = TRUE)
+  cat("\nSessionInfo\n\n", file = info, append = TRUE)
+  capture.output(sessionInfo(), file = info, append = TRUE)
 }
--- a/diffbind.xml	Wed Nov 18 12:54:07 2020 +0000
+++ b/diffbind.xml	Fri Dec 15 19:39:14 2023 +0000
@@ -1,7 +1,16 @@
-<tool id="diffbind" name="DiffBind" version="2.10.0+galaxy0">
+<tool id="diffbind" name="DiffBind" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description> differential binding analysis of ChIP-Seq peak data</description>
+    <macros>
+        <token name="@TOOL_VERSION@">2.10.0</token>
+        <token name="@VERSION_SUFFIX@">1</token>
+        <token name="@PROFILE@">22.05</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">diffbind</xref>
+        <xref type="bioconductor">diffbind</xref>
+    </xrefs>
     <requirements>
-        <requirement type="package" version="2.10.0">bioconductor-diffbind</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">bioconductor-diffbind</requirement>
         <requirement type="package" version="3.5.1">r-base</requirement>
         <requirement type="package" version="1.20.3">r-getopt</requirement>
         <requirement type="package" version="0.2.20">r-rjson</requirement>
@@ -33,14 +42,14 @@
 
 #for $g in $rep_group:
 
-    #set $peak_files = list()
-    #set $bam_files = list()
-    #set $bam_controls = list()
+    #set $peak_files = dict()
+    #set $bam_files = dict()
+    #set $bam_controls = dict()
 
     #for $file in $g.peaks:
         #set $file_name = str($g.groupName) + "-" + re.sub('[^\w\-]', '_', str($file.element_identifier)) + "-peaks.bed"
         ln -s '${file}' '${file_name}' &&
-        $peak_files.append($file_name)
+        #set $peak_files[str($file.element_identifier)] = str($file_name)
     #end for
 
     #for $bam in $g.bamreads:
@@ -49,11 +58,14 @@
         #set $bam_index = $bam_name + "-bamreads.bai"
         ln -s '${bam}' '${bam_file}' &&
         ln -s '${bam.metadata.bam_index}' '${bam_index}' &&
-        $bam_files.append($bam_file)
+        #set $bam_files[str($bam.element_identifier)] = str($bam_file)
     #end for
 
-    $temp_factor.append( {str($g.groupName): $peak_files} )
-    $temp_factor.append( {str($g.groupName): $bam_files} )
+    #if len($peak_files.keys()) != len($bam_files.keys())
+        >&2 echo "Group $g.groupName: same number of Peak and Bam files needs to be given" && exit 1 &&
+    #end if
+    $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($peak_files.items())]} )
+    $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_files.items())]} )
 
     #if str( $g.bamcontrol ) != 'None':
         #for $ctrl in $g.bamcontrol:
@@ -64,9 +76,12 @@
                 ln -s '${ctrl}' '${ctrl_file}' &&
                 ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' &&
             #end if
-            $bam_controls.append($ctrl_file)
+            #set $bam_controls[str($ctrl.element_identifier)] = str($ctrl_file)
         #end for
-        $temp_factor.append( {str($g.groupName): $bam_controls} )
+        #if len($peak_files.keys()) != len($bam_files.keys())
+            >&2 echo "Group $g.groupName: same number of Peak and Bam control files needs to be given" && exit 1 &&
+        #end if
+        $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_controls.items())]} )
     #end if
 
 #end for
@@ -305,6 +320,11 @@
 be associated with each peakset (one for the ChIP data, and optionally another representing
 a control sample)
 
+Inputs for a group will be sorted by identifier before processing. For each group the corresponding
+sets of peak and BAM files need to be provided. Ideally this is accomplished by providing the data in
+collections.
+
+
 **Groups**
 
 You have to specify the name of the Group and the peak and BAM files for the two Groups you want to compare (e.g Resistant and Responsive) in the tool form above.