# HG changeset patch # User iuc # Date 1702669154 0 # Node ID 2605cbdaa7d86f95fae82dfaf40966a42d018145 # Parent 163688bb8f73d40df4cc19f2445bc84b7f3c73dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 3da34ac6e5b18fd5deacaf31b757aca6bae82251 diff -r 163688bb8f73 -r 2605cbdaa7d8 diffbind.R --- a/diffbind.R Wed Nov 18 12:54:07 2020 +0000 +++ b/diffbind.R Fri Dec 15 19:39:14 2023 +0000 @@ -1,14 +1,15 @@ ## Setup R error handling to go to stderr -options(show.error.messages = F, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) +options(show.error.messages = FALSE, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) }) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library("getopt") - library("DiffBind") - library("rjson") + library("getopt") + library("DiffBind") + library("rjson") }) options(stringAsfactors = FALSE, useFancyQuotes = FALSE) @@ -17,28 +18,28 @@ #get options, using the spec as defined by the enclosed list. #we read the options from the default: commandArgs(TRUE). spec <- matrix(c( - "infile", "i", 1, "character", - "outfile", "o", 1, "character", - "scorecol", "n", 1, "integer", - "lowerbetter", "l", 1, "logical", - "summits", "s", 1, "integer", - "th", "t", 1, "double", - "format", "f", 1, "character", - "plots", "p", 2, "character", - "bmatrix", "b", 0, "logical", - "rdaOpt", "r", 0, "logical", - "infoOpt", "a", 0, "logical", - "verbose", "v", 2, "integer", - "help", "h", 0, "logical" -), byrow = TRUE, ncol = 4); + "infile", "i", 1, "character", + "outfile", "o", 1, "character", + "scorecol", "n", 1, "integer", + "lowerbetter", "l", 1, "logical", + "summits", "s", 1, "integer", + "th", "t", 1, "double", + "format", "f", 1, "character", + "plots", "p", 2, "character", + "bmatrix", "b", 0, "logical", + "rdaOpt", "r", 0, "logical", + "infoOpt", "a", 0, "logical", + "verbose", "v", 2, "integer", + "help", "h", 0, "logical" +), byrow = TRUE, ncol = 4) -opt <- getopt(spec); +opt <- getopt(spec) # if help was asked for print a friendly message # and exit with a non-zero error code if (!is.null(opt$help)) { - cat(getopt(spec, usage = TRUE)); - q(status = 1); + cat(getopt(spec, usage = TRUE)) + q(status = 1) } parser <- newJSONParser() @@ -54,28 +55,31 @@ samples <- sapply(strsplit(peaks, "-"), `[`, 2) if (length(ctrls) != 0) { - sample_table <- data.frame(SampleID = samples, - Condition = groups, - bamReads = bams, - bamControl = ctrls, - Peaks = peaks, - Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot + sample_table <- data.frame( + SampleID = samples, + Condition = groups, + bamReads = bams, + bamControl = ctrls, + Peaks = peaks, + Tissue = samples + ) # using "Tissue" column to display ids as labels in PCA plot } else { - - sample_table <- data.frame(SampleID = samples, - Replicate = samples, - Condition = groups, - bamReads = bams, - Peaks = peaks, - Tissue = samples) + sample_table <- data.frame( + SampleID = samples, + Replicate = samples, + Condition = groups, + bamReads = bams, + Peaks = peaks, + Tissue = samples + ) } sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) if (!is.null(opt$summits)) { - sample_count <- dba.count(sample, summits = opt$summits) + sample_count <- dba.count(sample, summits = opt$summits) } else { - sample_count <- dba.count(sample) + sample_count <- dba.count(sample) } sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) @@ -84,74 +88,82 @@ # Generate plots if (!is.null(opt$plots)) { - pdf(opt$plots) - orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) - dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) - dba.plotMA(sample_analyze, th = opt$th) - dba.plotVolcano(sample_analyze, th = opt$th) - dba.plotBox(sample_analyze, th = opt$th) - dev.off() + pdf(opt$plots) + orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) + dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) + dba.plotMA(sample_analyze, th = opt$th) + dba.plotVolcano(sample_analyze, th = opt$th) + dba.plotBox(sample_analyze, th = opt$th) + dev.off() } # Output differential binding sites res_sorted <- diff_bind[order(diff_bind$FDR), ] # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) if (opt$format == "bed") { - res_sorted <- data.frame(Chrom = seqnames(res_sorted), - Start = start(res_sorted) - 1, - End = end(res_sorted), - Name = rep("DiffBind", length(res_sorted)), - Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted))) + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)) + ) } else if (opt$format == "interval") { - # Output as interval - df <- as.data.frame(res_sorted) - extrainfo <- NULL - for (i in seq_len(nrow(df))) { - extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") - } - res_sorted <- data.frame(Chrom = seqnames(res_sorted), - Start = start(res_sorted) - 1, - End = end(res_sorted), - Name = rep("DiffBind", length(res_sorted)), - Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted)), - Comment = extrainfo) + # Output as interval + df <- as.data.frame(res_sorted) + extrainfo <- NULL + for (i in seq_len(nrow(df))) { + extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") + } + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)), + Comment = extrainfo + ) } else { - # Output as 0-based tabular - res_sorted <- data.frame(Chrom = seqnames(res_sorted), - Start = start(res_sorted) - 1, - End = end(res_sorted), - Name = rep("DiffBind", length(res_sorted)), - Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted)), - mcols(res_sorted)) + # Output as 0-based tabular + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)), + mcols(res_sorted) + ) } write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) # Output binding affinity scores if (!is.null(opt$bmatrix)) { - bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) - # Output as 0-based tabular - bmat <- data.frame(Chrom = bmat[, 1], - Start = bmat[, 2] - 1, - End = bmat[, 3], - bmat[, 4:ncol(bmat)]) - write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) + bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) + # Output as 0-based tabular + bmat <- data.frame( + Chrom = bmat[, 1], + Start = bmat[, 2] - 1, + End = bmat[, 3], + bmat[, 4:ncol(bmat)] + ) + write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) } # Output RData file if (!is.null(opt$rdaOpt)) { - save.image(file = "DiffBind_analysis.RData") + save.image(file = "DiffBind_analysis.RData") } # Output analysis info if (!is.null(opt$infoOpt)) { - info <- "DiffBind_analysis_info.txt" - cat("dba.count Info\n\n", file = info, append = TRUE) - capture.output(sample, file = info, append = TRUE) - cat("\ndba.analyze Info\n\n", file = info, append = TRUE) - capture.output(sample_analyze, file = info, append = TRUE) - cat("\nSessionInfo\n\n", file = info, append = TRUE) - capture.output(sessionInfo(), file = info, append = TRUE) + info <- "DiffBind_analysis_info.txt" + cat("dba.count Info\n\n", file = info, append = TRUE) + capture.output(sample, file = info, append = TRUE) + cat("\ndba.analyze Info\n\n", file = info, append = TRUE) + capture.output(sample_analyze, file = info, append = TRUE) + cat("\nSessionInfo\n\n", file = info, append = TRUE) + capture.output(sessionInfo(), file = info, append = TRUE) } diff -r 163688bb8f73 -r 2605cbdaa7d8 diffbind.xml --- a/diffbind.xml Wed Nov 18 12:54:07 2020 +0000 +++ b/diffbind.xml Fri Dec 15 19:39:14 2023 +0000 @@ -1,7 +1,16 @@ - + differential binding analysis of ChIP-Seq peak data + + 2.10.0 + 1 + 22.05 + + + diffbind + diffbind + - bioconductor-diffbind + bioconductor-diffbind r-base r-getopt r-rjson @@ -33,14 +42,14 @@ #for $g in $rep_group: - #set $peak_files = list() - #set $bam_files = list() - #set $bam_controls = list() + #set $peak_files = dict() + #set $bam_files = dict() + #set $bam_controls = dict() #for $file in $g.peaks: #set $file_name = str($g.groupName) + "-" + re.sub('[^\w\-]', '_', str($file.element_identifier)) + "-peaks.bed" ln -s '${file}' '${file_name}' && - $peak_files.append($file_name) + #set $peak_files[str($file.element_identifier)] = str($file_name) #end for #for $bam in $g.bamreads: @@ -49,11 +58,14 @@ #set $bam_index = $bam_name + "-bamreads.bai" ln -s '${bam}' '${bam_file}' && ln -s '${bam.metadata.bam_index}' '${bam_index}' && - $bam_files.append($bam_file) + #set $bam_files[str($bam.element_identifier)] = str($bam_file) #end for - $temp_factor.append( {str($g.groupName): $peak_files} ) - $temp_factor.append( {str($g.groupName): $bam_files} ) + #if len($peak_files.keys()) != len($bam_files.keys()) + >&2 echo "Group $g.groupName: same number of Peak and Bam files needs to be given" && exit 1 && + #end if + $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($peak_files.items())]} ) + $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_files.items())]} ) #if str( $g.bamcontrol ) != 'None': #for $ctrl in $g.bamcontrol: @@ -64,9 +76,12 @@ ln -s '${ctrl}' '${ctrl_file}' && ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' && #end if - $bam_controls.append($ctrl_file) + #set $bam_controls[str($ctrl.element_identifier)] = str($ctrl_file) #end for - $temp_factor.append( {str($g.groupName): $bam_controls} ) + #if len($peak_files.keys()) != len($bam_files.keys()) + >&2 echo "Group $g.groupName: same number of Peak and Bam control files needs to be given" && exit 1 && + #end if + $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_controls.items())]} ) #end if #end for @@ -305,6 +320,11 @@ be associated with each peakset (one for the ChIP data, and optionally another representing a control sample) +Inputs for a group will be sorted by identifier before processing. For each group the corresponding +sets of peak and BAM files need to be provided. Ideally this is accomplished by providing the data in +collections. + + **Groups** You have to specify the name of the Group and the peak and BAM files for the two Groups you want to compare (e.g Resistant and Responsive) in the tool form above.