Mercurial > repos > bgruening > diffbind
changeset 18:f907216064f6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c
| author | iuc |
|---|---|
| date | Mon, 08 Jul 2024 18:31:51 +0000 |
| parents | 2605cbdaa7d8 |
| children | |
| files | diffbind.R diffbind.xml test-data/DiffBind_analysis.RData test-data/out_binding_matrix.tab test-data/out_binding_matrix_edger.tab test-data/out_diffbind.bed test-data/out_diffbind.interval test-data/out_diffbind.tab test-data/out_diffbind_blacklist.tab test-data/out_diffbind_ctrl.interval test-data/out_diffbind_edger.interval test-data/out_diffbind_minoverlap1.tab test-data/out_plots.pdf test-data/out_plots_edger.pdf |
| diffstat | 14 files changed, 281 insertions(+), 1533 deletions(-) [+] |
line wrap: on
line diff
--- a/diffbind.R Fri Dec 15 19:39:14 2023 +0000 +++ b/diffbind.R Mon Jul 08 18:31:51 2024 +0000 @@ -1,45 +1,47 @@ ## Setup R error handling to go to stderr -options(show.error.messages = FALSE, error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) +options(show.error.messages = F, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, F) }) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library("getopt") - library("DiffBind") - library("rjson") + library("getopt") + library("DiffBind") + library("rjson") }) options(stringAsfactors = FALSE, useFancyQuotes = FALSE) args <- commandArgs(trailingOnly = TRUE) -#get options, using the spec as defined by the enclosed list. -#we read the options from the default: commandArgs(TRUE). +# get options, using the spec as defined by the enclosed list. +# we read the options from the default: commandArgs(TRUE). spec <- matrix(c( - "infile", "i", 1, "character", - "outfile", "o", 1, "character", - "scorecol", "n", 1, "integer", - "lowerbetter", "l", 1, "logical", - "summits", "s", 1, "integer", - "th", "t", 1, "double", - "format", "f", 1, "character", - "plots", "p", 2, "character", - "bmatrix", "b", 0, "logical", - "rdaOpt", "r", 0, "logical", - "infoOpt", "a", 0, "logical", - "verbose", "v", 2, "integer", - "help", "h", 0, "logical" + "infile", "i", 1, "character", + "outfile", "o", 1, "character", + "method", "m", 1, "character", + "scorecol", "n", 1, "integer", + "lowerbetter", "l", 1, "logical", + "summits", "s", 1, "integer", + "th", "t", 1, "double", + "minoverlap", "O", 1, "integer", + "use_blacklist", "B", 0, "logical", + "format", "f", 1, "character", + "plots", "p", 2, "character", + "bmatrix", "b", 0, "logical", + "rdaOpt", "r", 0, "logical", + "infoOpt", "a", 0, "logical", + "verbose", "v", 2, "integer", + "help", "h", 0, "logical" ), byrow = TRUE, ncol = 4) opt <- getopt(spec) - # if help was asked for print a friendly message # and exit with a non-zero error code if (!is.null(opt$help)) { - cat(getopt(spec, usage = TRUE)) - q(status = 1) + cat(getopt(spec, usage = TRUE)) + q(status = 1) } parser <- newJSONParser() @@ -55,115 +57,127 @@ samples <- sapply(strsplit(peaks, "-"), `[`, 2) if (length(ctrls) != 0) { - sample_table <- data.frame( - SampleID = samples, - Condition = groups, - bamReads = bams, - bamControl = ctrls, - Peaks = peaks, - Tissue = samples - ) # using "Tissue" column to display ids as labels in PCA plot + sample_table <- data.frame( + SampleID = samples, + Condition = groups, + bamReads = bams, + bamControl = ctrls, + Peaks = peaks, + Tissue = samples + ) # using "Tissue" column to display ids as labels in PCA plot } else { - sample_table <- data.frame( - SampleID = samples, - Replicate = samples, - Condition = groups, - bamReads = bams, - Peaks = peaks, - Tissue = samples - ) + sample_table <- data.frame( + SampleID = samples, + Replicate = samples, + Condition = groups, + bamReads = bams, + Peaks = peaks, + Tissue = samples + ) } -sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) +sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter, minOverlap = opt$minoverlap) + +if (!is.null(opt$use_blacklist)) { + sample <- dba.blacklist(sample, blacklist = TRUE) +} if (!is.null(opt$summits)) { - sample_count <- dba.count(sample, summits = opt$summits) + sample_count <- dba.count(sample, summits = opt$summits) } else { - sample_count <- dba.count(sample) + sample_count <- dba.count(sample) } sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) -sample_analyze <- dba.analyze(sample_contrast) -diff_bind <- dba.report(sample_analyze, th = opt$th) + +if (opt$method == "DBA_DESEQ2") { + method <- DBA_DESEQ2 +} else if (opt$method == "DBA_EDGER") { + method <- DBA_EDGER +} + +sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE) + +diff_bind <- dba.report(sample_analyze, th = opt$th, method = method) # Generate plots if (!is.null(opt$plots)) { - pdf(opt$plots) - orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) - dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) - dba.plotMA(sample_analyze, th = opt$th) - dba.plotVolcano(sample_analyze, th = opt$th) - dba.plotBox(sample_analyze, th = opt$th) - dev.off() + pdf(opt$plots) + orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th, method = method) + dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3, method = method) + dba.plotMA(sample_analyze, th = opt$th, method = method) + dba.plotVolcano(sample_analyze, th = opt$th, method = method) + dba.plotBox(sample_analyze, th = opt$th, method = method) + dev.off() } # Output differential binding sites res_sorted <- diff_bind[order(diff_bind$FDR), ] # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) if (opt$format == "bed") { - res_sorted <- data.frame( - Chrom = seqnames(res_sorted), - Start = start(res_sorted) - 1, - End = end(res_sorted), - Name = rep("DiffBind", length(res_sorted)), - Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted)) - ) + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)) + ) } else if (opt$format == "interval") { - # Output as interval - df <- as.data.frame(res_sorted) - extrainfo <- NULL - for (i in seq_len(nrow(df))) { - extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") - } - res_sorted <- data.frame( - Chrom = seqnames(res_sorted), - Start = start(res_sorted) - 1, - End = end(res_sorted), - Name = rep("DiffBind", length(res_sorted)), - Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted)), - Comment = extrainfo - ) + # Output as interval + df <- as.data.frame(res_sorted) + extrainfo <- NULL + for (i in seq_len(nrow(df))) { + extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") + } + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)), + Comment = extrainfo + ) } else { - # Output as 0-based tabular - res_sorted <- data.frame( - Chrom = seqnames(res_sorted), - Start = start(res_sorted) - 1, - End = end(res_sorted), - Name = rep("DiffBind", length(res_sorted)), - Score = rep("0", length(res_sorted)), - Strand = gsub("\\*", ".", strand(res_sorted)), - mcols(res_sorted) - ) + # Output as 0-based tabular + res_sorted <- data.frame( + Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)), + mcols(res_sorted) + ) } write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) # Output binding affinity scores if (!is.null(opt$bmatrix)) { - bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) - # Output as 0-based tabular - bmat <- data.frame( - Chrom = bmat[, 1], - Start = bmat[, 2] - 1, - End = bmat[, 3], - bmat[, 4:ncol(bmat)] - ) - write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) + bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = opt$minoverlap) + # Output as 0-based tabular + bmat <- data.frame( + Chrom = bmat[, 1], + Start = bmat[, 2] - 1, + End = bmat[, 3], + bmat[, 4:ncol(bmat)] + ) + write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) } # Output RData file if (!is.null(opt$rdaOpt)) { - save.image(file = "DiffBind_analysis.RData") + save.image(file = "DiffBind_analysis.RData") } # Output analysis info if (!is.null(opt$infoOpt)) { - info <- "DiffBind_analysis_info.txt" - cat("dba.count Info\n\n", file = info, append = TRUE) - capture.output(sample, file = info, append = TRUE) - cat("\ndba.analyze Info\n\n", file = info, append = TRUE) - capture.output(sample_analyze, file = info, append = TRUE) - cat("\nSessionInfo\n\n", file = info, append = TRUE) - capture.output(sessionInfo(), file = info, append = TRUE) + info <- "DiffBind_analysis_info.txt" + cat("dba.count Info\n\n", file = info, append = TRUE) + capture.output(sample, file = info, append = TRUE) + cat("\ndba.analyze Info\n\n", file = info, append = TRUE) + capture.output(sample_analyze, file = info, append = TRUE) + cat("\nSessionInfo\n\n", file = info, append = TRUE) + capture.output(sessionInfo(), file = info, append = TRUE) }
--- a/diffbind.xml Fri Dec 15 19:39:14 2023 +0000 +++ b/diffbind.xml Mon Jul 08 18:31:51 2024 +0000 @@ -1,8 +1,8 @@ <tool id="diffbind" name="DiffBind" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description> differential binding analysis of ChIP-Seq peak data</description> <macros> - <token name="@TOOL_VERSION@">2.10.0</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@TOOL_VERSION@">3.12.0</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">22.05</token> </macros> <xrefs> @@ -11,9 +11,8 @@ </xrefs> <requirements> <requirement type="package" version="@TOOL_VERSION@">bioconductor-diffbind</requirement> - <requirement type="package" version="3.5.1">r-base</requirement> - <requirement type="package" version="1.20.3">r-getopt</requirement> - <requirement type="package" version="0.2.20">r-rjson</requirement> + <requirement type="package" version="1.20.4">r-getopt</requirement> + <requirement type="package" version="4.0.16">bioconductor-edger</requirement> </requirements> <stdio> <regex match="Execution halted" @@ -71,7 +70,7 @@ #for $ctrl in $g.bamcontrol: #set $ctrl_name = re.sub('[^\w\-]', '_', str($ctrl.element_identifier)) #set $ctrl_file = $ctrl_name + "-bamcontrol.bam" - #set ctrl_index = $ctrl_name + "-bamcontrol.bai" + #set $ctrl_index = $ctrl_name + "-bamcontrol.bai" #if $ctrl_file not in json.dumps($temp_factor): ln -s '${ctrl}' '${ctrl_file}' && ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' && @@ -94,13 +93,15 @@ -i '#echo json.dumps(temp_factor_names)#' -o '$outfile' + -m '$method' -t $th -f $out.format -p '$plots' - - #if $scorecol: - -n "$scorecol" + -O $minoverlap + #if $use_blacklist: + -B #end if + -n $scorecol #if $lowerbetter: -l "$lowerbetter" #end if @@ -138,8 +139,17 @@ <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM files" help="Specify the Read BAM files used in the Peak calling. The input order of the BAM files for the samples MUST match the input order of the peaks files."/> <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM files" help="If specifying a control BAM file, all samples are required to specify one, see Help section below. The input order of the BAM files for the samples MUST match the input order of the peaks files."/> </repeat> - - <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"> + <param name="method" type="select" label="Underlying method by which to analyze differential binding affinity"> + <option value="DBA_DESEQ2" selected="True">DESeq2</option> + <option value="DBA_EDGER">edgeR</option> + </param> + <param name="use_blacklist" type="boolean" truevalue="True" falsevalue="" checked="False" label="Filters peak intervals that overlap a blacklist from ENCODE" help="Works with human, mouse, worm and fly. Assembly version is determined from the BAM files." /> + <param name="minoverlap" type="integer" min="1" value="2" label="Only include peaks in at least this many peaksets in the main binding matrix"> + <sanitizer> + <valid initial="string.digits"/> + </sanitizer> + </param> + <param name="scorecol" type="integer" min="0" value="5" label="Score Column" help="Column in peak files that contains peak scores. Default: 5 (narrowPeak)"> <sanitizer> <valid initial="string.digits"/> </sanitizer> @@ -226,6 +236,28 @@ </assert_contents> </output> </test> + <!-- Ensure EDGER works --> + <test expect_num_outputs="3"> + <repeat name="rep_group"> + <param name="groupName" value="Resistant"/> + <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> + </repeat> + <repeat name="rep_group"> + <param name="groupName" value="Responsive"/> + <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> + </repeat> + <param name="scorecol" value="5" /> + <param name="method" value="DBA_EDGER" /> + <param name="format" value="interval"/> + <param name="pdf" value="True" /> + <param name="binding_matrix" value="True" /> + <param name="rscript" value="False"/> + <output name="outfile" ftype="interval" value="out_diffbind_edger.interval" /> + <output name="plots" value="out_plots_edger.pdf" compare="sim_size" /> + <output name="binding_matrix" value="out_binding_matrix_edger.tab" /> + </test> <!-- Ensure control BAMs input works --> <test expect_num_outputs="1"> <repeat name="rep_group"> @@ -276,6 +308,40 @@ <param name="format" value="tabular"/> <output name="outfile" ftype="tabular" file="out_diffbind.tab" /> </test> + <!-- Ensure minoverlap works --> + <test expect_num_outputs="1"> + <repeat name="rep_group"> + <param name="groupName" value="Resistant"/> + <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> + </repeat> + <repeat name="rep_group"> + <param name="groupName" value="Responsive"/> + <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> + </repeat> + <param name="minoverlap" value="1" /> + <param name="scorecol" value="5" /> + <param name="format" value="tabular"/> + <output name="outfile" ftype="tabular" file="out_diffbind_minoverlap1.tab" /> + </test> + <!-- Ensure blacklist filtering works --> + <test expect_num_outputs="1"> + <repeat name="rep_group"> + <param name="groupName" value="Resistant"/> + <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> + </repeat> + <repeat name="rep_group"> + <param name="groupName" value="Responsive"/> + <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> + </repeat> + <param name="use_blacklist" value="True"/> + <param name="scorecol" value="5" /> + <param name="format" value="tabular"/> + <output name="outfile" ftype="tabular" file="out_diffbind_blacklist.tab" /> + </test> </tests> <help><