Mercurial > repos > bgruening > diffbind
changeset 11:4c7ab9995f9e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit cc4c1c4131518b9cbf986a1f252767ff73ca938e
author | iuc |
---|---|
date | Sat, 07 Apr 2018 15:45:41 -0400 |
parents | d7725c5596ab |
children | fa56d93f7980 |
files | diffbind.R diffbind.xml test-data/DiffBind_analysis.RData test-data/out_analysis_info.txt test-data/out_binding.matrix test-data/out_diffbind.bed test-data/out_plots.pdf test-data/out_rscript.txt |
diffstat | 8 files changed, 475 insertions(+), 186 deletions(-) [+] |
line wrap: on
line diff
--- a/diffbind.R Tue Mar 20 04:51:25 2018 -0400 +++ b/diffbind.R Sat Apr 07 15:45:41 2018 -0400 @@ -4,8 +4,9 @@ Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library('getopt') - library('DiffBind') + library('getopt') + library('DiffBind') + library('rjson') }) options(stringAsfactors = FALSE, useFancyQuotes = FALSE) @@ -14,15 +15,19 @@ #get options, using the spec as defined by the enclosed list. #we read the options from the default: commandArgs(TRUE). spec = matrix(c( - 'verbose', 'v', 2, "integer", - 'help' , 'h', 0, "logical", + 'infile' , 'i', 1, "character", 'outfile' , 'o', 1, "character", + 'scorecol', 'n', 1, "integer", + 'lowerbetter', 'l', 1, "logical", + 'summits', 's', 1, "integer", + 'th', 't', 1, "double", + 'format', 'f', 1, "character", 'plots' , 'p', 2, "character", - 'infile' , 'i', 1, "character", - 'format', 'f', 1, "character", - 'th', 't', 1, "double", 'bmatrix', 'b', 0, "logical", - "rdaOpt", "r", 0, "logical" + "rdaOpt", "r", 0, "logical", + 'infoOpt' , 'a', 0, "logical", + 'verbose', 'v', 2, "integer", + 'help' , 'h', 0, "logical" ), byrow=TRUE, ncol=4); opt = getopt(spec); @@ -34,31 +39,81 @@ q(status=1); } -if ( !is.null(opt$plots) ) { - pdf(opt$plots) +parser <- newJSONParser() +parser$addData(opt$infile) +factorList <- parser$getObject() +filenamesIn <- unname(unlist(factorList[[1]][[2]])) +peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)] +bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)] +ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)] + +# get the group and sample id from the peaks filenames +groups <- sapply(strsplit(peaks,"-"), `[`, 1) +samples <- sapply(strsplit(peaks,"-"), `[`, 2) + +if ( length(ctrls) != 0 ) { + sampleTable <- data.frame(SampleID=samples, + Condition=groups, + bamReads=bams, + bamControl=ctrls, + Peaks=peaks, + Tissue=samples, # using "Tissue" column to display ids as labels in PCA plot + stringsAsFactors=FALSE) +} else { + sampleTable <- data.frame(SampleID=samples, + Replicate=samples, + Condition=groups, + bamReads=bams, + Peaks=peaks, + Tissue=samples, + stringsAsFactors=FALSE) } -sample = dba(sampleSheet=opt$infile, peakFormat='bed') -sample_count = dba.count(sample) +sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter) + +if ( !is.null(opt$summits) ) { + sample_count = dba.count(sample, summits=opt$summits) +} else { + sample_count = dba.count(sample) +} + sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2) sample_analyze = dba.analyze(sample_contrast) -diff_bind = dba.report(sample_analyze) -orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE) -dev.off() +diff_bind = dba.report(sample_analyze, th=opt$th) +# Generate plots +if ( !is.null(opt$plots) ) { + pdf(opt$plots) + orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th) + dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3) + dba.plotMA(sample_analyze, th=opt$th) + dba.plotVolcano(sample_analyze, th=opt$th) + dba.plotBox(sample_analyze, th=opt$th) + dev.off() +} + +# Output differential binding sites resSorted <- diff_bind[order(diff_bind$FDR),] -write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE) +write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE) # Output binding affinity scores if (!is.null(opt$bmatrix)) { bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) - write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) + write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE) } -## Output RData file - +# Output RData file if (!is.null(opt$rdaOpt)) { save.image(file = "DiffBind_analysis.RData") } -sessionInfo() +# Output analysis info +if (!is.null(opt$infoOpt)) { + info <- "DiffBind_analysis_info.txt" + cat("dba.count Info\n\n", file=info, append = TRUE) + capture.output(sample, file=info, append=TRUE) + cat("\ndba.analyze Info\n\n", file=info, append = TRUE) + capture.output(sample_analyze, file=info, append=TRUE) + cat("\nSessionInfo\n\n", file=info, append = TRUE) + capture.output(sessionInfo(), file=info, append=TRUE) +} \ No newline at end of file
--- a/diffbind.xml Tue Mar 20 04:51:25 2018 -0400 +++ b/diffbind.xml Sat Apr 07 15:45:41 2018 -0400 @@ -1,8 +1,9 @@ -<tool id="diffbind" name="DiffBind" version="2.6.6.0"> +<tool id="diffbind" name="DiffBind" version="2.6.6.1"> <description> differential binding analysis of ChIP-Seq peak data</description> <requirements> <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="0.2.15">r-rjson</requirement> </requirements> <stdio> <regex match="Execution halted" @@ -19,69 +20,112 @@ description="An undefined error occured, please check your intput carefully and contact your administrator." /> </stdio> <version_command><![CDATA[ -echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ") +echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]></version_command> <command><![CDATA[ - ## seems that diffbind also needs file extensions to work properly - #set $counter = 1 - #for $sample in $samples: - ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# && - ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# && - #if str( $sample.bamcontrol ) != 'None': - ln -s $sample.bamcontrol #echo str($counter) + "_bamcontrol.bam"# && - ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# && - #end if - #set $counter = $counter + 1 +#import re +#import json + +## Adapted from DESeq2 wrapper +#set $temp_factor_names = list() +#set $temp_factor = list() + +#for $g in $rep_group: + + #set $peak_files = list() + #set $bam_files = list() + #set $bam_controls = list() + + #for $file in $g.peaks: + #set $file_name = re.sub('[^\w\-\s]', '_', str($file.element_identifier)) + ln -s '${file}' ${g.groupName}-${file_name}-peaks.bed && + $peak_files.append(str($g.groupName) + '-' + $file_name + '-peaks.bed') + #end for + + #for $bam in $g.bamreads: + #set $bam_name = re.sub('[^\w\-\s]', '_', str($bam.element_identifier)) + ln -s '${bam}' ${bam_name}-bamreads.bam && + ln -s ${bam.metadata.bam_index} ${bam_name}-bamreads.bai && + $bam_files.append($bam_name + '-bamreads.bam') + #end for + + $temp_factor.append( {str($g.groupName): $peak_files} ) + $temp_factor.append( {str($g.groupName): $bam_files} ) + + #if str( $g.bamcontrol ) != 'None': + #for $ctrl in $g.bamcontrol: + #set $ctrl_name = re.sub('[^\w\-\s]', '_', str($ctrl.element_identifier)) + ln -s '${ctrl}' ${g.groupName}-${ctrl_name}-bamcontrol.bam && + ln -s ${ctrl.metadata.bam_index} ${g.groupName}-${ctrl_name}-bamcontrol.bai && + $bam_controls.append(str($g.groupName) + '-' + $ctrl_name + '-bamcontrol.bam') #end for + $temp_factor.append( {str($g.groupName): $bam_controls} ) + #end if - Rscript '$__tool_directory__/diffbind.R' - -i $infile - -o '$outfile' - -t $th - -f $out.format - -p '$plots' +#end for + +$temp_factor.reverse() +$temp_factor_names.append([str($factorName), $temp_factor]) + + +Rscript '$__tool_directory__/diffbind.R' + + -i '#echo json.dumps(temp_factor_names)#' + -o '$outfile' + -t $th + -f $out.format + -p '$plots' - #if $out.binding_matrix: - -b - #end if + #if $scorecol: + -n "$scorecol" + #end if + #if $lowerbetter: + -l "$lowerbetter" + #end if + #if $summits: + -s "$summits" + #end if - #if $out.rdata: - -r - #end if + #if $out.binding_matrix: + -b + #end if + + #if $out.rdata: + -r + #end if + + #if $out.analysis_info: + -a + #end if + + #if $out.rscript: + && cp '$__tool_directory__/diffbind.R' '$rscript' + #end if ]]> </command> - <configfiles> -<configfile name="infile"><![CDATA[ -#set $counter = 1 -#for $sample in $samples: -#if str( $sample.bamcontrol ) != 'None' and $counter == 1: -SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks -#elif $counter == 1: -SampleID,Tissue,Factor,Condition,Replicate,bamReads,Peaks -#end if -#if str( $sample.bamcontrol ) != 'None': -$sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks -#else: -$sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks -#end if -#set $counter = $counter + 1 -#end for]]></configfile> - </configfiles> <inputs> - <repeat name="samples" title="Samples" min="4"> - <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> - <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> - <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> - <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> - <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> - <param name="bamreads" type="data" format="bam" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/> - <param name="bamcontrol" type="data" format="bam" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/> - <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> + <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Condition). One factor must be entered and there must be two or more groups. NOTE: Please only use letters, numbers or underscores."> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <repeat name="rep_group" title="Group" min="2" default="2"> + <param name="groupName" type="text" label="Name" + help="Name of group that the peak files belong to (e.g. Resistant or Responsive). NOTE: Please only use letters, numbers or underscores (case sensitive)."> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/> + <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used for Peak calling."/> + <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one."/> </repeat> - <param name="th" type="float" value="1" min="0" max="1" - label="FDR Threshold" - help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> - + + <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"/> + <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" /> + <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"/> + <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/> + <!-- Output Options --> <section name="out" expanded="false" title="Output Options"> <param name="format" type="select" label="Output Format"> @@ -91,8 +135,9 @@ </param> <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" /> <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> - <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"> - </param> + <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"/> + <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/> + <param name="analysis_info" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output analysis info?" help="If this option is set to Yes, information from the dba.count and dba.analyze commmands will be output in a text file. Default: No"/> </section> </inputs> @@ -112,53 +157,43 @@ <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file"> <filter>out['rdata']</filter> </data> + <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript"> + <filter>out['rscript']</filter> + </data> + <data name="analysis_info" format="txt" from_work_dir="DiffBind_analysis_info.txt" label="${tool.name} on ${on_string}: Analysis info"> + <filter>out['analysis_info']</filter> + </data> </outputs> <tests> - <test expect_num_outputs="4"> - <repeat name="samples"> - <param name="sample_id" value="BT4741" /> - <param name="tissue" value="BT474" /> - <param name="factor" value="ER" /> - <param name="condition" value="Resistant" /> - <param name="replicate" value="1" /> - <param name="bamreads" ftype="bam" value="BT474_ER_1.bam" /> - <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz" /> - </repeat> - <repeat name="samples"> - <param name="sample_id" value="BT4742" /> - <param name="tissue" value="BT474" /> - <param name="factor" value="ER" /> - <param name="condition" value="Resistant" /> - <param name="replicate" value="2" /> - <param name="bamreads" ftype="bam" value="BT474_ER_2.bam" /> - <param name="peaks" ftype="bed" value="BT474_ER_2.bed.gz" /> + <test expect_num_outputs="6"> + <param name="factorName" value="Condition"/> + <repeat name="rep_group"> + <param name="groupName" value="Resistant"/> + <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> </repeat> - <repeat name="samples"> - <param name="sample_id" value="MCF71" /> - <param name="tissue" value="MCF7" /> - <param name="factor" value="ER" /> - <param name="condition" value="Responsive" /> - <param name="replicate" value="1" /> - <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam" /> - <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz" /> + <repeat name="rep_group"> + <param name="groupName" value="Responsive"/> + <param name="peaks" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> + <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> </repeat> - <repeat name="samples"> - <param name="sample_id" value="MCF72" /> - <param name="tissue" value="MCF7" /> - <param name="factor" value="ER" /> - <param name="condition" value="Responsive" /> - <param name="replicate" value="2" /> - <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> - <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> - </repeat> + <param name="scorecol" value="5" /> <param name="pdf" value="True" /> <param name="binding_matrix" value="True" /> <param name="rdata" value="True" /> + <param name="rscript" value="True"/> + <param name="analysis_info" value="True"/> <output name="outfile" value="out_diffbind.bed" /> <output name="plots" value="out_plots.pdf" compare="sim_size" /> <output name="binding_matrix" value="out_binding.matrix" /> <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> + <output name="rscript" value="out_rscript.txt"/> + <output name="analysis_info" value="out_analysis_info.txt" compare="sim_size" > + <assert_contents> + <has_text text="SessionInfo"/> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ @@ -190,11 +225,7 @@ as well as comparing the results of an occupancy-based analysis with an affinity-based one. Finally, certain technical aspects of the how these analyses are accomplished are detailed. -Note DiffBind requires a minimum of four samples (two groups with two replicates each). - -.. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html -.. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html -.. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf +Note this DiffBind tool requires a minimum of four samples (two groups with two replicates each). ----- @@ -210,36 +241,29 @@ **Sample Information** -You have to specify your sample information in the tool form above, where Condition contains the groups you want to compare. +You have to specify your sample information in the tool form above, where Factor is the groups you want to compare (e.g Resistant and Responsive). Example: - ============= ========== ========== ============= ============= - **SampleID** **Tissue** **Factor** **Condition** **Replicate** - ------------- ---------- ---------- ------------- ------------- - BT4741 BT474 ER Resistant 1 - BT4742 BT474 ER Resistant 2 - MCF71 MCF7 ER Responsive 1 - MCF72 MCF7 ER Responsive 2 - MCF73 MCF7 ER Responsive 3 - T47D1 T47D ER Responsive 1 - T47D2 T47D ER Responsive 2 - MCF7r1 MCF7 ER Resistant 1 - MCF7r2 MCF7 ER Resistant 2 - ZR751 ZR75 ER Responsive 1 - ZR752 ZR75 ER Responsive 2 - ============= ========== ========== ============= ============= + ============= ============= + **SampleID** **Group** + ------------- ------------- + BT4741 Resistant + BT4742 Resistant + MCF71 Responsive + MCF72 Responsive + ============= ============= **Peak files** -Result of your Peak calling experiment in bed format, one file for each sample is required. +Result of your Peak calling experiment in bed format, one file for each sample is required. The peak caller, format and score column can be specified in the tool form above. The default settings expect narrowPeak bed format, which has the score in the 8th column (-log10pvalue), and can be output from MACS2. -Example: +Example (MACS.xls file in bed format): - ======= ======= ======= =============== ======= - 1 2 3 4 **5** - ======= ======= ======= =============== ======= + ======= ======= ======= =============== ============== + 1 2 3 4 **5 (Score)** + ======= ======= ======= =============== ============== chr18 215562 216063 MACS_peak_16037 56.11 chr18 311530 312105 MACS_peak_16038 222.49 chr18 356656 357315 MACS_peak_16039 92.06 @@ -250,10 +274,10 @@ chr18 503518 504552 MACS_peak_16044 818.30 chr18 531672 532274 MACS_peak_16045 159.30 chr18 568326 569282 MACS_peak_16046 601.11 - ======= ======= ======= =============== ======= + ======= ======= ======= =============== ============== -* BAM file which contains the mapped sequencing reads can be associated with each peakset -* Control BAM file represents a control dataset and are optional, but have to specified for all when used. +* BAM file which contains the mapped sequencing reads associated with each peakset, one file for each sample is required. +* Optional: Control BAM file representing a control dataset. If used, has to be specified for all samples. Note that the DiffBind authors say control reads are best utilized prior to running DiffBind, at the peak calling stage (e.g. with MACS2) and in blacklists, see this `Bioconductor post`_. ----- @@ -265,9 +289,11 @@ Optionally, under **Output Options** you can choose to output - * a correlation heatmap plot + * a PDF of plots (Heatmap, PCA, MA, Volcano, Boxplots) * a binding affinity matrix - * an RData file + * the R script used by this tool + * an RData file of the R objects generated + * a text file with information on the analysis (number of Intervals, FriP scores, method used) **Differentially Bound Sites** @@ -275,15 +301,15 @@ Example - BED format: - ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== - 1 2 3 4 5 6 7 8 9 10 **11** - ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== - chr18 394600 396513 1914 * 7.15 7.89 5.55 2.35 7.06e-24 9.84e-21 - chr18 111567 112005 439 * 5.71 3.63 6.53 -2.89 1.27e-08 8.88e-06 - chr18 346464 347342 879 * 5 3.24 5.77 -2.52 6.51e-06 0.00303 - chr18 399014 400382 1369 * 7.62 8.05 7 1.04 1.04e-05 0.00364 - chr18 371110 372102 993 * 4.63 5.36 3.07 2.3 8.1e-05 0.0226 - ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== + ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== + seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value **FDR** + ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== + chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 + chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 + chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 + chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 + chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226 + ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== Columns contain the following data: @@ -307,21 +333,20 @@ Example: - ====== ====== ====== ========== ========== ========= ====== ========= ==== - ID Tissue Factor Condition Treatment Replicate Caller Intervals FRiP - ====== ====== ====== ========== ========== ========= ====== ========= ==== - BT4741 BT474 ER Resistant Full-Media 1 counts 2845 0.16 - BT4742 BT474 ER Resistant Full-Media 2 counts 2845 0.15 - MCF71 MCF7 ER Responsive Full-Media 1 counts 2845 0.27 - MCF72 MCF7 ER Responsive Full-Media 2 counts 2845 0.17 - MCF73 MCF7 ER Responsive Full-Media 3 counts 2845 0.23 - T47D1 T47D ER Responsive Full-Media 1 counts 2845 0.10 - T47D2 T47D ER Responsive Full-Media 2 counts 2845 0.06 - MCF7r1 MCF7 ER Resistant Full-Media 1 counts 2845 0.20 - MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 - ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 - ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 - ====== ====== ====== ========== ========== ========= ====== ========= ==== + ===== ====== ====== ================ ================ ================ ================ + CHR START END MCF7_ER_1.bed MCF7_ER_2.bed BT474_ER_1.bed BT474_ER_2.bed + ===== ====== ====== ================ ================ ================ ================ + chr18 111567 112005 137.615208000375 59.878372946728 29.4139375878664 19.9594576489093 + chr18 189223 189652 19.9594576489093 12.6059732519427 11.5554754809475 23.110950961895 + chr18 215232 216063 11.5554754809475 15.7574665649284 31.5149331298568 72.4843461986707 + chr18 311530 312172 17.8584621069189 11.5554754809475 54.6258840917518 43.0704086108043 + chr18 346464 347342 75.6358395116564 40.9694130688139 21.0099554199046 16.8079643359236 + chr18 356560 357362 11.5554754809475 14.7069687939332 57.7773774047375 53.5753863207566 + chr18 371110 372102 8.40398216796182 9.45447993895705 81.9388261376278 82.989323908623 + chr18 394600 396513 56.7268796337423 43.0704086108043 510.541916703681 438.05757050501 + chr18 399014 400382 156.524167878289 117.655750351465 558.864814169461 496.885445680743 + chr18 498906 500200 767.913870597511 278.381909313735 196.443083176108 181.736114382174 + ===== ====== ====== ================ ================ ================ ================ ----- @@ -392,7 +417,7 @@ of reads within differentially bound sites corresponding to whether they gain or lose affinity between the two sample groups. A reporting mechanism enables differentially bound sites to be extracted for further processing, such as annotation, motif, and -pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.* +pathway analyses. ----- @@ -401,6 +426,11 @@ DiffBind Authors: Rory Stark, Gordon Brown (2011) Wrapper authors: Bjoern Gruening, Pavankumar Videm +.. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html +.. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html +.. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf +.. _`Bioconductor post`: https://support.bioconductor.org/p/69924/ + ]]> </help> <citations>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_analysis_info.txt Sat Apr 07 15:45:41 2018 -0400 @@ -0,0 +1,83 @@ +dba.count Info + +4 Samples, 1394 sites in matrix (2141 total): + ID Tissue Condition Caller Intervals +1 MCF7_ER_1_bed MCF7_ER_1_bed Responsive macs 1556 +2 MCF7_ER_2_bed MCF7_ER_2_bed Responsive macs 1046 +3 BT474_ER_1_bed BT474_ER_1_bed Resistant macs 1080 +4 BT474_ER_2_bed BT474_ER_2_bed Resistant macs 1122 + +dba.analyze Info + +4 Samples, 1394 sites in matrix: + ID Tissue Condition Caller Intervals FRiP +1 MCF7_ER_1_bed MCF7_ER_1_bed Responsive counts 1394 0.38 +2 MCF7_ER_2_bed MCF7_ER_2_bed Responsive counts 1394 0.22 +3 BT474_ER_1_bed BT474_ER_1_bed Resistant counts 1394 0.27 +4 BT474_ER_2_bed BT474_ER_2_bed Resistant counts 1394 0.25 + +1 Contrast: + Group1 Members1 Group2 Members2 DB.DESeq2 +1 Responsive 2 Resistant 2 5 + +SessionInfo + +R version 3.4.1 (2017-06-30) +Platform: x86_64-apple-darwin14.5.0 (64-bit) +Running under: OS X El Capitan 10.11.6 + +Matrix products: default +BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib +LAPACK: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libLAPACK.dylib + +locale: +[1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_US.UTF-8 + +attached base packages: +[1] parallel stats4 methods stats graphics grDevices utils +[8] datasets base + +other attached packages: + [1] bindrcpp_0.2 rjson_0.2.15 + [3] DiffBind_2.6.6 SummarizedExperiment_1.8.0 + [5] DelayedArray_0.4.1 matrixStats_0.52.2 + [7] Biobase_2.38.0 GenomicRanges_1.30.3 + [9] GenomeInfoDb_1.14.0 IRanges_2.12.0 +[11] S4Vectors_0.16.0 BiocGenerics_0.24.0 +[13] getopt_1.20.0 + +loaded via a namespace (and not attached): + [1] Category_2.44.0 bitops_1.0-6 bit64_0.9-5 + [4] RColorBrewer_1.1-2 progress_1.1.2 httr_1.3.1 + [7] Rgraphviz_2.22.0 tools_3.4.1 backports_1.0.5 + [10] R6_2.2.2 rpart_4.1-13 KernSmooth_2.23-15 + [13] Hmisc_4.0-3 DBI_0.8 lazyeval_0.2.1 + [16] colorspace_1.3-2 nnet_7.3-12 gridExtra_2.3 + [19] DESeq2_1.18.1 prettyunits_1.0.2 bit_1.1-12 + [22] compiler_3.4.1 sendmailR_1.2-1 graph_1.56.0 + [25] htmlTable_1.9 labeling_0.3 rtracklayer_1.38.0 + [28] caTools_1.17.1 scales_0.5.0 checkmate_1.8.2 + [31] BatchJobs_1.6 genefilter_1.60.0 RBGL_1.54.0 + [34] stringr_1.3.0 digest_0.6.12 Rsamtools_1.30.0 + [37] foreign_0.8-67 AnnotationForge_1.20.0 XVector_0.18.0 + [40] htmltools_0.3.6 base64enc_0.1-3 pkgconfig_2.0.1 + [43] limma_3.34.6 htmlwidgets_1.0 rlang_0.2.0 + [46] RSQLite_2.0 BBmisc_1.11 bindr_0.1.1 + [49] GOstats_2.44.0 hwriter_1.3.2 BiocParallel_1.12.0 + [52] gtools_3.5.0 acepack_1.4.1 dplyr_0.7.4 + [55] RCurl_1.95-4.8 magrittr_1.5 Formula_1.2-1 + [58] GO.db_3.5.0 GenomeInfoDbData_0.99.1 Matrix_1.2-12 + [61] Rcpp_0.12.15 munsell_0.4.3 stringi_1.1.6 + [64] edgeR_3.20.7 zlibbioc_1.24.0 gplots_3.0.1 + [67] fail_1.3 plyr_1.8.4 grid_3.4.1 + [70] blob_1.1.1 ggrepel_0.7.0 gdata_2.18.0 + [73] lattice_0.20-34 Biostrings_2.46.0 splines_3.4.1 + [76] GenomicFeatures_1.28.5 annotate_1.56.0 locfit_1.5-9.1 + [79] knitr_1.20 pillar_1.2.1 systemPipeR_1.12.0 + [82] geneplotter_1.56.0 biomaRt_2.34.2 glue_1.2.0 + [85] XML_3.98-1.6 ShortRead_1.36.0 latticeExtra_0.6-28 + [88] data.table_1.10.4 gtable_0.2.0 amap_0.8-14 + [91] assertthat_0.2.0 ggplot2_2.2.1 xtable_1.8-2 + [94] survival_2.40-1 tibble_1.4.2 pheatmap_1.0.8 + [97] GenomicAlignments_1.14.1 AnnotationDbi_1.40.0 memoise_1.1.0 +[100] cluster_2.0.6 brew_1.0-6 GSEABase_1.40.0
--- a/test-data/out_binding.matrix Tue Mar 20 04:51:25 2018 -0400 +++ b/test-data/out_binding.matrix Sat Apr 07 15:45:41 2018 -0400 @@ -1,17 +1,18 @@ -chr18 111567 112005 29.4139375878664 19.9594576489093 137.615208000375 59.878372946728 -chr18 189223 189652 11.5554754809475 23.110950961895 19.9594576489093 12.6059732519427 -chr18 215232 216063 31.5149331298568 72.4843461986707 11.5554754809475 15.7574665649284 -chr18 311530 312172 54.6258840917518 43.0704086108043 17.8584621069189 11.5554754809475 -chr18 346464 347342 21.0099554199046 16.8079643359236 75.6358395116564 40.9694130688139 -chr18 356560 357362 57.7773774047375 53.5753863207566 11.5554754809475 14.7069687939332 -chr18 371110 372102 81.9388261376278 82.989323908623 8.40398216796182 9.45447993895705 -chr18 394600 396513 510.541916703681 438.05757050501 56.7268796337423 43.0704086108043 -chr18 399014 400382 558.864814169461 496.885445680743 156.524167878289 117.655750351465 -chr18 498906 500200 196.443083176108 181.736114382174 767.913870597511 278.381909313735 -chr18 503518 504552 194.342087634117 231.10950961895 99.7972882445466 61.9793684887184 -chr18 531672 532439 57.7773774047375 73.5348439696659 44.1209063817996 23.110950961895 -chr18 568036 569332 207.998558657055 221.655029679993 165.978647817246 157.574665649284 -chr18 589046 589875 117.655750351466 149.170683481322 50.4238930077709 72.4843461986707 +CHR START END MCF7_ER_1_bed MCF7_ER_2_bed BT474_ER_1_bed BT474_ER_2_bed +chr18 111567 112005 137.615208000375 59.878372946728 29.4139375878664 19.9594576489093 +chr18 189223 189652 19.9594576489093 12.6059732519427 11.5554754809475 23.110950961895 +chr18 215232 216063 11.5554754809475 15.7574665649284 31.5149331298568 72.4843461986707 +chr18 311530 312172 17.8584621069189 11.5554754809475 54.6258840917518 43.0704086108043 +chr18 346464 347342 75.6358395116564 40.9694130688139 21.0099554199046 16.8079643359236 +chr18 356560 357362 11.5554754809475 14.7069687939332 57.7773774047375 53.5753863207566 +chr18 371110 372102 8.40398216796182 9.45447993895705 81.9388261376278 82.989323908623 +chr18 394600 396513 56.7268796337423 43.0704086108043 510.541916703681 438.05757050501 +chr18 399014 400382 156.524167878289 117.655750351465 558.864814169461 496.885445680743 +chr18 498906 500200 767.913870597511 278.381909313735 196.443083176108 181.736114382174 +chr18 503518 504552 99.7972882445466 61.9793684887184 194.342087634117 231.10950961895 +chr18 531672 532439 44.1209063817996 23.110950961895 57.7773774047375 73.5348439696659 +chr18 568036 569332 165.978647817246 157.574665649284 207.998558657055 221.655029679993 +chr18 589046 589875 50.4238930077709 72.4843461986707 117.655750351466 149.170683481322 chr18 651923 652540 1.05049777099523 1.05049777099523 1.05049777099523 1.05049777099523 chr18 657092 657876 1.05049777099523 1.05049777099523 1.05049777099523 1.05049777099523 chr18 770235 770992 1.05049777099523 1.05049777099523 1.05049777099523 1.05049777099523
--- a/test-data/out_diffbind.bed Tue Mar 20 04:51:25 2018 -0400 +++ b/test-data/out_diffbind.bed Sat Apr 07 15:45:41 2018 -0400 @@ -1,5 +1,6 @@ -chr18 394600 396513 1914 * 7.15 7.89 5.55 2.35 7.06e-24 9.84e-21 -chr18 111567 112005 439 * 5.71 3.63 6.53 -2.89 1.27e-08 8.88e-06 -chr18 346464 347342 879 * 5 3.24 5.77 -2.52 6.51e-06 0.00303 -chr18 399014 400382 1369 * 7.62 8.05 7 1.04 1.04e-05 0.00364 -chr18 371110 372102 993 * 4.63 5.36 3.07 2.3 8.1e-05 0.0226 +seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value FDR +chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 +chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 +chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 +chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 +chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_rscript.txt Sat Apr 07 15:45:41 2018 -0400 @@ -0,0 +1,119 @@ +## Setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +# we need that to not crash galaxy with an UTF8 error on German LC settings. +Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +suppressPackageStartupMessages({ + library('getopt') + library('DiffBind') + library('rjson') +}) + +options(stringAsfactors = FALSE, useFancyQuotes = FALSE) +args <- commandArgs(trailingOnly = TRUE) + +#get options, using the spec as defined by the enclosed list. +#we read the options from the default: commandArgs(TRUE). +spec = matrix(c( + 'infile' , 'i', 1, "character", + 'outfile' , 'o', 1, "character", + 'scorecol', 'n', 1, "integer", + 'lowerbetter', 'l', 1, "logical", + 'summits', 's', 1, "integer", + 'th', 't', 1, "double", + 'format', 'f', 1, "character", + 'plots' , 'p', 2, "character", + 'bmatrix', 'b', 0, "logical", + "rdaOpt", "r", 0, "logical", + 'infoOpt' , 'a', 0, "logical", + 'verbose', 'v', 2, "integer", + 'help' , 'h', 0, "logical" +), byrow=TRUE, ncol=4); + +opt = getopt(spec); + +# if help was asked for print a friendly message +# and exit with a non-zero error code +if ( !is.null(opt$help) ) { + cat(getopt(spec, usage=TRUE)); + q(status=1); +} + +parser <- newJSONParser() +parser$addData(opt$infile) +factorList <- parser$getObject() +filenamesIn <- unname(unlist(factorList[[1]][[2]])) +peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)] +bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)] +ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)] + +# get the group and sample id from the peaks filenames +groups <- sapply(strsplit(peaks,"-"), `[`, 1) +samples <- sapply(strsplit(peaks,"-"), `[`, 2) + +if ( length(ctrls) != 0 ) { + sampleTable <- data.frame(SampleID=samples, + Condition=groups, + bamReads=bams, + bamControl=ctrls, + Peaks=peaks, + Tissue=samples, # using "Tissue" column to display ids as labels in PCA plot + stringsAsFactors=FALSE) +} else { + sampleTable <- data.frame(SampleID=samples, + Replicate=samples, + Condition=groups, + bamReads=bams, + Peaks=peaks, + Tissue=samples, + stringsAsFactors=FALSE) +} + +sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter) + +if ( !is.null(opt$summits) ) { + sample_count = dba.count(sample, summits=opt$summits) +} else { + sample_count = dba.count(sample) +} + +sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2) +sample_analyze = dba.analyze(sample_contrast) +diff_bind = dba.report(sample_analyze, th=opt$th) + +# Generate plots +if ( !is.null(opt$plots) ) { + pdf(opt$plots) + orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th) + dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3) + dba.plotMA(sample_analyze, th=opt$th) + dba.plotVolcano(sample_analyze, th=opt$th) + dba.plotBox(sample_analyze, th=opt$th) + dev.off() +} + +# Output differential binding sites +resSorted <- diff_bind[order(diff_bind$FDR),] +write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE) + +# Output binding affinity scores +if (!is.null(opt$bmatrix)) { + bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) + write.table(as.data.frame(bmat), file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE) +} + +# Output RData file +if (!is.null(opt$rdaOpt)) { + save.image(file = "DiffBind_analysis.RData") +} + +# Output analysis info +if (!is.null(opt$infoOpt)) { + info <- "DiffBind_analysis_info.txt" + cat("dba.count Info\n\n", file=info, append = TRUE) + capture.output(sample, file=info, append=TRUE) + cat("\ndba.analyze Info\n\n", file=info, append = TRUE) + capture.output(sample_analyze, file=info, append=TRUE) + cat("\nSessionInfo\n\n", file=info, append = TRUE) + capture.output(sessionInfo(), file=info, append=TRUE) +} \ No newline at end of file