comparison diffbind.xml @ 10:d7725c5596ab draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit f970dcbe9d0e4c3714b1db74c404ea34223cf8ed
author iuc
date Tue, 20 Mar 2018 04:51:25 -0400
parents 6171163112de
children 4c7ab9995f9e
comparison
equal deleted inserted replaced
9:6171163112de 10:d7725c5596ab
1 <tool id="diffbind" name="DiffBind" version="2.6.5.0"> 1 <tool id="diffbind" name="DiffBind" version="2.6.6.0">
2 <description> differential binding analysis of ChIP-Seq peak data</description> 2 <description> differential binding analysis of ChIP-Seq peak data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.6.5">bioconductor-diffbind</requirement> 4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement>
5 <requirement type="package" version="1.20.0">r-getopt</requirement> 5 <requirement type="package" version="1.20.0">r-getopt</requirement>
6 <!--added rmysql requirement to remove: "Warning: namespace ‘RMySQL’ is not available"-->
7 <requirement type="package" version="0.10.11">r-rmysql</requirement>
8 </requirements> 6 </requirements>
9 <stdio> 7 <stdio>
10 <regex match="Execution halted" 8 <regex match="Execution halted"
11 source="both" 9 source="both"
12 level="fatal" 10 level="fatal"
19 source="both" 17 source="both"
20 level="fatal" 18 level="fatal"
21 description="An undefined error occured, please check your intput carefully and contact your administrator." /> 19 description="An undefined error occured, please check your intput carefully and contact your administrator." />
22 </stdio> 20 </stdio>
23 <version_command><![CDATA[ 21 <version_command><![CDATA[
24 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ") 22 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
25 ]]></version_command> 23 ]]></version_command>
26 <command><![CDATA[ 24 <command><![CDATA[
27 ## seems that diffbind also needs file extensions to work properly 25 ## seems that diffbind also needs file extensions to work properly
28 #set $counter = 1 26 #set $counter = 1
29 #for $sample in $samples: 27 #for $sample in $samples:
37 #end for 35 #end for
38 36
39 Rscript '$__tool_directory__/diffbind.R' 37 Rscript '$__tool_directory__/diffbind.R'
40 -i $infile 38 -i $infile
41 -o '$outfile' 39 -o '$outfile'
40 -t $th
41 -f $out.format
42 -p '$plots' 42 -p '$plots'
43 -f $format 43
44 -t $th 44 #if $out.binding_matrix:
45
46 #if $binding_affinity_matrix:
47 -b 45 -b
46 #end if
47
48 #if $out.rdata:
49 -r
48 #end if 50 #end if
49 ]]> 51 ]]>
50 </command> 52 </command>
51 <configfiles> 53 <configfiles>
52 <configfile name="infile"><![CDATA[ 54 <configfile name="infile"><![CDATA[
64 #end if 66 #end if
65 #set $counter = $counter + 1 67 #set $counter = $counter + 1
66 #end for]]></configfile> 68 #end for]]></configfile>
67 </configfiles> 69 </configfiles>
68 <inputs> 70 <inputs>
69 <repeat name="samples" title="Samples" min="2"> 71 <repeat name="samples" title="Samples" min="4">
70 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> 72 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" />
71 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> 73 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" />
72 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> 74 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" />
73 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> 75 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" />
74 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> 76 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" />
77 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> 79 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/>
78 </repeat> 80 </repeat>
79 <param name="th" type="float" value="1" min="0" max="1" 81 <param name="th" type="float" value="1" min="0" max="1"
80 label="FDR Threshold" 82 label="FDR Threshold"
81 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> 83 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/>
82 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" 84
83 label="Visualising the analysis results" 85 <!-- Output Options -->
84 help="output an additional PDF file" /> 86 <section name="out" expanded="false" title="Output Options">
85 <param name="format" type="select" label="Output Format"> 87 <param name="format" type="select" label="Output Format">
86 <option value="bed">BED</option> 88 <option value="bed">BED</option>
87 <option value="gff">GFF</option> 89 <option value="gff">GFF</option>
88 <option value="wig">WIG</option> 90 <option value="wig">WIG</option>
89 </param> 91 </param>
90 <param name="binding_affinity_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> 92 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" />
93 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" />
94 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No">
95 </param>
96 </section>
91 </inputs> 97 </inputs>
98
92 <outputs> 99 <outputs>
93 <data name="outfile" format="bed" label="Differential binding sites on ${on_string}"> 100 <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites">
94 <change_format> 101 <change_format>
95 <when input="format" value="wig" format="wig" /> 102 <when input="format" value="wig" format="wig" />
96 <when input="format" value="gff" format="gff" /> 103 <when input="format" value="gff" format="gff" />
97 </change_format> 104 </change_format>
98 </data> 105 </data>
99 <data name="plots" format="pdf" label="Differential binding sites on ${on_string}"> 106 <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots">
100 <filter>pdf == True</filter> 107 <filter>out['pdf']</filter>
101 </data> 108 </data>
102 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="Differential binding sites on ${on_string}"> 109 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="${tool.name} on ${on_string}: Binding matrix">
103 <filter>binding_affinity_matrix == True</filter> 110 <filter>out['binding_matrix']</filter>
111 </data>
112 <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file">
113 <filter>out['rdata']</filter>
104 </data> 114 </data>
105 </outputs> 115 </outputs>
116
106 <tests> 117 <tests>
107 <test> 118 <test expect_num_outputs="4">
108 <repeat name="samples"> 119 <repeat name="samples">
109 <param name="sample_id" value="BT4741" /> 120 <param name="sample_id" value="BT4741" />
110 <param name="tissue" value="BT474" /> 121 <param name="tissue" value="BT474" />
111 <param name="factor" value="ER" /> 122 <param name="factor" value="ER" />
112 <param name="condition" value="Resistant" /> 123 <param name="condition" value="Resistant" />
140 <param name="replicate" value="2" /> 151 <param name="replicate" value="2" />
141 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> 152 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" />
142 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> 153 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" />
143 </repeat> 154 </repeat>
144 <param name="pdf" value="True" /> 155 <param name="pdf" value="True" />
145 <param name="binding_affinity_matrix" value="True" /> 156 <param name="binding_matrix" value="True" />
157 <param name="rdata" value="True" />
146 <output name="outfile" value="out_diffbind.bed" /> 158 <output name="outfile" value="out_diffbind.bed" />
159 <output name="plots" value="out_plots.pdf" compare="sim_size" />
147 <output name="binding_matrix" value="out_binding.matrix" /> 160 <output name="binding_matrix" value="out_binding.matrix" />
161 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/>
148 </test> 162 </test>
149 </tests> 163 </tests>
150 <help><![CDATA[ 164 <help><![CDATA[
151 165
152 .. class:: infomark 166 .. class:: infomark
164 between two sample groups. It includes functions to support the processing of peak sets, 178 between two sample groups. It includes functions to support the processing of peak sets,
165 including overlapping and merging peak sets, counting sequencing reads overlapping intervals 179 including overlapping and merging peak sets, counting sequencing reads overlapping intervals
166 in peak sets, and identifying statistically significantly differentially bound sites based on 180 in peak sets, and identifying statistically significantly differentially bound sites based on
167 evidence of binding affinity (measured by differences in read densities). To this end it uses 181 evidence of binding affinity (measured by differences in read densities). To this end it uses
168 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages 182 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages
169 edgeR and DESeq2 ). Additionally, the package builds on Rgraphics routines to provide a 183 edgeR and DESeq2). Additionally, the package builds on Rgraphics routines to provide a
170 set of standardized plots to aid in binding analysis. 184 set of standardized plots to aid in binding analysis.
171 185
172 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of 186 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of
173 examples: the first focusing on the core task of obtaining differentially bound sites based on 187 examples: the first focusing on the core task of obtaining differentially bound sites based on
174 affinity data, the second working through the main plotting routines, the third discussing the 188 affinity data, the second working through the main plotting routines, the third discussing the
179 Note DiffBind requires a minimum of four samples (two groups with two replicates each). 193 Note DiffBind requires a minimum of four samples (two groups with two replicates each).
180 194
181 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html 195 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
182 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html 196 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
183 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf 197 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf
198
199 -----
184 200
185 **Inputs** 201 **Inputs**
186 202
187 DiffBind works primarily with peaksets, which are sets of genomic intervals representing 203 DiffBind works primarily with peaksets, which are sets of genomic intervals representing
188 candidate protein binding sites. Each interval consists of a chromosome, a start and end 204 candidate protein binding sites. Each interval consists of a chromosome, a start and end
192 be associated with each peakset (one for the ChIP data, and optionally another representing 208 be associated with each peakset (one for the ChIP data, and optionally another representing
193 a control sample). 209 a control sample).
194 210
195 **Sample Information** 211 **Sample Information**
196 212
197 You have to specify your sample information in the tool form above. 213 You have to specify your sample information in the tool form above, where Condition contains the groups you want to compare.
198 214
199 Example: 215 Example:
200 216
201 ============= ========== ========== ============= ============= 217 ============= ========== ========== ============= =============
202 **SampleID** **Tissue** **Factor** **Condition** **Replicate** 218 **SampleID** **Tissue** **Factor** **Condition** **Replicate**
212 MCF7r2 MCF7 ER Resistant 2 228 MCF7r2 MCF7 ER Resistant 2
213 ZR751 ZR75 ER Responsive 1 229 ZR751 ZR75 ER Responsive 1
214 ZR752 ZR75 ER Responsive 2 230 ZR752 ZR75 ER Responsive 2
215 ============= ========== ========== ============= ============= 231 ============= ========== ========== ============= =============
216 232
217 Or provide a sample sheet tabular file such as below.
218
219 Example:
220
221 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ==========
222 SampleID Tissue Factor Condition Treatment Replicate bamReads ControlID bamControl Peaks PeakCaller
223 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ==========
224 BT4741 BT474 ER Resistant Full-Media 1 Chr18_BT474_ER_1.bam BT474c Chr18_BT474_input.bam BT474_ER_1.bed.gz bed
225 BT4742 BT474 ER Resistant Full-Media 2 Chr18_BT474_ER_2.bam BT474c Chr18_BT474_input.bam BT474_ER_2.bed.gz bed
226 MCF71 MCF7 ER Responsive Full-Media 1 Chr18_MCF7_ER_1.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_1.bed.gz bed
227 MCF72 MCF7 ER Responsive Full-Media 2 Chr18_MCF7_ER_2.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_2.bed.gz bed
228 MCF73 MCF7 ER Responsive Full-Media 3 Chr18_MCF7_ER_3.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_3.bed.gz bed
229 T47D1 T47D ER Responsive Full-Media 1 Chr18_T47D_ER_1.bam T47Dc Chr18_T47D_input.bam T47D_ER_1.bed.gz bed
230 T47D2 T47D ER Responsive Full-Media 2 Chr18_T47D_ER_2.bam T47Dc Chr18_T47D_input.bam T47D_ER_2.bed.gz bed
231 MCF7r1 MCF7 ER Resistant Full-Media 1 Chr18_TAMR_ER_1.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_1.bed.gz bed
232 MCF7r2 MCF7 ER Resistant Full-Media 2 Chr18_TAMR_ER_2.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_2.bed.gz bed
233 ZR751 ZR75 ER Responsive Full-Media 1 Chr18_ZR75_ER_1.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_1.bed.gz bed
234 ZR752 ZR75 ER Responsive Full-Media 2 Chr18_ZR75_ER_2.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_2.bed.gz bed
235 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ==========
236
237 233
238 **Peak files** 234 **Peak files**
239 235
240 Result of your Peak calling experiment in bed format, one file for each sample is required. 236 Result of your Peak calling experiment in bed format, one file for each sample is required.
241 237
257 ======= ======= ======= =============== ======= 253 ======= ======= ======= =============== =======
258 254
259 * BAM file which contains the mapped sequencing reads can be associated with each peakset 255 * BAM file which contains the mapped sequencing reads can be associated with each peakset
260 * Control BAM files represent control datasets and are optional, but have to be specified for all samples when used. 256 * Control BAM files represent control datasets and are optional, but have to be specified for all samples when used.
261 257
258 -----
262 259
263 **Outputs** 260 **Outputs**
264 261
262 This tool outputs
263
264 * differentially bound sites in BED, WIG or GFF format
265
266 Optionally, under **Output Options** you can choose to output
267
268 * a correlation heatmap plot
269 * a binding affinity matrix
270 * an RData file
271
272 **Differentially Bound Sites**
273
265 As output format you can choose BED, GFF, or WIG. 274 As output format you can choose BED, GFF, or WIG.
266 275
267 Example: 276 Example - BED format:
268 277
269 ======== ====== =======+ 278 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ========
270 seqnames ranges strand Conc Conc_Resistant 279 1 2 3 4 5 6 7 8 9 10 **11**
271 280 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ========
272 2452 chr18 [64490686, 64491186] * | 6.36 1.39 281 chr18 394600 396513 1914 * 7.15 7.89 5.55 2.35 7.06e-24 9.84e-21
273 1291 chr18 [34597713, 34598213] * | 5.33 0.22 282 chr18 111567 112005 439 * 5.71 3.63 6.53 -2.89 1.27e-08 8.88e-06
274 976 chr18 [26860997, 26861497] * | 7.3 3.13 283 chr18 346464 347342 879 * 5 3.24 5.77 -2.52 6.51e-06 0.00303
275 2338 chr18 [60892900, 60893400] * | 7.13 1.84 284 chr18 399014 400382 1369 * 7.62 8.05 7 1.04 1.04e-05 0.00364
276 2077 chr18 [55569087, 55569587] * | 5.52 1.89 285 chr18 371110 372102 993 * 4.63 5.36 3.07 2.3 8.1e-05 0.0226
277 286 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ========
278 Conc_Responsive Fold p-value FDR 287
279 <numeric> <numeric> <numeric> <numeric> 288 Columns contain the following data:
280 2452 7 -5.61 3.57e-10 1.02e-06 289
281 1291 5.97 -5.75 1.1e-09 1.57e-06 290 * **1st**: Chromosome name
282 976 7.92 -4.79 1.1e-08 1.05e-05 291 * **2nd**: Start position of site
283 2338 7.77 -5.93 1.68e-08 1.17e-05 292 * **3rd**: End position of site
284 2077 6.13 -4.23 2.36e-08 1.17e-05 293 * **4th**: Length of site
285 294 * **5th**: Strand
286 The value columns show the 295 * **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
287 Conc mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) 296 * **7th**: Mean concentration over the first (e.g. Resistant) group
288 Conc_Resistant mean concentration over the first (Resistant) group 297 * **8th**: Mean concentration over second (e.g. Responsive) group
289 Conc_Responsive mean concentration over second (Responsive) group 298 * **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Resistant - Responsive), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group.
290 Fold column shows the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group. 299 * **10th**: P-value confidence measure for identifying these sites as differentially bound
291 p-value confidence measure for identifying these sites as differentially bound 300 * **11th**: a multiple testing corrected FDR p-value
292 FDR a multiple testing corrected FDR p-value
293 301
294 302
295 **Binding Affinity Matrix** 303 **Binding Affinity Matrix**
296 304
297 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent 305 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent
313 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 321 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13
314 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 322 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32
315 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 323 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22
316 ====== ====== ====== ========== ========== ========= ====== ========= ==== 324 ====== ====== ====== ========== ========== ========= ====== ========= ====
317 325
318 326 -----
319 327
320 **More Information** 328 **More Information**
321 329
322 Generally, processing data with DiffBind involves five phases: 330 Generally, processing data with DiffBind involves five phases:
323 331
326 #. Counting reads 334 #. Counting reads
327 #. Differential binding affinity analysis 335 #. Differential binding affinity analysis
328 #. Plotting and reporting 336 #. Plotting and reporting
329 337
330 338
331 * **Reading in peaksets**: 339 **Reading in peaksets**:
332 340
333 The first step is to read in a set of peaksets and associated 341 The first step is to read in a set of peaksets and associated
334 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS 342 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as **MACS2**, or using some other criterion (e.g. genomic windows, or all the promoter regions
335 ([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions 343 in a genome). A single experiment can have more than
336 in a genome). The easiest way to read in peaksets is using a comma-separated value
337 (csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with
338 a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than
339 one associated peakset; e.g. if multiple peak callers are used for comparison purposes 344 one associated peakset; e.g. if multiple peak callers are used for comparison purposes
340 each sample would have more than one line in the sample sheet. Once the peaksets 345 each sample would have more than one line in the sample sheet. Once the peaksets
341 are read in, a merging function finds all overlapping peaks and derives a single set of 346 are read in, a merging function finds all overlapping peaks and derives a single set of
342 unique genomic intervals covering all the supplied peaks (a consensus peakset for the 347 unique genomic intervals covering all the supplied peaks (a consensus peakset for the
343 experiment). 348 experiment).
344 349
345 * **Occupancy analysis**: 350 **Occupancy analysis**:
346 351
347 Peaksets, especially those generated by peak callers, provide 352 Peaksets, especially those generated by peak callers, provide
348 an insight into the potential occupancy of the protein being ChIPed for at specific 353 an insight into the potential occupancy of the protein being ChIPed for at specific
349 genomic loci. After the peaksets have been loaded, it can be useful to perform some 354 genomic loci. After the peaksets have been loaded, it can be useful to perform some
350 exploratory plotting to determine how these occupancy maps agree with each other, 355 exploratory plotting to determine how these occupancy maps agree with each other,
354 overlaps to be examined, as well as functions to determine how well similar samples 359 overlaps to be examined, as well as functions to determine how well similar samples
355 cluster together. Beyond quality control, the product of an occupancy analysis may be 360 cluster together. Beyond quality control, the product of an occupancy analysis may be
356 a consensus peakset, representing an overall set of candidate binding sites to be used 361 a consensus peakset, representing an overall set of candidate binding sites to be used
357 in further analysis. 362 in further analysis.
358 363
359 * **Counting reads**: 364 **Counting reads**:
360 365
361 Once a consensus peakset has been derived, DiffBind can use the 366 Once a consensus peakset has been derived, DiffBind can use the
362 supplied sequence read files to count how many reads overlap each interval for each 367 supplied sequence read files to count how many reads overlap each interval for each
363 unique sample. The peaks in the consensus peakset may be re-centered and trimmed 368 unique sample. The peaks in the consensus peakset may be re-centered and trimmed
364 based on calculating their summits (point of greatest read overlap) in order to provide 369 based on calculating their summits (point of greatest read overlap) in order to provide
366 containing a (normalized) read count for each sample at every potential binding site. 371 containing a (normalized) read count for each sample at every potential binding site.
367 With this matrix, the samples can be re-clustered using affinity, rather than occupancy, 372 With this matrix, the samples can be re-clustered using affinity, rather than occupancy,
368 data. The binding affinity matrix is used for QC plotting as well as for subsequent 373 data. The binding affinity matrix is used for QC plotting as well as for subsequent
369 differential analysis. 374 differential analysis.
370 375
371 * **Differential binding affinity analysis**: 376 **Differential binding affinity analysis**:
372 377
373 The core functionality of DiffBind is the 378 The core functionality of DiffBind is the
374 differential binding affinity analysis, which enables binding sites to be identified that 379 differential binding affinity analysis, which enables binding sites to be identified that
375 are statistically significantly differentially bound between sample groups. To accomplish 380 are statistically significantly differentially bound between sample groups. To accomplish
376 this, first a contrast (or contrasts) is established, dividing the samples into groups to 381 this, first a contrast (or contrasts) is established, dividing the samples into groups to
377 be compared. Next the core analysis routines are executed, by default using DESeq2. 382 be compared. Next the core analysis routines are executed, by default using DESeq2.
378 This will assign a p-value and FDR to each candidate binding site indicating confidence 383 This will assign a p-value and FDR to each candidate binding site indicating confidence
379 that they are differentially bound. 384 that they are differentially bound.
380 385
381 * **Plotting and reporting**: 386 **Plotting and reporting**:
382 387
383 Once one or more contrasts have been run, DiffBind provides 388 Once one or more contrasts have been run, DiffBind provides
384 a number of functions for reporting and plotting the results. MA plots give an 389 a number of functions for reporting and plotting the results. MA plots give an
385 overview of the results of the analysis, while correlation heatmaps and PCA plots show 390 overview of the results of the analysis, while correlation heatmaps and PCA plots show
386 how the groups cluster based on differentially bound sites. Boxplots show the distribution 391 how the groups cluster based on differentially bound sites. Boxplots show the distribution
387 of reads within differentially bound sites corresponding to whether they gain or 392 of reads within differentially bound sites corresponding to whether they gain or
388 lose affinity between the two sample groups. A reporting mechanism enables differentially 393 lose affinity between the two sample groups. A reporting mechanism enables differentially
389 bound sites to be extracted for further processing, such as annotation, motif, and 394 bound sites to be extracted for further processing, such as annotation, motif, and
390 pathway analyses. 395 pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.*
396
397 -----
391 398
392 **References** 399 **References**
393 400
394 DiffBind Authors: Rory Stark, Gordon Brown (2011) 401 DiffBind Authors: Rory Stark, Gordon Brown (2011)
395 Wrapper authors: Bjoern Gruening, Pavankumar Videm 402 Wrapper authors: Bjoern Gruening, Pavankumar Videm