comparison diffbind.xml @ 7:681dedc42aca draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/diffbind commit affbc59222cde9be21e91fa1f9194930a070b830
author iuc
date Sun, 28 Jan 2018 04:26:11 -0500
parents 6031247f61d4
children
comparison
equal deleted inserted replaced
6:6031247f61d4 7:681dedc42aca
1 <tool id="diffbind" name="DiffBind" version="2.2.2"> 1 <tool id="diffbind" name="DiffBind" version="2.6.5.0">
2 <description> differential binding analysis of ChIP-Seq peak data</description> 2 <description> differential binding analysis of ChIP-Seq peak data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.0.9">bioconductor-diffbind</requirement> 4 <requirement type="package" version="2.6.5">bioconductor-diffbind</requirement>
5 <requirement type="package" version="1.20.0">r-getopt</requirement> 5 <requirement type="package" version="1.20.0">r-getopt</requirement>
6 <!--added rmysql requirement to remove: "Warning: namespace ‘RMySQL’ is not available"-->
7 <requirement type="package" version="0.10.11">r-rmysql</requirement>
6 </requirements> 8 </requirements>
7 <stdio> 9 <stdio>
8 <regex match="Execution halted" 10 <regex match="Execution halted"
9 source="both" 11 source="both"
10 level="fatal" 12 level="fatal"
16 <regex match="Error in" 18 <regex match="Error in"
17 source="both" 19 source="both"
18 level="fatal" 20 level="fatal"
19 description="An undefined error occured, please check your intput carefully and contact your administrator." /> 21 description="An undefined error occured, please check your intput carefully and contact your administrator." />
20 </stdio> 22 </stdio>
21 <command> 23 <version_command><![CDATA[
22 <![CDATA[ 24 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
25 ]]></version_command>
26 <command><![CDATA[
23 ## seems that diffbind also needs file extensions to work properly 27 ## seems that diffbind also needs file extensions to work properly
24 #set $counter = 1 28 #set $counter = 1
25 #for $sample in $samples: 29 #for $sample in $samples:
26 ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# && 30 ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# &&
27 ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# && 31 ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# &&
30 ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# && 34 ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# &&
31 #end if 35 #end if
32 #set $counter = $counter + 1 36 #set $counter = $counter + 1
33 #end for 37 #end for
34 38
35 Rscript $__tool_directory__/diffbind.R 39 Rscript '$__tool_directory__/diffbind.R'
36 -i $infile 40 -i $infile
37 -o $outfile 41 -o '$outfile'
38 -p $plots 42 -p '$plots'
39 -f $format 43 -f $format
44 -t $th
45
46 #if $binding_affinity_matrix:
47 -b
48 #end if
40 ]]> 49 ]]>
41 </command> 50 </command>
42 <configfiles> 51 <configfiles>
43 <configfile name="infile"> 52 <configfile name="infile"><![CDATA[
44 #set $counter = 1 53 #set $counter = 1
45 #for $sample in $samples: 54 #for $sample in $samples:
46 #if str( $sample.bamcontrol ) != 'None' and $counter == 1: 55 #if str( $sample.bamcontrol ) != 'None' and $counter == 1:
47 SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks 56 SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks
48 #elif $counter == 1: 57 #elif $counter == 1:
52 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks 61 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks
53 #else: 62 #else:
54 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks 63 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks
55 #end if 64 #end if
56 #set $counter = $counter + 1 65 #set $counter = $counter + 1
57 #end for 66 #end for]]></configfile>
58 </configfile>
59 </configfiles> 67 </configfiles>
60 <inputs> 68 <inputs>
61 <repeat name="samples" title="Samples" min="2"> 69 <repeat name="samples" title="Samples" min="2">
62 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> 70 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" />
63 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> 71 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" />
64 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> 72 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" />
65 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> 73 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" />
66 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> 74 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" />
67 <param format="bam" name="bamreads" type="data" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/> 75 <param name="bamreads" type="data" format="bam" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/>
68 <param format="bam" name="bamcontrol" type="data" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/> 76 <param name="bamcontrol" type="data" format="bam" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/>
69 <param format="bed" name="peaks" type="data" label="Peak file" help="Result of your Peak calling experiment."/> 77 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/>
70 </repeat> 78 </repeat>
79 <param name="th" type="float" value="1" min="0" max="1"
80 label="FDR Threshold"
81 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/>
71 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" 82 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true"
72 label="Visualising the analysis results" 83 label="Visualising the analysis results"
73 help="output an additional PDF files" /> 84 help="output an additional PDF file" />
74 <param name="format" type="select" label="Output Format"> 85 <param name="format" type="select" label="Output Format">
75 <option value="bed">BED</option> 86 <option value="bed">BED</option>
76 <option value="gff">GFF</option> 87 <option value="gff">GFF</option>
77 <option value="wig">WIG</option> 88 <option value="wig">WIG</option>
78 </param> 89 </param>
90 <param name="binding_affinity_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" />
79 </inputs> 91 </inputs>
80 <outputs> 92 <outputs>
81 <data format="bed" name="outfile" label="Differential binding sites on ${on_string}"> 93 <data name="outfile" format="bed" label="Differential binding sites on ${on_string}">
82 <change_format> 94 <change_format>
83 <when input="format" value="wig" format="wig" /> 95 <when input="format" value="wig" format="wig" />
84 <when input="format" value="gff" format="gff" /> 96 <when input="format" value="gff" format="gff" />
85 </change_format> 97 </change_format>
86 </data> 98 </data>
87 <data format="pdf" name="plots" label="Differential binding sites on ${on_string}"> 99 <data name="plots" format="pdf" label="Differential binding sites on ${on_string}">
88 <filter>pdf == True</filter> 100 <filter>pdf == True</filter>
89 </data> 101 </data>
102 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="Differential binding sites on ${on_string}">
103 <filter>binding_affinity_matrix == True</filter>
104 </data>
90 </outputs> 105 </outputs>
91 <help> 106 <tests>
92 <![CDATA[ 107 <test>
93 108 <repeat name="samples">
94 What it does 109 <param name="sample_id" value="BT4741" />
95 ************ 110 <param name="tissue" value="BT474" />
96 111 <param name="factor" value="ER" />
97 Diffbind provides functions for processing ChIP-Seq data enriched for genomic loci where specific protein/DNA binding occurs, including peak sets identified by ChIP-Seq peak callers and aligned sequence read datasets. 112 <param name="condition" value="Resistant" />
98 113 <param name="replicate" value="1" />
99 Input 114 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam" />
100 ***** 115 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz" />
101 116 </repeat>
102 * You have to specify your samples. Here is one example:: 117 <repeat name="samples">
103 118 <param name="sample_id" value="BT4742" />
104 ID Tissue Factor Condition Treatment Replicate Caller Intervals 119 <param name="tissue" value="BT474" />
105 BT4741 BT474 ER Resistant Full-Media 1 raw 1084 120 <param name="factor" value="ER" />
121 <param name="condition" value="Resistant" />
122 <param name="replicate" value="2" />
123 <param name="bamreads" ftype="bam" value="BT474_ER_2.bam" />
124 <param name="peaks" ftype="bed" value="BT474_ER_2.bed.gz" />
125 </repeat>
126 <repeat name="samples">
127 <param name="sample_id" value="MCF71" />
128 <param name="tissue" value="MCF7" />
129 <param name="factor" value="ER" />
130 <param name="condition" value="Responsive" />
131 <param name="replicate" value="1" />
132 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam" />
133 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz" />
134 </repeat>
135 <repeat name="samples">
136 <param name="sample_id" value="MCF72" />
137 <param name="tissue" value="MCF7" />
138 <param name="factor" value="ER" />
139 <param name="condition" value="Responsive" />
140 <param name="replicate" value="2" />
141 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" />
142 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" />
143 </repeat>
144 <param name="pdf" value="True" />
145 <param name="binding_affinity_matrix" value="True" />
146 <output name="outfile" value="out_diffbind.bed" />
147 <output name="binding_matrix" value="out_binding.matrix" />
148 </test>
149 </tests>
150 <help><![CDATA[
151
152 .. class:: infomark
153
154 **What it does**
155
156 DiffBind_ is a `Bioconductor package`_ that provides functions for processing ChIP-Seq data enriched for genomic loci where specific
157 protein/DNA binding occurs, including peak sets identified by ChIP-Seq peak callers and
158 aligned sequence read datasets. It is designed to work with multiple peak sets simultaneously,
159 representing different ChIP experiments (antibodies, transcription factor and/or histone
160 marks, experimental conditions, replicates) as well as managing the results of multiple peak
161 callers.
162
163 The primary emphasis of DiffBind is on identifying sites that are differentially bound
164 between two sample groups. It includes functions to support the processing of peak sets,
165 including overlapping and merging peak sets, counting sequencing reads overlapping intervals
166 in peak sets, and identifying statistically significantly differentially bound sites based on
167 evidence of binding affinity (measured by differences in read densities). To this end it uses
168 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages
169 edgeR and DESeq2). Additionally, the package builds on R graphics routines to provide a
170 set of standardized plots to aid in binding analysis.
171
172 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of
173 examples: the first focusing on the core task of obtaining differentially bound sites based on
174 affinity data, the second working through the main plotting routines, the third discussing the
175 use of a blocking factor, and the fourth revisiting occupancy data (peak calls) in more detail,
176 as well as comparing the results of an occupancy-based analysis with an affinity-based one.
177 Finally, certain technical aspects of how these analyses are accomplished are detailed.
178
179 Note: DiffBind requires a minimum of four samples (two groups with two replicates each).
180
181 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
182 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
183 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf
184
185 **Inputs**
186
187 DiffBind works primarily with peaksets, which are sets of genomic intervals representing
188 candidate protein binding sites. Each interval consists of a chromosome, a start and end
189 position, and usually a score of some type indicating confidence in, or strength of, the peak.
190 Associated with each peakset are metadata relating to the experiment from which the peakset
191 was derived. Additionally, files containing mapped sequencing reads (generally .bam files) can
192 be associated with each peakset (one for the ChIP data, and optionally another representing
193 a control sample).
194
195 **Sample Information**
196
197 You have to specify your sample information in the tool form above.
198
199 Example:
200
201 ============= ========== ========== ============= =============
202 **SampleID** **Tissue** **Factor** **Condition** **Replicate**
203 ------------- ---------- ---------- ------------- -------------
204 BT4741 BT474 ER Resistant 1
205 BT4742 BT474 ER Resistant 2
206 MCF71 MCF7 ER Responsive 1
207 MCF72 MCF7 ER Responsive 2
208 MCF73 MCF7 ER Responsive 3
209 T47D1 T47D ER Responsive 1
210 T47D2 T47D ER Responsive 2
211 MCF7r1 MCF7 ER Resistant 1
212 MCF7r2 MCF7 ER Resistant 2
213 ZR751 ZR75 ER Responsive 1
214 ZR752 ZR75 ER Responsive 2
215 ============= ========== ========== ============= =============
216
217 Or provide a sample sheet tabular file such as below.
218
219 Example:
220
221 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ==========
222 SampleID Tissue Factor Condition Treatment Replicate bamReads ControlID bamControl Peaks PeakCaller
223 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ==========
224 BT4741 BT474 ER Resistant Full-Media 1 Chr18_BT474_ER_1.bam BT474c Chr18_BT474_input.bam BT474_ER_1.bed.gz bed
225 BT4742 BT474 ER Resistant Full-Media 2 Chr18_BT474_ER_2.bam BT474c Chr18_BT474_input.bam BT474_ER_2.bed.gz bed
226 MCF71 MCF7 ER Responsive Full-Media 1 Chr18_MCF7_ER_1.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_1.bed.gz bed
227 MCF72 MCF7 ER Responsive Full-Media 2 Chr18_MCF7_ER_2.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_2.bed.gz bed
228 MCF73 MCF7 ER Responsive Full-Media 3 Chr18_MCF7_ER_3.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_3.bed.gz bed
229 T47D1 T47D ER Responsive Full-Media 1 Chr18_T47D_ER_1.bam T47Dc Chr18_T47D_input.bam T47D_ER_1.bed.gz bed
230 T47D2 T47D ER Responsive Full-Media 2 Chr18_T47D_ER_2.bam T47Dc Chr18_T47D_input.bam T47D_ER_2.bed.gz bed
231 MCF7r1 MCF7 ER Resistant Full-Media 1 Chr18_TAMR_ER_1.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_1.bed.gz bed
232 MCF7r2 MCF7 ER Resistant Full-Media 2 Chr18_TAMR_ER_2.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_2.bed.gz bed
233 ZR751 ZR75 ER Responsive Full-Media 1 Chr18_ZR75_ER_1.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_1.bed.gz bed
234 ZR752 ZR75 ER Responsive Full-Media 2 Chr18_ZR75_ER_2.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_2.bed.gz bed
235 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ==========
236
237
238 **Peak files**
239
240 Result of your peak calling experiment in BED format; one file is required for each sample.
241
242 Example:
243
244 ======= ======= ======= =============== =======
245 1 2 3 4 **5**
246 ======= ======= ======= =============== =======
247 chr18 215562 216063 MACS_peak_16037 56.11
248 chr18 311530 312105 MACS_peak_16038 222.49
249 chr18 356656 357315 MACS_peak_16039 92.06
250 chr18 371110 372092 MACS_peak_16040 123.86
251 chr18 395116 396464 MACS_peak_16041 1545.39
252 chr18 399014 400382 MACS_peak_16042 1835.19
253 chr18 499134 500200 MACS_peak_16043 748.32
254 chr18 503518 504552 MACS_peak_16044 818.30
255 chr18 531672 532274 MACS_peak_16045 159.30
256 chr18 568326 569282 MACS_peak_16046 601.11
257 ======= ======= ======= =============== =======
106 258
107 * BAM file which contains the mapped sequencing reads can be associated with each peakset 259 * BAM file which contains the mapped sequencing reads can be associated with each peakset
108 * Control BAM files represent control datasets and are optional, but have to be specified for all samples when used. 260 * Control BAM files represent control datasets and are optional, but have to be specified for all samples when used.
109 * Peak file: Result of your Peak calling experiment 261
110 262
111 Output 263 **Outputs**
112 ******
113 264
114 As output format you can choose BED, GFF, WIG. 265 As output format you can choose BED, GFF, WIG.
115 266
116 References 267 Example:
117 ********** 268
118 269 ======== ====== =======+
119 DiffBind_ Authors: Rory Stark, Gordon Brown (2011) 270 seqnames ranges strand Conc Conc_Resistant
120 271
121 .. _DiffBind: http://www.bioconductor.org/packages/release/bioc/html/DiffBind.html 272 2452 chr18 [64490686, 64491186] * | 6.36 1.39
122 273 1291 chr18 [34597713, 34598213] * | 5.33 0.22
274 976 chr18 [26860997, 26861497] * | 7.3 3.13
275 2338 chr18 [60892900, 60893400] * | 7.13 1.84
276 2077 chr18 [55569087, 55569587] * | 5.52 1.89
277
278 Conc_Responsive Fold p-value FDR
279 <numeric> <numeric> <numeric> <numeric>
280 2452 7 -5.61 3.57e-10 1.02e-06
281 1291 5.97 -5.75 1.1e-09 1.57e-06
282 976 7.92 -4.79 1.1e-08 1.05e-05
283 2338 7.77 -5.93 1.68e-08 1.17e-05
284 2077 6.13 -4.23 2.36e-08 1.17e-05
285
286 The value columns show the following:
287 Conc mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
288 Conc_Resistant mean concentration over the first (Resistant) group
289 Conc_Responsive mean concentration over second (Responsive) group
290 Fold column shows the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group.
291 p-value confidence measure for identifying these sites as differentially bound
292 FDR a multiple testing corrected FDR p-value
293
294
295 **Binding Affinity Matrix**
296
297 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent
298 differential analysis.
299
300 Example:
301
302 ====== ====== ====== ========== ========== ========= ====== ========= ====
303 ID Tissue Factor Condition Treatment Replicate Caller Intervals FRiP
304 ====== ====== ====== ========== ========== ========= ====== ========= ====
305 BT4741 BT474 ER Resistant Full-Media 1 counts 2845 0.16
306 BT4742 BT474 ER Resistant Full-Media 2 counts 2845 0.15
307 MCF71 MCF7 ER Responsive Full-Media 1 counts 2845 0.27
308 MCF72 MCF7 ER Responsive Full-Media 2 counts 2845 0.17
309 MCF73 MCF7 ER Responsive Full-Media 3 counts 2845 0.23
310 T47D1 T47D ER Responsive Full-Media 1 counts 2845 0.10
311 T47D2 T47D ER Responsive Full-Media 2 counts 2845 0.06
312 MCF7r1 MCF7 ER Resistant Full-Media 1 counts 2845 0.20
313 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13
314 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32
315 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22
316 ====== ====== ====== ========== ========== ========= ====== ========= ====
317
318
319
320 **More Information**
321
322 Generally, processing data with DiffBind involves five phases:
323
324 #. Reading in peaksets
325 #. Occupancy analysis
326 #. Counting reads
327 #. Differential binding affinity analysis
328 #. Plotting and reporting
329
330
331 * **Reading in peaksets**:
332
333 The first step is to read in a set of peaksets and associated
334 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS
335 ([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions
336 in a genome). The easiest way to read in peaksets is using a comma-separated value
337 (csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with
338 a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than
339 one associated peakset; e.g. if multiple peak callers are used for comparison purposes
340 each sample would have more than one line in the sample sheet. Once the peaksets
341 are read in, a merging function finds all overlapping peaks and derives a single set of
342 unique genomic intervals covering all the supplied peaks (a consensus peakset for the
343 experiment).
344
345 * **Occupancy analysis**:
346
347 Peaksets, especially those generated by peak callers, provide
348 an insight into the potential occupancy of the protein being ChIPed for at specific
349 genomic loci. After the peaksets have been loaded, it can be useful to perform some
350 exploratory plotting to determine how these occupancy maps agree with each other,
351 e.g. between experimental replicates (re-doing the ChIP under the same conditions),
352 between different peak callers on the same experiment, and within groups of samples
353 representing a common experimental condition. DiffBind provides functions to enable
354 overlaps to be examined, as well as functions to determine how well similar samples
355 cluster together. Beyond quality control, the product of an occupancy analysis may be
356 a consensus peakset, representing an overall set of candidate binding sites to be used
357 in further analysis.
358
359 * **Counting reads**:
360
361 Once a consensus peakset has been derived, DiffBind can use the
362 supplied sequence read files to count how many reads overlap each interval for each
363 unique sample. The peaks in the consensus peakset may be re-centered and trimmed
364 based on calculating their summits (point of greatest read overlap) in order to provide
365 more standardized peak intervals. The final result of counting is a binding affinity matrix
366 containing a (normalized) read count for each sample at every potential binding site.
367 With this matrix, the samples can be re-clustered using affinity, rather than occupancy,
368 data. The binding affinity matrix is used for QC plotting as well as for subsequent
369 differential analysis.
370
371 * **Differential binding affinity analysis**:
372
373 The core functionality of DiffBind is the
374 differential binding affinity analysis, which enables binding sites to be identified that
375 are statistically significantly differentially bound between sample groups. To accomplish
376 this, first a contrast (or contrasts) is established, dividing the samples into groups to
377 be compared. Next the core analysis routines are executed, by default using DESeq2.
378 This will assign a p-value and FDR to each candidate binding site indicating confidence
379 that they are differentially bound.
380
381 * **Plotting and reporting**:
382
383 Once one or more contrasts have been run, DiffBind provides
384 a number of functions for reporting and plotting the results. MA plots give an
385 overview of the results of the analysis, while correlation heatmaps and PCA plots show
386 how the groups cluster based on differentially bound sites. Boxplots show the distribution
387 of reads within differentially bound sites corresponding to whether they gain or
388 lose affinity between the two sample groups. A reporting mechanism enables differentially
389 bound sites to be extracted for further processing, such as annotation, motif, and
390 pathway analyses.
391
392 **References**
393
394 DiffBind Authors: Rory Stark, Gordon Brown (2011)
123 Wrapper authors: Bjoern Gruening, Pavankumar Videm 395 Wrapper authors: Bjoern Gruening, Pavankumar Videm
124 396
125 ]]> 397 ]]>
126 </help> 398 </help>
127 <citations> 399 <citations>