Mercurial > repos > bgruening > diffbind
comparison diffbind.xml @ 10:d7725c5596ab draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit f970dcbe9d0e4c3714b1db74c404ea34223cf8ed
author | iuc |
---|---|
date | Tue, 20 Mar 2018 04:51:25 -0400 |
parents | 6171163112de |
children | 4c7ab9995f9e |
comparison
equal
deleted
inserted
replaced
9:6171163112de | 10:d7725c5596ab |
---|---|
1 <tool id="diffbind" name="DiffBind" version="2.6.5.0"> | 1 <tool id="diffbind" name="DiffBind" version="2.6.6.0"> |
2 <description> differential binding analysis of ChIP-Seq peak data</description> | 2 <description> differential binding analysis of ChIP-Seq peak data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.6.5">bioconductor-diffbind</requirement> | 4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> |
5 <requirement type="package" version="1.20.0">r-getopt</requirement> | 5 <requirement type="package" version="1.20.0">r-getopt</requirement> |
6 <!--added rmysql requirement to remove: "Warning: namespace ‘RMySQL’ is not available"--> | |
7 <requirement type="package" version="0.10.11">r-rmysql</requirement> | |
8 </requirements> | 6 </requirements> |
9 <stdio> | 7 <stdio> |
10 <regex match="Execution halted" | 8 <regex match="Execution halted" |
11 source="both" | 9 source="both" |
12 level="fatal" | 10 level="fatal" |
19 source="both" | 17 source="both" |
20 level="fatal" | 18 level="fatal" |
21 description="An undefined error occured, please check your intput carefully and contact your administrator." /> | 19 description="An undefined error occured, please check your intput carefully and contact your administrator." /> |
22 </stdio> | 20 </stdio> |
23 <version_command><![CDATA[ | 21 <version_command><![CDATA[ |
24 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ") | 22 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ") |
25 ]]></version_command> | 23 ]]></version_command> |
26 <command><![CDATA[ | 24 <command><![CDATA[ |
27 ## seems that diffbind also needs file extensions to work properly | 25 ## seems that diffbind also needs file extensions to work properly |
28 #set $counter = 1 | 26 #set $counter = 1 |
29 #for $sample in $samples: | 27 #for $sample in $samples: |
37 #end for | 35 #end for |
38 | 36 |
39 Rscript '$__tool_directory__/diffbind.R' | 37 Rscript '$__tool_directory__/diffbind.R' |
40 -i $infile | 38 -i $infile |
41 -o '$outfile' | 39 -o '$outfile' |
40 -t $th | |
41 -f $out.format | |
42 -p '$plots' | 42 -p '$plots' |
43 -f $format | 43 |
44 -t $th | 44 #if $out.binding_matrix: |
45 | |
46 #if $binding_affinity_matrix: | |
47 -b | 45 -b |
46 #end if | |
47 | |
48 #if $out.rdata: | |
49 -r | |
48 #end if | 50 #end if |
49 ]]> | 51 ]]> |
50 </command> | 52 </command> |
51 <configfiles> | 53 <configfiles> |
52 <configfile name="infile"><![CDATA[ | 54 <configfile name="infile"><![CDATA[ |
64 #end if | 66 #end if |
65 #set $counter = $counter + 1 | 67 #set $counter = $counter + 1 |
66 #end for]]></configfile> | 68 #end for]]></configfile> |
67 </configfiles> | 69 </configfiles> |
68 <inputs> | 70 <inputs> |
69 <repeat name="samples" title="Samples" min="2"> | 71 <repeat name="samples" title="Samples" min="4"> |
70 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> | 72 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> |
71 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> | 73 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> |
72 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> | 74 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> |
73 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> | 75 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> |
74 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> | 76 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> |
77 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> | 79 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> |
78 </repeat> | 80 </repeat> |
79 <param name="th" type="float" value="1" min="0" max="1" | 81 <param name="th" type="float" value="1" min="0" max="1" |
80 label="FDR Threshold" | 82 label="FDR Threshold" |
81 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> | 83 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> |
82 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" | 84 |
83 label="Visualising the analysis results" | 85 <!-- Output Options --> |
84 help="output an additional PDF file" /> | 86 <section name="out" expanded="false" title="Output Options"> |
85 <param name="format" type="select" label="Output Format"> | 87 <param name="format" type="select" label="Output Format"> |
86 <option value="bed">BED</option> | 88 <option value="bed">BED</option> |
87 <option value="gff">GFF</option> | 89 <option value="gff">GFF</option> |
88 <option value="wig">WIG</option> | 90 <option value="wig">WIG</option> |
89 </param> | 91 </param> |
90 <param name="binding_affinity_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> | 92 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" /> |
93 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> | |
94 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"> | |
95 </param> | |
96 </section> | |
91 </inputs> | 97 </inputs> |
98 | |
92 <outputs> | 99 <outputs> |
93 <data name="outfile" format="bed" label="Differential binding sites on ${on_string}"> | 100 <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites"> |
94 <change_format> | 101 <change_format> |
95 <when input="format" value="wig" format="wig" /> | 102 <when input="format" value="wig" format="wig" /> |
96 <when input="format" value="gff" format="gff" /> | 103 <when input="format" value="gff" format="gff" /> |
97 </change_format> | 104 </change_format> |
98 </data> | 105 </data> |
99 <data name="plots" format="pdf" label="Differential binding sites on ${on_string}"> | 106 <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots"> |
100 <filter>pdf == True</filter> | 107 <filter>out['pdf']</filter> |
101 </data> | 108 </data> |
102 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="Differential binding sites on ${on_string}"> | 109 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="${tool.name} on ${on_string}: Binding matrix"> |
103 <filter>binding_affinity_matrix == True</filter> | 110 <filter>out['binding_matrix']</filter> |
111 </data> | |
112 <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file"> | |
113 <filter>out['rdata']</filter> | |
104 </data> | 114 </data> |
105 </outputs> | 115 </outputs> |
116 | |
106 <tests> | 117 <tests> |
107 <test> | 118 <test expect_num_outputs="4"> |
108 <repeat name="samples"> | 119 <repeat name="samples"> |
109 <param name="sample_id" value="BT4741" /> | 120 <param name="sample_id" value="BT4741" /> |
110 <param name="tissue" value="BT474" /> | 121 <param name="tissue" value="BT474" /> |
111 <param name="factor" value="ER" /> | 122 <param name="factor" value="ER" /> |
112 <param name="condition" value="Resistant" /> | 123 <param name="condition" value="Resistant" /> |
140 <param name="replicate" value="2" /> | 151 <param name="replicate" value="2" /> |
141 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> | 152 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> |
142 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> | 153 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> |
143 </repeat> | 154 </repeat> |
144 <param name="pdf" value="True" /> | 155 <param name="pdf" value="True" /> |
145 <param name="binding_affinity_matrix" value="True" /> | 156 <param name="binding_matrix" value="True" /> |
157 <param name="rdata" value="True" /> | |
146 <output name="outfile" value="out_diffbind.bed" /> | 158 <output name="outfile" value="out_diffbind.bed" /> |
159 <output name="plots" value="out_plots.pdf" compare="sim_size" /> | |
147 <output name="binding_matrix" value="out_binding.matrix" /> | 160 <output name="binding_matrix" value="out_binding.matrix" /> |
161 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> | |
148 </test> | 162 </test> |
149 </tests> | 163 </tests> |
150 <help><![CDATA[ | 164 <help><![CDATA[ |
151 | 165 |
152 .. class:: infomark | 166 .. class:: infomark |
164 between two sample groups. It includes functions to support the processing of peak sets, | 178 between two sample groups. It includes functions to support the processing of peak sets, |
165 including overlapping and merging peak sets, counting sequencing reads overlapping intervals | 179 including overlapping and merging peak sets, counting sequencing reads overlapping intervals |
166 in peak sets, and identifying statistically significantly differentially bound sites based on | 180 in peak sets, and identifying statistically significantly differentially bound sites based on |
167 evidence of binding affinity (measured by differences in read densities). To this end it uses | 181 evidence of binding affinity (measured by differences in read densities). To this end it uses |
168 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages | 182 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages |
169 edgeR and DESeq2 ). Additionally, the package builds on Rgraphics routines to provide a | 183 edgeR and DESeq2). Additionally, the package builds on Rgraphics routines to provide a |
170 set of standardized plots to aid in binding analysis. | 184 set of standardized plots to aid in binding analysis. |
171 | 185 |
172 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of | 186 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of |
173 examples: the first focusing on the core task of obtaining differentially bound sites based on | 187 examples: the first focusing on the core task of obtaining differentially bound sites based on |
174 affinity data, the second working through the main plotting routines, the third discussing the | 188 affinity data, the second working through the main plotting routines, the third discussing the |
179 Note DiffBind requires a minimum of four samples (two groups with two replicates each). | 193 Note DiffBind requires a minimum of four samples (two groups with two replicates each). |
180 | 194 |
181 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | 195 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html |
182 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | 196 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html |
183 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf | 197 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf |
198 | |
199 ----- | |
184 | 200 |
185 **Inputs** | 201 **Inputs** |
186 | 202 |
187 DiffBind works primarily with peaksets, which are sets of genomic intervals representing | 203 DiffBind works primarily with peaksets, which are sets of genomic intervals representing |
188 candidate protein binding sites. Each interval consists of a chromosome, a start and end | 204 candidate protein binding sites. Each interval consists of a chromosome, a start and end |
192 be associated with each peakset (one for the ChIP data, and optionally another representing | 208 be associated with each peakset (one for the ChIP data, and optionally another representing |
193 a control sample). | 209 a control sample). |
194 | 210 |
195 **Sample Information** | 211 **Sample Information** |
196 | 212 |
197 You have to specify your sample information in the tool form above. | 213 You have to specify your sample information in the tool form above, where Condition contains the groups you want to compare. |
198 | 214 |
199 Example: | 215 Example: |
200 | 216 |
201 ============= ========== ========== ============= ============= | 217 ============= ========== ========== ============= ============= |
202 **SampleID** **Tissue** **Factor** **Condition** **Replicate** | 218 **SampleID** **Tissue** **Factor** **Condition** **Replicate** |
212 MCF7r2 MCF7 ER Resistant 2 | 228 MCF7r2 MCF7 ER Resistant 2 |
213 ZR751 ZR75 ER Responsive 1 | 229 ZR751 ZR75 ER Responsive 1 |
214 ZR752 ZR75 ER Responsive 2 | 230 ZR752 ZR75 ER Responsive 2 |
215 ============= ========== ========== ============= ============= | 231 ============= ========== ========== ============= ============= |
216 | 232 |
217 Or provide a sample sheet tabular file such as below. | |
218 | |
219 Example: | |
220 | |
221 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
222 SampleID Tissue Factor Condition Treatment Replicate bamReads ControlID bamControl Peaks PeakCaller | |
223 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
224 BT4741 BT474 ER Resistant Full-Media 1 Chr18_BT474_ER_1.bam BT474c Chr18_BT474_input.bam BT474_ER_1.bed.gz bed | |
225 BT4742 BT474 ER Resistant Full-Media 2 Chr18_BT474_ER_2.bam BT474c Chr18_BT474_input.bam BT474_ER_2.bed.gz bed | |
226 MCF71 MCF7 ER Responsive Full-Media 1 Chr18_MCF7_ER_1.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_1.bed.gz bed | |
227 MCF72 MCF7 ER Responsive Full-Media 2 Chr18_MCF7_ER_2.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_2.bed.gz bed | |
228 MCF73 MCF7 ER Responsive Full-Media 3 Chr18_MCF7_ER_3.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_3.bed.gz bed | |
229 T47D1 T47D ER Responsive Full-Media 1 Chr18_T47D_ER_1.bam T47Dc Chr18_T47D_input.bam T47D_ER_1.bed.gz bed | |
230 T47D2 T47D ER Responsive Full-Media 2 Chr18_T47D_ER_2.bam T47Dc Chr18_T47D_input.bam T47D_ER_2.bed.gz bed | |
231 MCF7r1 MCF7 ER Resistant Full-Media 1 Chr18_TAMR_ER_1.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_1.bed.gz bed | |
232 MCF7r2 MCF7 ER Resistant Full-Media 2 Chr18_TAMR_ER_2.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_2.bed.gz bed | |
233 ZR751 ZR75 ER Responsive Full-Media 1 Chr18_ZR75_ER_1.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_1.bed.gz bed | |
234 ZR752 ZR75 ER Responsive Full-Media 2 Chr18_ZR75_ER_2.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_2.bed.gz bed | |
235 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
236 | |
237 | 233 |
238 **Peak files** | 234 **Peak files** |
239 | 235 |
240 Result of your Peak calling experiment in bed format, one file for each sample is required. | 236 Result of your Peak calling experiment in bed format, one file for each sample is required. |
241 | 237 |
257 ======= ======= ======= =============== ======= | 253 ======= ======= ======= =============== ======= |
258 | 254 |
259 * A BAM file containing the mapped sequencing reads can be associated with each peakset | 255 * A BAM file containing the mapped sequencing reads can be associated with each peakset |
260 * Control BAM files represent control datasets and are optional, but have to be specified for all samples when used. | 256 * Control BAM files represent control datasets and are optional, but have to be specified for all samples when used. |
261 | 257 |
258 ----- | |
262 | 259 |
263 **Outputs** | 260 **Outputs** |
264 | 261 |
262 This tool outputs | |
263 | |
264 * differentially bound sites in BED, WIG or GFF format | |
265 | |
266 Optionally, under **Output Options** you can choose to output | |
267 | |
268 * a correlation heatmap plot | |
269 * a binding affinity matrix | |
270 * an RData file | |
271 | |
272 **Differentially Bound Sites** | |
273 | |
265 As output format you can choose BED, GFF or WIG. | 274 As output format you can choose BED, GFF or WIG. |
266 | 275 |
267 Example: | 276 Example - BED format: |
268 | 277 |
269 ======== ====== =======+ | 278 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== |
270 seqnames ranges strand Conc Conc_Resistant | 279 1 2 3 4 5 6 7 8 9 10 **11** |
271 | 280 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== |
272 2452 chr18 [64490686, 64491186] * | 6.36 1.39 | 281 chr18 394600 396513 1914 * 7.15 7.89 5.55 2.35 7.06e-24 9.84e-21 |
273 1291 chr18 [34597713, 34598213] * | 5.33 0.22 | 282 chr18 111567 112005 439 * 5.71 3.63 6.53 -2.89 1.27e-08 8.88e-06 |
274 976 chr18 [26860997, 26861497] * | 7.3 3.13 | 283 chr18 346464 347342 879 * 5 3.24 5.77 -2.52 6.51e-06 0.00303 |
275 2338 chr18 [60892900, 60893400] * | 7.13 1.84 | 284 chr18 399014 400382 1369 * 7.62 8.05 7 1.04 1.04e-05 0.00364 |
276 2077 chr18 [55569087, 55569587] * | 5.52 1.89 | 285 chr18 371110 372102 993 * 4.63 5.36 3.07 2.3 8.1e-05 0.0226 |
277 | 286 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== |
278 Conc_Responsive Fold p-value FDR | 287 |
279 <numeric> <numeric> <numeric> <numeric> | 288 Columns contain the following data: |
280 2452 7 -5.61 3.57e-10 1.02e-06 | 289 |
281 1291 5.97 -5.75 1.1e-09 1.57e-06 | 290 * **1st**: Chromosome name |
282 976 7.92 -4.79 1.1e-08 1.05e-05 | 291 * **2nd**: Start position of site |
283 2338 7.77 -5.93 1.68e-08 1.17e-05 | 292 * **3rd**: End position of site |
284 2077 6.13 -4.23 2.36e-08 1.17e-05 | 293 * **4th**: Length of site |
285 | 294 * **5th**: Strand |
286 The value columns show the | 295 * **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) |
287 Conc mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) | 296 * **7th**: Mean concentration over the first (e.g. Resistant) group |
288 Conc_Resistant mean concentration over the first (Resistant) group | 297 * **8th**: Mean concentration over second (e.g. Responsive) group |
289 Conc_Responsive mean concentration over second (Responsive) group | 298 * **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Resistant - Responsive), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group. |
290 Fold column shows the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group. | 299 * **10th**: P-value confidence measure for identifying these sites as differentially bound |
291 p-value confidence measure for identifying these sites as differentially bound | 300 * **11th**: a multiple testing corrected FDR p-value |
292 FDR a multiple testing corrected FDR p-value | |
293 | 301 |
294 | 302 |
295 **Binding Affinity Matrix** | 303 **Binding Affinity Matrix** |
296 | 304 |
297 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent | 305 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent |
313 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 | 321 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 |
314 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 | 322 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 |
315 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 | 323 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 |
316 ====== ====== ====== ========== ========== ========= ====== ========= ==== | 324 ====== ====== ====== ========== ========== ========= ====== ========= ==== |
317 | 325 |
318 | 326 ----- |
319 | 327 |
320 **More Information** | 328 **More Information** |
321 | 329 |
322 Generally, processing data with DiffBind involves five phases: | 330 Generally, processing data with DiffBind involves five phases: |
323 | 331 |
326 #. Counting reads | 334 #. Counting reads |
327 #. Differential binding affinity analysis | 335 #. Differential binding affinity analysis |
328 #. Plotting and reporting | 336 #. Plotting and reporting |
329 | 337 |
330 | 338 |
331 * **Reading in peaksets**: | 339 **Reading in peaksets**: |
332 | 340 |
333 The first step is to read in a set of peaksets and associated | 341 The first step is to read in a set of peaksets and associated |
334 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS | 342 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as **MACS2**, or using some other criterion (e.g. genomic windows, or all the promoter regions |
335 ([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions | 343 in a genome). A single experiment can have more than |
336 in a genome). The easiest way to read in peaksets is using a comma-separated value | |
337 (csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with | |
338 a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than | |
339 one associated peakset; e.g. if multiple peak callers are used for comparison purposes | 344 one associated peakset; e.g. if multiple peak callers are used for comparison purposes |
340 each sample would have more than one line in the sample sheet. Once the peaksets | 345 each sample would have more than one line in the sample sheet. Once the peaksets |
341 are read in, a merging function finds all overlapping peaks and derives a single set of | 346 are read in, a merging function finds all overlapping peaks and derives a single set of |
342 unique genomic intervals covering all the supplied peaks (a consensus peakset for the | 347 unique genomic intervals covering all the supplied peaks (a consensus peakset for the |
343 experiment). | 348 experiment). |
344 | 349 |
345 * **Occupancy analysis**: | 350 **Occupancy analysis**: |
346 | 351 |
347 Peaksets, especially those generated by peak callers, provide | 352 Peaksets, especially those generated by peak callers, provide |
348 an insight into the potential occupancy of the protein being ChIPed for at specific | 353 an insight into the potential occupancy of the protein being ChIPed for at specific |
349 genomic loci. After the peaksets have been loaded, it can be useful to perform some | 354 genomic loci. After the peaksets have been loaded, it can be useful to perform some |
350 exploratory plotting to determine how these occupancy maps agree with each other, | 355 exploratory plotting to determine how these occupancy maps agree with each other, |
354 overlaps to be examined, as well as functions to determine how well similar samples | 359 overlaps to be examined, as well as functions to determine how well similar samples |
355 cluster together. Beyond quality control, the product of an occupancy analysis may be | 360 cluster together. Beyond quality control, the product of an occupancy analysis may be |
356 a consensus peakset, representing an overall set of candidate binding sites to be used | 361 a consensus peakset, representing an overall set of candidate binding sites to be used |
357 in further analysis. | 362 in further analysis. |
358 | 363 |
359 * **Counting reads**: | 364 **Counting reads**: |
360 | 365 |
361 Once a consensus peakset has been derived, DiffBind can use the | 366 Once a consensus peakset has been derived, DiffBind can use the |
362 supplied sequence read files to count how many reads overlap each interval for each | 367 supplied sequence read files to count how many reads overlap each interval for each |
363 unique sample. The peaks in the consensus peakset may be re-centered and trimmed | 368 unique sample. The peaks in the consensus peakset may be re-centered and trimmed |
364 based on calculating their summits (point of greatest read overlap) in order to provide | 369 based on calculating their summits (point of greatest read overlap) in order to provide |
366 containing a (normalized) read count for each sample at every potential binding site. | 371 containing a (normalized) read count for each sample at every potential binding site. |
367 With this matrix, the samples can be re-clustered using affinity, rather than occupancy, | 372 With this matrix, the samples can be re-clustered using affinity, rather than occupancy, |
368 data. The binding affinity matrix is used for QC plotting as well as for subsequent | 373 data. The binding affinity matrix is used for QC plotting as well as for subsequent |
369 differential analysis. | 374 differential analysis. |
370 | 375 |
371 * **Differential binding affinity analysis**: | 376 **Differential binding affinity analysis**: |
372 | 377 |
373 The core functionality of DiffBind is the | 378 The core functionality of DiffBind is the |
374 differential binding affinity analysis, which enables binding sites to be identified that | 379 differential binding affinity analysis, which enables binding sites to be identified that |
375 are statistically significantly differentially bound between sample groups. To accomplish | 380 are statistically significantly differentially bound between sample groups. To accomplish |
376 this, first a contrast (or contrasts) is established, dividing the samples into groups to | 381 this, first a contrast (or contrasts) is established, dividing the samples into groups to |
377 be compared. Next the core analysis routines are executed, by default using DESeq2. | 382 be compared. Next the core analysis routines are executed, by default using DESeq2. |
378 This will assign a p-value and FDR to each candidate binding site indicating confidence | 383 This will assign a p-value and FDR to each candidate binding site indicating confidence |
379 that they are differentially bound. | 384 that they are differentially bound. |
380 | 385 |
381 * **Plotting and reporting**: | 386 **Plotting and reporting**: |
382 | 387 |
383 Once one or more contrasts have been run, DiffBind provides | 388 Once one or more contrasts have been run, DiffBind provides |
384 a number of functions for reporting and plotting the results. MA plots give an | 389 a number of functions for reporting and plotting the results. MA plots give an |
385 overview of the results of the analysis, while correlation heatmaps and PCA plots show | 390 overview of the results of the analysis, while correlation heatmaps and PCA plots show |
386 how the groups cluster based on differentially bound sites. Boxplots show the distribution | 391 how the groups cluster based on differentially bound sites. Boxplots show the distribution |
387 of reads within differentially bound sites corresponding to whether they gain or | 392 of reads within differentially bound sites corresponding to whether they gain or |
388 lose affinity between the two sample groups. A reporting mechanism enables differentially | 393 lose affinity between the two sample groups. A reporting mechanism enables differentially |
389 bound sites to be extracted for further processing, such as annotation, motif, and | 394 bound sites to be extracted for further processing, such as annotation, motif, and |
390 pathway analyses. | 395 pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.* |
396 | |
397 ----- | |
391 | 398 |
392 **References** | 399 **References** |
393 | 400 |
394 DiffBind Authors: Rory Stark, Gordon Brown (2011) | 401 DiffBind Authors: Rory Stark, Gordon Brown (2011) |
395 Wrapper authors: Bjoern Gruening, Pavankumar Videm | 402 Wrapper authors: Bjoern Gruening, Pavankumar Videm |