Mercurial > repos > bgruening > diffbind
comparison diffbind.xml @ 7:681dedc42aca draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/diffbind commit affbc59222cde9be21e91fa1f9194930a070b830
author | iuc |
---|---|
date | Sun, 28 Jan 2018 04:26:11 -0500 |
parents | 6031247f61d4 |
children |
comparison
equal
deleted
inserted
replaced
6:6031247f61d4 | 7:681dedc42aca |
---|---|
1 <tool id="diffbind" name="DiffBind" version="2.2.2"> | 1 <tool id="diffbind" name="DiffBind" version="2.6.5.0"> |
2 <description> differential binding analysis of ChIP-Seq peak data</description> | 2 <description> differential binding analysis of ChIP-Seq peak data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.0.9">bioconductor-diffbind</requirement> | 4 <requirement type="package" version="2.6.5">bioconductor-diffbind</requirement> |
5 <requirement type="package" version="1.20.0">r-getopt</requirement> | 5 <requirement type="package" version="1.20.0">r-getopt</requirement> |
6 <!--added rmysql requirement to remove: "Warning: namespace ‘RMySQL’ is not available"--> | |
7 <requirement type="package" version="0.10.11">r-rmysql</requirement> | |
6 </requirements> | 8 </requirements> |
7 <stdio> | 9 <stdio> |
8 <regex match="Execution halted" | 10 <regex match="Execution halted" |
9 source="both" | 11 source="both" |
10 level="fatal" | 12 level="fatal" |
16 <regex match="Error in" | 18 <regex match="Error in" |
17 source="both" | 19 source="both" |
18 level="fatal" | 20 level="fatal" |
19 description="An undefined error occured, please check your intput carefully and contact your administrator." /> | 21 description="An undefined error occured, please check your intput carefully and contact your administrator." /> |
20 </stdio> | 22 </stdio> |
21 <command> | 23 <version_command><![CDATA[ |
22 <![CDATA[ | 24 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ") |
25 ]]></version_command> | |
26 <command><![CDATA[ | |
23 ## seems that diffbind also needs file extensions to work properly | 27 ## seems that diffbind also needs file extensions to work properly |
24 #set $counter = 1 | 28 #set $counter = 1 |
25 #for $sample in $samples: | 29 #for $sample in $samples: |
26 ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# && | 30 ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# && |
27 ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# && | 31 ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# && |
30 ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# && | 34 ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# && |
31 #end if | 35 #end if |
32 #set $counter = $counter + 1 | 36 #set $counter = $counter + 1 |
33 #end for | 37 #end for |
34 | 38 |
35 Rscript $__tool_directory__/diffbind.R | 39 Rscript '$__tool_directory__/diffbind.R' |
36 -i $infile | 40 -i $infile |
37 -o $outfile | 41 -o '$outfile' |
38 -p $plots | 42 -p '$plots' |
39 -f $format | 43 -f $format |
44 -t $th | |
45 | |
46 #if $binding_affinity_matrix: | |
47 -b | |
48 #end if | |
40 ]]> | 49 ]]> |
41 </command> | 50 </command> |
42 <configfiles> | 51 <configfiles> |
43 <configfile name="infile"> | 52 <configfile name="infile"><![CDATA[ |
44 #set $counter = 1 | 53 #set $counter = 1 |
45 #for $sample in $samples: | 54 #for $sample in $samples: |
46 #if str( $sample.bamcontrol ) != 'None' and $counter == 1: | 55 #if str( $sample.bamcontrol ) != 'None' and $counter == 1: |
47 SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks | 56 SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks |
48 #elif $counter == 1: | 57 #elif $counter == 1: |
52 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks | 61 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks |
53 #else: | 62 #else: |
54 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks | 63 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks |
55 #end if | 64 #end if |
56 #set $counter = $counter + 1 | 65 #set $counter = $counter + 1 |
57 #end for | 66 #end for]]></configfile> |
58 </configfile> | |
59 </configfiles> | 67 </configfiles> |
60 <inputs> | 68 <inputs> |
61 <repeat name="samples" title="Samples" min="2"> | 69 <repeat name="samples" title="Samples" min="2"> |
62 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> | 70 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> |
63 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> | 71 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> |
64 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> | 72 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> |
65 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> | 73 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> |
66 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> | 74 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> |
67 <param format="bam" name="bamreads" type="data" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/> | 75 <param name="bamreads" type="data" format="bam" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/> |
68 <param format="bam" name="bamcontrol" type="data" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/> | 76 <param name="bamcontrol" type="data" format="bam" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/> |
69 <param format="bed" name="peaks" type="data" label="Peak file" help="Result of your Peak calling experiment."/> | 77 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> |
70 </repeat> | 78 </repeat> |
79 <param name="th" type="float" value="1" min="0" max="1" | |
80 label="FDR Threshold" | |
81 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> | |
71 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" | 82 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" |
72 label="Visualising the analysis results" | 83 label="Visualising the analysis results" |
73 help="output an additional PDF files" /> | 84 help="output an additional PDF file" /> |
74 <param name="format" type="select" label="Output Format"> | 85 <param name="format" type="select" label="Output Format"> |
75 <option value="bed">BED</option> | 86 <option value="bed">BED</option> |
76 <option value="gff">GFF</option> | 87 <option value="gff">GFF</option> |
77 <option value="wig">WIG</option> | 88 <option value="wig">WIG</option> |
78 </param> | 89 </param> |
90 <param name="binding_affinity_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> | |
79 </inputs> | 91 </inputs> |
80 <outputs> | 92 <outputs> |
81 <data format="bed" name="outfile" label="Differential binding sites on ${on_string}"> | 93 <data name="outfile" format="bed" label="Differential binding sites on ${on_string}"> |
82 <change_format> | 94 <change_format> |
83 <when input="format" value="wig" format="wig" /> | 95 <when input="format" value="wig" format="wig" /> |
84 <when input="format" value="gff" format="gff" /> | 96 <when input="format" value="gff" format="gff" /> |
85 </change_format> | 97 </change_format> |
86 </data> | 98 </data> |
87 <data format="pdf" name="plots" label="Differential binding sites on ${on_string}"> | 99 <data name="plots" format="pdf" label="Differential binding sites on ${on_string}"> |
88 <filter>pdf == True</filter> | 100 <filter>pdf == True</filter> |
89 </data> | 101 </data> |
102 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="Differential binding sites on ${on_string}"> | |
103 <filter>binding_affinity_matrix == True</filter> | |
104 </data> | |
90 </outputs> | 105 </outputs> |
91 <help> | 106 <tests> |
92 <![CDATA[ | 107 <test> |
93 | 108 <repeat name="samples"> |
94 What it does | 109 <param name="sample_id" value="BT4741" /> |
95 ************ | 110 <param name="tissue" value="BT474" /> |
96 | 111 <param name="factor" value="ER" /> |
97 Diffbind provides functions for processing ChIP-Seq data enriched for genomic loci where specific protein/DNA binding occurs, including peak sets identified by ChIP-Seq peak callers and aligned sequence read datasets. | 112 <param name="condition" value="Resistant" /> |
98 | 113 <param name="replicate" value="1" /> |
99 Input | 114 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam" /> |
100 ***** | 115 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz" /> |
101 | 116 </repeat> |
102 * You have to specify your samples. Here is one example:: | 117 <repeat name="samples"> |
103 | 118 <param name="sample_id" value="BT4742" /> |
104 ID Tissue Factor Condition Treatment Replicate Caller Intervals | 119 <param name="tissue" value="BT474" /> |
105 BT4741 BT474 ER Resistant Full-Media 1 raw 1084 | 120 <param name="factor" value="ER" /> |
121 <param name="condition" value="Resistant" /> | |
122 <param name="replicate" value="2" /> | |
123 <param name="bamreads" ftype="bam" value="BT474_ER_2.bam" /> | |
124 <param name="peaks" ftype="bed" value="BT474_ER_2.bed.gz" /> | |
125 </repeat> | |
126 <repeat name="samples"> | |
127 <param name="sample_id" value="MCF71" /> | |
128 <param name="tissue" value="MCF7" /> | |
129 <param name="factor" value="ER" /> | |
130 <param name="condition" value="Responsive" /> | |
131 <param name="replicate" value="1" /> | |
132 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam" /> | |
133 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz" /> | |
134 </repeat> | |
135 <repeat name="samples"> | |
136 <param name="sample_id" value="MCF72" /> | |
137 <param name="tissue" value="MCF7" /> | |
138 <param name="factor" value="ER" /> | |
139 <param name="condition" value="Responsive" /> | |
140 <param name="replicate" value="2" /> | |
141 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> | |
142 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> | |
143 </repeat> | |
144 <param name="pdf" value="True" /> | |
145 <param name="binding_affinity_matrix" value="True" /> | |
146 <output name="outfile" value="out_diffbind.bed" /> | |
147 <output name="binding_matrix" value="out_binding.matrix" /> | |
148 </test> | |
149 </tests> | |
150 <help><![CDATA[ | |
151 | |
152 .. class:: infomark | |
153 | |
154 **What it does** | |
155 | |
156 DiffBind_ is a `Bioconductor package`_ that provides functions for processing ChIP-Seq data enriched for genomic loci where specific | |
157 protein/DNA binding occurs, including peak sets identified by ChIP-Seq peak callers and | |
158 aligned sequence read datasets. It is designed to work with multiple peak sets simultaneously, | |
159 representing different ChIP experiments (antibodies, transcription factor and/or histone | |
160 marks, experimental conditions, replicates) as well as managing the results of multiple peak | |
161 callers. | |
162 | |
163 The primary emphasis of DiffBind is on identifying sites that are differentially bound | |
164 between two sample groups. It includes functions to support the processing of peak sets, | |
165 including overlapping and merging peak sets, counting sequencing reads overlapping intervals | |
166 in peak sets, and identifying statistically significantly differentially bound sites based on | |
167 evidence of binding affinity (measured by differences in read densities). To this end it uses | |
168 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages | |
169 edgeR and DESeq2). Additionally, the package builds on R graphics routines to provide a | |
170 set of standardized plots to aid in binding analysis. | |
171 | |
172 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of | |
173 examples: the first focusing on the core task of obtaining differentially bound sites based on | |
174 affinity data, the second working through the main plotting routines, the third discussing the | |
175 use of a blocking factor, and the fourth revisiting occupancy data (peak calls) in more detail, | |
176 as well as comparing the results of an occupancy-based analysis with an affinity-based one. | |
177 Finally, certain technical aspects of how these analyses are accomplished are detailed. | |
178 | |
179 Note: DiffBind requires a minimum of four samples (two groups with two replicates each). | |
180 | |
181 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
182 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
183 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf | |
184 | |
185 **Inputs** | |
186 | |
187 DiffBind works primarily with peaksets, which are sets of genomic intervals representing | |
188 candidate protein binding sites. Each interval consists of a chromosome, a start and end | |
189 position, and usually a score of some type indicating confidence in, or strength of, the peak. | |
190 Associated with each peakset are metadata relating to the experiment from which the peakset | |
191 was derived. Additionally, files containing mapped sequencing reads (generally .bam files) can | |
192 be associated with each peakset (one for the ChIP data, and optionally another representing | |
193 a control sample). | |
194 | |
195 **Sample Information** | |
196 | |
197 You have to specify your sample information in the tool form above. | |
198 | |
199 Example: | |
200 | |
201 ============= ========== ========== ============= ============= | |
202 **SampleID** **Tissue** **Factor** **Condition** **Replicate** | |
203 ------------- ---------- ---------- ------------- ------------- | |
204 BT4741 BT474 ER Resistant 1 | |
205 BT4742 BT474 ER Resistant 2 | |
206 MCF71 MCF7 ER Responsive 1 | |
207 MCF72 MCF7 ER Responsive 2 | |
208 MCF73 MCF7 ER Responsive 3 | |
209 T47D1 T47D ER Responsive 1 | |
210 T47D2 T47D ER Responsive 2 | |
211 MCF7r1 MCF7 ER Resistant 1 | |
212 MCF7r2 MCF7 ER Resistant 2 | |
213 ZR751 ZR75 ER Responsive 1 | |
214 ZR752 ZR75 ER Responsive 2 | |
215 ============= ========== ========== ============= ============= | |
216 | |
217 Or provide a sample sheet tabular file such as below. | |
218 | |
219 Example: | |
220 | |
221 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
222 SampleID Tissue Factor Condition Treatment Replicate bamReads ControlID bamControl Peaks PeakCaller | |
223 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
224 BT4741 BT474 ER Resistant Full-Media 1 Chr18_BT474_ER_1.bam BT474c Chr18_BT474_input.bam BT474_ER_1.bed.gz bed | |
225 BT4742 BT474 ER Resistant Full-Media 2 Chr18_BT474_ER_2.bam BT474c Chr18_BT474_input.bam BT474_ER_2.bed.gz bed | |
226 MCF71 MCF7 ER Responsive Full-Media 1 Chr18_MCF7_ER_1.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_1.bed.gz bed | |
227 MCF72 MCF7 ER Responsive Full-Media 2 Chr18_MCF7_ER_2.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_2.bed.gz bed | |
228 MCF73 MCF7 ER Responsive Full-Media 3 Chr18_MCF7_ER_3.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_3.bed.gz bed | |
229 T47D1 T47D ER Responsive Full-Media 1 Chr18_T47D_ER_1.bam T47Dc Chr18_T47D_input.bam T47D_ER_1.bed.gz bed | |
230 T47D2 T47D ER Responsive Full-Media 2 Chr18_T47D_ER_2.bam T47Dc Chr18_T47D_input.bam T47D_ER_2.bed.gz bed | |
231 MCF7r1 MCF7 ER Resistant Full-Media 1 Chr18_TAMR_ER_1.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_1.bed.gz bed | |
232 MCF7r2 MCF7 ER Resistant Full-Media 2 Chr18_TAMR_ER_2.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_2.bed.gz bed | |
233 ZR751 ZR75 ER Responsive Full-Media 1 Chr18_ZR75_ER_1.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_1.bed.gz bed | |
234 ZR752 ZR75 ER Responsive Full-Media 2 Chr18_ZR75_ER_2.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_2.bed.gz bed | |
235 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
236 | |
237 | |
238 **Peak files** | |
239 | |
240 Result of your peak calling experiment in BED format; one file is required for each sample. | |
241 | |
242 Example: | |
243 | |
244 ======= ======= ======= =============== ======= | |
245 1 2 3 4 **5** | |
246 ======= ======= ======= =============== ======= | |
247 chr18 215562 216063 MACS_peak_16037 56.11 | |
248 chr18 311530 312105 MACS_peak_16038 222.49 | |
249 chr18 356656 357315 MACS_peak_16039 92.06 | |
250 chr18 371110 372092 MACS_peak_16040 123.86 | |
251 chr18 395116 396464 MACS_peak_16041 1545.39 | |
252 chr18 399014 400382 MACS_peak_16042 1835.19 | |
253 chr18 499134 500200 MACS_peak_16043 748.32 | |
254 chr18 503518 504552 MACS_peak_16044 818.30 | |
255 chr18 531672 532274 MACS_peak_16045 159.30 | |
256 chr18 568326 569282 MACS_peak_16046 601.11 | |
257 ======= ======= ======= =============== ======= | |
106 | 258 |
107 * BAM file which contains the mapped sequencing reads can be associated with each peakset | 259 * BAM file which contains the mapped sequencing reads can be associated with each peakset |
108 * Control BAM files represent a control dataset and are optional, but have to be specified for all samples when used. | 260 * Control BAM files represent a control dataset and are optional, but have to be specified for all samples when used. |
109 * Peak file: Result of your Peak calling experiment | 261 |
110 | 262 |
111 Output | 263 **Outputs** |
112 ****** | |
113 | 264 |
114 As output format you can choose BED, GFF, WIG. | 265 As output format you can choose BED, GFF, WIG. |
115 | 266 |
116 References | 267 Example: |
117 ********** | 268 |
118 | 269 ======== ====== =======+ |
119 DiffBind_ Authors: Rory Stark, Gordon Brown (2011) | 270 seqnames ranges strand Conc Conc_Resistant |
120 | 271 |
121 .. _DiffBind: http://www.bioconductor.org/packages/release/bioc/html/DiffBind.html | 272 2452 chr18 [64490686, 64491186] * | 6.36 1.39 |
122 | 273 1291 chr18 [34597713, 34598213] * | 5.33 0.22 |
274 976 chr18 [26860997, 26861497] * | 7.3 3.13 | |
275 2338 chr18 [60892900, 60893400] * | 7.13 1.84 | |
276 2077 chr18 [55569087, 55569587] * | 5.52 1.89 | |
277 | |
278 Conc_Responsive Fold p-value FDR | |
279 <numeric> <numeric> <numeric> <numeric> | |
280 2452 7 -5.61 3.57e-10 1.02e-06 | |
281 1291 5.97 -5.75 1.1e-09 1.57e-06 | |
282 976 7.92 -4.79 1.1e-08 1.05e-05 | |
283 2338 7.77 -5.93 1.68e-08 1.17e-05 | |
284 2077 6.13 -4.23 2.36e-08 1.17e-05 | |
285 | |
286 The value columns show: | |
287 Conc: mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) | |
288 Conc_Resistant: mean concentration over the first (Resistant) group | |
289 Conc_Responsive: mean concentration over the second (Responsive) group | |
290 Fold: the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group. | |
291 p-value: confidence measure for identifying these sites as differentially bound | |
292 FDR: a multiple testing corrected FDR p-value | |
293 | |
294 | |
295 **Binding Affinity Matrix** | |
296 | |
297 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent | |
298 differential analysis. | |
299 | |
300 Example: | |
301 | |
302 ====== ====== ====== ========== ========== ========= ====== ========= ==== | |
303 ID Tissue Factor Condition Treatment Replicate Caller Intervals FRiP | |
304 ====== ====== ====== ========== ========== ========= ====== ========= ==== | |
305 BT4741 BT474 ER Resistant Full-Media 1 counts 2845 0.16 | |
306 BT4742 BT474 ER Resistant Full-Media 2 counts 2845 0.15 | |
307 MCF71 MCF7 ER Responsive Full-Media 1 counts 2845 0.27 | |
308 MCF72 MCF7 ER Responsive Full-Media 2 counts 2845 0.17 | |
309 MCF73 MCF7 ER Responsive Full-Media 3 counts 2845 0.23 | |
310 T47D1 T47D ER Responsive Full-Media 1 counts 2845 0.10 | |
311 T47D2 T47D ER Responsive Full-Media 2 counts 2845 0.06 | |
312 MCF7r1 MCF7 ER Resistant Full-Media 1 counts 2845 0.20 | |
313 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 | |
314 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 | |
315 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 | |
316 ====== ====== ====== ========== ========== ========= ====== ========= ==== | |
317 | |
318 | |
319 | |
320 **More Information** | |
321 | |
322 Generally, processing data with DiffBind involves five phases: | |
323 | |
324 #. Reading in peaksets | |
325 #. Occupancy analysis | |
326 #. Counting reads | |
327 #. Differential binding affinity analysis | |
328 #. Plotting and reporting | |
329 | |
330 | |
331 * **Reading in peaksets**: | |
332 | |
333 The first step is to read in a set of peaksets and associated | |
334 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS | |
335 ([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions | |
336 in a genome). The easiest way to read in peaksets is using a comma-separated value | |
337 (csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with | |
338 a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than | |
339 one associated peakset; e.g. if multiple peak callers are used for comparison purposes | |
340 each sample would have more than one line in the sample sheet. Once the peaksets | |
341 are read in, a merging function finds all overlapping peaks and derives a single set of | |
342 unique genomic intervals covering all the supplied peaks (a consensus peakset for the | |
343 experiment). | |
344 | |
345 * **Occupancy analysis**: | |
346 | |
347 Peaksets, especially those generated by peak callers, provide | |
348 an insight into the potential occupancy of the protein being ChIPed for at specific | |
349 genomic loci. After the peaksets have been loaded, it can be useful to perform some | |
350 exploratory plotting to determine how these occupancy maps agree with each other, | |
351 e.g. between experimental replicates (re-doing the ChIP under the same conditions), | |
352 between different peak callers on the same experiment, and within groups of samples | |
353 representing a common experimental condition. DiffBind provides functions to enable | |
354 overlaps to be examined, as well as functions to determine how well similar samples | |
355 cluster together. Beyond quality control, the product of an occupancy analysis may be | |
356 a consensus peakset, representing an overall set of candidate binding sites to be used | |
357 in further analysis. | |
358 | |
359 * **Counting reads**: | |
360 | |
361 Once a consensus peakset has been derived, DiffBind can use the | |
362 supplied sequence read files to count how many reads overlap each interval for each | |
363 unique sample. The peaks in the consensus peakset may be re-centered and trimmed | |
364 based on calculating their summits (point of greatest read overlap) in order to provide | |
365 more standardized peak intervals. The final result of counting is a binding affinity matrix | |
366 containing a (normalized) read count for each sample at every potential binding site. | |
367 With this matrix, the samples can be re-clustered using affinity, rather than occupancy, | |
368 data. The binding affinity matrix is used for QC plotting as well as for subsequent | |
369 differential analysis. | |
370 | |
371 * **Differential binding affinity analysis**: | |
372 | |
373 The core functionality of DiffBind is the | |
374 differential binding affinity analysis, which enables binding sites to be identified that | |
375 are statistically significantly differentially bound between sample groups. To accomplish | |
376 this, first a contrast (or contrasts) is established, dividing the samples into groups to | |
377 be compared. Next the core analysis routines are executed, by default using DESeq2. | |
378 This will assign a p-value and FDR to each candidate binding site indicating confidence | |
379 that they are differentially bound. | |
380 | |
381 * **Plotting and reporting**: | |
382 | |
383 Once one or more contrasts have been run, DiffBind provides | |
384 a number of functions for reporting and plotting the results. MA plots give an | |
385 overview of the results of the analysis, while correlation heatmaps and PCA plots show | |
386 how the groups cluster based on differentially bound sites. Boxplots show the distribution | |
387 of reads within differentially bound sites corresponding to whether they gain or | |
388 lose affinity between the two sample groups. A reporting mechanism enables differentially | |
389 bound sites to be extracted for further processing, such as annotation, motif, and | |
390 pathway analyses. | |
391 | |
392 **References** | |
393 | |
394 DiffBind Authors: Rory Stark, Gordon Brown (2011) | |
123 Wrapper authors: Bjoern Gruening, Pavankumar Videm | 395 Wrapper authors: Bjoern Gruening, Pavankumar Videm |
124 | 396 |
125 ]]> | 397 ]]> |
126 </help> | 398 </help> |
127 <citations> | 399 <citations> |