Mercurial > repos > bgruening > diffbind
comparison diffbind.xml @ 11:4c7ab9995f9e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit cc4c1c4131518b9cbf986a1f252767ff73ca938e
author | iuc |
---|---|
date | Sat, 07 Apr 2018 15:45:41 -0400 |
parents | d7725c5596ab |
children | fa56d93f7980 |
comparison
equal
deleted
inserted
replaced
10:d7725c5596ab | 11:4c7ab9995f9e |
---|---|
1 <tool id="diffbind" name="DiffBind" version="2.6.6.0"> | 1 <tool id="diffbind" name="DiffBind" version="2.6.6.1"> |
2 <description> differential binding analysis of ChIP-Seq peak data</description> | 2 <description> differential binding analysis of ChIP-Seq peak data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> | 4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> |
5 <requirement type="package" version="1.20.0">r-getopt</requirement> | 5 <requirement type="package" version="1.20.0">r-getopt</requirement> |
6 <requirement type="package" version="0.2.15">r-rjson</requirement> | |
6 </requirements> | 7 </requirements> |
7 <stdio> | 8 <stdio> |
8 <regex match="Execution halted" | 9 <regex match="Execution halted" |
9 source="both" | 10 source="both" |
10 level="fatal" | 11 level="fatal" |
17 source="both" | 18 source="both" |
18 level="fatal" | 19 level="fatal" |
19 description="An undefined error occured, please check your intput carefully and contact your administrator." /> | 20 description="An undefined error occured, please check your intput carefully and contact your administrator." /> |
20 </stdio> | 21 </stdio> |
21 <version_command><![CDATA[ | 22 <version_command><![CDATA[ |
22 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ") | 23 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ") |
23 ]]></version_command> | 24 ]]></version_command> |
24 <command><![CDATA[ | 25 <command><![CDATA[ |
25 ## seems that diffbind also needs file extensions to work properly | 26 #import re |
26 #set $counter = 1 | 27 #import json |
27 #for $sample in $samples: | 28 |
28 ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# && | 29 ## Adapted from DESeq2 wrapper |
29 ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# && | 30 #set $temp_factor_names = list() |
30 #if str( $sample.bamcontrol ) != 'None': | 31 #set $temp_factor = list() |
31 ln -s $sample.bamcontrol #echo str($counter) + "_bamcontrol.bam"# && | 32 |
32 ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# && | 33 #for $g in $rep_group: |
33 #end if | 34 |
34 #set $counter = $counter + 1 | 35 #set $peak_files = list() |
36 #set $bam_files = list() | |
37 #set $bam_controls = list() | |
38 | |
39 #for $file in $g.peaks: | |
40 #set $file_name = re.sub('[^\w\-\s]', '_', str($file.element_identifier)) | |
41 ln -s '${file}' ${g.groupName}-${file_name}-peaks.bed && | |
42 $peak_files.append(str($g.groupName) + '-' + $file_name + '-peaks.bed') | |
43 #end for | |
44 | |
45 #for $bam in $g.bamreads: | |
46 #set $bam_name = re.sub('[^\w\-\s]', '_', str($bam.element_identifier)) | |
47 ln -s '${bam}' ${bam_name}-bamreads.bam && | |
48 ln -s ${bam.metadata.bam_index} ${bam_name}-bamreads.bai && | |
49 $bam_files.append($bam_name + '-bamreads.bam') | |
50 #end for | |
51 | |
52 $temp_factor.append( {str($g.groupName): $peak_files} ) | |
53 $temp_factor.append( {str($g.groupName): $bam_files} ) | |
54 | |
55 #if str( $g.bamcontrol ) != 'None': | |
56 #for $ctrl in $g.bamcontrol: | |
57 #set $ctrl_name = re.sub('[^\w\-\s]', '_', str($ctrl.element_identifier)) | |
58 ln -s '${ctrl}' ${g.groupName}-${ctrl_name}-bamcontrol.bam && | |
59 ln -s ${ctrl.metadata.bam_index} ${g.groupName}-${ctrl_name}-bamcontrol.bai && | |
60 $bam_controls.append(str($g.groupName) + '-' + $ctrl_name + '-bamcontrol.bam') | |
35 #end for | 61 #end for |
36 | 62 $temp_factor.append( {str($g.groupName): $bam_controls} ) |
37 Rscript '$__tool_directory__/diffbind.R' | 63 #end if |
38 -i $infile | 64 |
39 -o '$outfile' | 65 #end for |
40 -t $th | 66 |
41 -f $out.format | 67 $temp_factor.reverse() |
42 -p '$plots' | 68 $temp_factor_names.append([str($factorName), $temp_factor]) |
43 | 69 |
44 #if $out.binding_matrix: | 70 |
45 -b | 71 Rscript '$__tool_directory__/diffbind.R' |
46 #end if | 72 |
47 | 73 -i '#echo json.dumps(temp_factor_names)#' |
48 #if $out.rdata: | 74 -o '$outfile' |
49 -r | 75 -t $th |
50 #end if | 76 -f $out.format |
77 -p '$plots' | |
78 | |
79 #if $scorecol: | |
80 -n "$scorecol" | |
81 #end if | |
82 #if $lowerbetter: | |
83 -l "$lowerbetter" | |
84 #end if | |
85 #if $summits: | |
86 -s "$summits" | |
87 #end if | |
88 | |
89 #if $out.binding_matrix: | |
90 -b | |
91 #end if | |
92 | |
93 #if $out.rdata: | |
94 -r | |
95 #end if | |
96 | |
97 #if $out.analysis_info: | |
98 -a | |
99 #end if | |
100 | |
101 #if $out.rscript: | |
102 && cp '$__tool_directory__/diffbind.R' '$rscript' | |
103 #end if | |
51 ]]> | 104 ]]> |
52 </command> | 105 </command> |
53 <configfiles> | |
54 <configfile name="infile"><![CDATA[ | |
55 #set $counter = 1 | |
56 #for $sample in $samples: | |
57 #if str( $sample.bamcontrol ) != 'None' and $counter == 1: | |
58 SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks | |
59 #elif $counter == 1: | |
60 SampleID,Tissue,Factor,Condition,Replicate,bamReads,Peaks | |
61 #end if | |
62 #if str( $sample.bamcontrol ) != 'None': | |
63 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks | |
64 #else: | |
65 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks | |
66 #end if | |
67 #set $counter = $counter + 1 | |
68 #end for]]></configfile> | |
69 </configfiles> | |
70 <inputs> | 106 <inputs> |
71 <repeat name="samples" title="Samples" min="4"> | 107 <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Condition). One factor must be entered and there must be two or more groups. NOTE: Please only use letters, numbers or underscores."> |
72 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> | 108 <sanitizer> |
73 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> | 109 <valid initial="string.letters,string.digits"><add value="_" /></valid> |
74 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> | 110 </sanitizer> |
75 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> | 111 </param> |
76 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> | 112 <repeat name="rep_group" title="Group" min="2" default="2"> |
77 <param name="bamreads" type="data" format="bam" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/> | 113 <param name="groupName" type="text" label="Name" |
78 <param name="bamcontrol" type="data" format="bam" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/> | 114 help="Name of group that the peak files belong to (e.g. Resistant or Responsive). NOTE: Please only use letters, numbers or underscores (case sensitive)."> |
79 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> | 115 <sanitizer> |
116 <valid initial="string.letters,string.digits"><add value="_" /></valid> | |
117 </sanitizer> | |
118 </param> | |
119 <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/> | |
120 <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used for Peak calling."/> | |
121 <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one."/> | |
80 </repeat> | 122 </repeat> |
81 <param name="th" type="float" value="1" min="0" max="1" | 123 |
82 label="FDR Threshold" | 124 <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"/> |
83 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> | 125 <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" /> |
84 | 126 <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"/> |
127 <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/> | |
128 | |
85 <!-- Output Options --> | 129 <!-- Output Options --> |
86 <section name="out" expanded="false" title="Output Options"> | 130 <section name="out" expanded="false" title="Output Options"> |
87 <param name="format" type="select" label="Output Format"> | 131 <param name="format" type="select" label="Output Format"> |
88 <option value="bed">BED</option> | 132 <option value="bed">BED</option> |
89 <option value="gff">GFF</option> | 133 <option value="gff">GFF</option> |
90 <option value="wig">WIG</option> | 134 <option value="wig">WIG</option> |
91 </param> | 135 </param> |
92 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" /> | 136 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" /> |
93 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> | 137 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> |
94 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"> | 138 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"/> |
95 </param> | 139 <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/> |
140 <param name="analysis_info" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output analysis info?" help="If this option is set to Yes, information from the dba.count and dba.analyze commmands will be output in a text file. Default: No"/> | |
96 </section> | 141 </section> |
97 </inputs> | 142 </inputs> |
98 | 143 |
99 <outputs> | 144 <outputs> |
100 <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites"> | 145 <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites"> |
110 <filter>out['binding_matrix']</filter> | 155 <filter>out['binding_matrix']</filter> |
111 </data> | 156 </data> |
112 <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file"> | 157 <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file"> |
113 <filter>out['rdata']</filter> | 158 <filter>out['rdata']</filter> |
114 </data> | 159 </data> |
160 <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript"> | |
161 <filter>out['rscript']</filter> | |
162 </data> | |
163 <data name="analysis_info" format="txt" from_work_dir="DiffBind_analysis_info.txt" label="${tool.name} on ${on_string}: Analysis info"> | |
164 <filter>out['analysis_info']</filter> | |
165 </data> | |
115 </outputs> | 166 </outputs> |
116 | 167 |
117 <tests> | 168 <tests> |
118 <test expect_num_outputs="4"> | 169 <test expect_num_outputs="6"> |
119 <repeat name="samples"> | 170 <param name="factorName" value="Condition"/> |
120 <param name="sample_id" value="BT4741" /> | 171 <repeat name="rep_group"> |
121 <param name="tissue" value="BT474" /> | 172 <param name="groupName" value="Resistant"/> |
122 <param name="factor" value="ER" /> | 173 <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> |
123 <param name="condition" value="Resistant" /> | 174 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> |
124 <param name="replicate" value="1" /> | |
125 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam" /> | |
126 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz" /> | |
127 </repeat> | 175 </repeat> |
128 <repeat name="samples"> | 176 <repeat name="rep_group"> |
129 <param name="sample_id" value="BT4742" /> | 177 <param name="groupName" value="Responsive"/> |
130 <param name="tissue" value="BT474" /> | 178 <param name="peaks" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> |
131 <param name="factor" value="ER" /> | 179 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> |
132 <param name="condition" value="Resistant" /> | |
133 <param name="replicate" value="2" /> | |
134 <param name="bamreads" ftype="bam" value="BT474_ER_2.bam" /> | |
135 <param name="peaks" ftype="bed" value="BT474_ER_2.bed.gz" /> | |
136 </repeat> | 180 </repeat> |
137 <repeat name="samples"> | 181 <param name="scorecol" value="5" /> |
138 <param name="sample_id" value="MCF71" /> | |
139 <param name="tissue" value="MCF7" /> | |
140 <param name="factor" value="ER" /> | |
141 <param name="condition" value="Responsive" /> | |
142 <param name="replicate" value="1" /> | |
143 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam" /> | |
144 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz" /> | |
145 </repeat> | |
146 <repeat name="samples"> | |
147 <param name="sample_id" value="MCF72" /> | |
148 <param name="tissue" value="MCF7" /> | |
149 <param name="factor" value="ER" /> | |
150 <param name="condition" value="Responsive" /> | |
151 <param name="replicate" value="2" /> | |
152 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> | |
153 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> | |
154 </repeat> | |
155 <param name="pdf" value="True" /> | 182 <param name="pdf" value="True" /> |
156 <param name="binding_matrix" value="True" /> | 183 <param name="binding_matrix" value="True" /> |
157 <param name="rdata" value="True" /> | 184 <param name="rdata" value="True" /> |
185 <param name="rscript" value="True"/> | |
186 <param name="analysis_info" value="True"/> | |
158 <output name="outfile" value="out_diffbind.bed" /> | 187 <output name="outfile" value="out_diffbind.bed" /> |
159 <output name="plots" value="out_plots.pdf" compare="sim_size" /> | 188 <output name="plots" value="out_plots.pdf" compare="sim_size" /> |
160 <output name="binding_matrix" value="out_binding.matrix" /> | 189 <output name="binding_matrix" value="out_binding.matrix" /> |
161 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> | 190 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> |
191 <output name="rscript" value="out_rscript.txt"/> | |
192 <output name="analysis_info" value="out_analysis_info.txt" compare="sim_size" > | |
193 <assert_contents> | |
194 <has_text text="SessionInfo"/> | |
195 </assert_contents> | |
196 </output> | |
162 </test> | 197 </test> |
163 </tests> | 198 </tests> |
164 <help><![CDATA[ | 199 <help><![CDATA[ |
165 | 200 |
166 .. class:: infomark | 201 .. class:: infomark |
188 affinity data, the second working through the main plotting routines, the third discussing the | 223 affinity data, the second working through the main plotting routines, the third discussing the |
189 use of a blocking factor, and the fourth revisiting occupancy data (peak calls) in more detail, | 224 use of a blocking factor, and the fourth revisiting occupancy data (peak calls) in more detail, |
190 as well as comparing the results of an occupancy-based analysis with an affinity-based one. | 225 as well as comparing the results of an occupancy-based analysis with an affinity-based one. |
191 Finally, certain technical aspects of how these analyses are accomplished are detailed. | 226 Finally, certain technical aspects of how these analyses are accomplished are detailed. |
192 | 227 |
193 Note DiffBind requires a minimum of four samples (two groups with two replicates each). | 228 Note this DiffBind tool requires a minimum of four samples (two groups with two replicates each). |
194 | |
195 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
196 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
197 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf | |
198 | 229 |
199 ----- | 230 ----- |
200 | 231 |
201 **Inputs** | 232 **Inputs** |
202 | 233 |
208 be associated with each peakset (one for the ChIP data, and optionally another representing | 239 be associated with each peakset (one for the ChIP data, and optionally another representing |
209 a control sample) | 240 a control sample) |
210 | 241 |
211 **Sample Information** | 242 **Sample Information** |
212 | 243 |
213 You have to specify your sample information in the tool form above, where Condition contains the groups you want to compare. | 244 You have to specify your sample information in the tool form above, where Factor defines the groups you want to compare (e.g. Resistant and Responsive). |
214 | 245 |
215 Example: | 246 Example: |
216 | 247 |
217 ============= ========== ========== ============= ============= | 248 ============= ============= |
218 **SampleID** **Tissue** **Factor** **Condition** **Replicate** | 249 **SampleID** **Group** |
219 ------------- ---------- ---------- ------------- ------------- | 250 ------------- ------------- |
220 BT4741 BT474 ER Resistant 1 | 251 BT4741 Resistant |
221 BT4742 BT474 ER Resistant 2 | 252 BT4742 Resistant |
222 MCF71 MCF7 ER Responsive 1 | 253 MCF71 Responsive |
223 MCF72 MCF7 ER Responsive 2 | 254 MCF72 Responsive |
224 MCF73 MCF7 ER Responsive 3 | 255 ============= ============= |
225 T47D1 T47D ER Responsive 1 | |
226 T47D2 T47D ER Responsive 2 | |
227 MCF7r1 MCF7 ER Resistant 1 | |
228 MCF7r2 MCF7 ER Resistant 2 | |
229 ZR751 ZR75 ER Responsive 1 | |
230 ZR752 ZR75 ER Responsive 2 | |
231 ============= ========== ========== ============= ============= | |
232 | 256 |
233 | 257 |
234 **Peak files** | 258 **Peak files** |
235 | 259 |
236 Result of your Peak calling experiment in bed format, one file for each sample is required. | 260 Result of your Peak calling experiment in bed format, one file for each sample is required. The peak caller, format and score column can be specified in the tool form above. The default settings expect narrowPeak bed format, which has the score in the 8th column (-log10pvalue), and can be output from MACS2. |
237 | 261 |
238 Example: | 262 Example (MACS.xls file in bed format): |
239 | 263 |
240 ======= ======= ======= =============== ======= | 264 ======= ======= ======= =============== ============== |
241 1 2 3 4 **5** | 265 1 2 3 4 **5 (Score)** |
242 ======= ======= ======= =============== ======= | 266 ======= ======= ======= =============== ============== |
243 chr18 215562 216063 MACS_peak_16037 56.11 | 267 chr18 215562 216063 MACS_peak_16037 56.11 |
244 chr18 311530 312105 MACS_peak_16038 222.49 | 268 chr18 311530 312105 MACS_peak_16038 222.49 |
245 chr18 356656 357315 MACS_peak_16039 92.06 | 269 chr18 356656 357315 MACS_peak_16039 92.06 |
246 chr18 371110 372092 MACS_peak_16040 123.86 | 270 chr18 371110 372092 MACS_peak_16040 123.86 |
247 chr18 395116 396464 MACS_peak_16041 1545.39 | 271 chr18 395116 396464 MACS_peak_16041 1545.39 |
248 chr18 399014 400382 MACS_peak_16042 1835.19 | 272 chr18 399014 400382 MACS_peak_16042 1835.19 |
249 chr18 499134 500200 MACS_peak_16043 748.32 | 273 chr18 499134 500200 MACS_peak_16043 748.32 |
250 chr18 503518 504552 MACS_peak_16044 818.30 | 274 chr18 503518 504552 MACS_peak_16044 818.30 |
251 chr18 531672 532274 MACS_peak_16045 159.30 | 275 chr18 531672 532274 MACS_peak_16045 159.30 |
252 chr18 568326 569282 MACS_peak_16046 601.11 | 276 chr18 568326 569282 MACS_peak_16046 601.11 |
253 ======= ======= ======= =============== ======= | 277 ======= ======= ======= =============== ============== |
254 | 278 |
255 * BAM file which contains the mapped sequencing reads can be associated with each peakset | 279 * BAM file which contains the mapped sequencing reads associated with each peakset, one file for each sample is required. |
256 * Control BAM file represents a control dataset and is optional, but has to be specified for all samples when used. | 280 * Optional: Control BAM file representing a control dataset. If used, has to be specified for all samples. Note that the DiffBind authors say control reads are best utilized prior to running DiffBind, at the peak calling stage (e.g. with MACS2) and in blacklists, see this `Bioconductor post`_. |
257 | 281 |
258 ----- | 282 ----- |
259 | 283 |
260 **Outputs** | 284 **Outputs** |
261 | 285 |
263 | 287 |
264 * differentially bound sites in BED, WIG or GFF format | 288 * differentially bound sites in BED, WIG or GFF format |
265 | 289 |
266 Optionally, under **Output Options** you can choose to output | 290 Optionally, under **Output Options** you can choose to output |
267 | 291 |
268 * a correlation heatmap plot | 292 * a PDF of plots (Heatmap, PCA, MA, Volcano, Boxplots) |
269 * a binding affinity matrix | 293 * a binding affinity matrix |
270 * an RData file | 294 * the R script used by this tool |
295 * an RData file of the R objects generated | |
296 * a text file with information on the analysis (number of Intervals, FriP scores, method used) | |
271 | 297 |
272 **Differentially Bound Sites** | 298 **Differentially Bound Sites** |
273 | 299 |
274 As output format you can choose BED, GFF, WIG. | 300 As output format you can choose BED, GFF, WIG. |
275 | 301 |
276 Example - BED format: | 302 Example - BED format: |
277 | 303 |
278 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== | 304 ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== |
279 1 2 3 4 5 6 7 8 9 10 **11** | 305 seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value **FDR** |
280 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== | 306 ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== |
281 chr18 394600 396513 1914 * 7.15 7.89 5.55 2.35 7.06e-24 9.84e-21 | 307 chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 |
282 chr18 111567 112005 439 * 5.71 3.63 6.53 -2.89 1.27e-08 8.88e-06 | 308 chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 |
283 chr18 346464 347342 879 * 5 3.24 5.77 -2.52 6.51e-06 0.00303 | 309 chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 |
284 chr18 399014 400382 1369 * 7.62 8.05 7 1.04 1.04e-05 0.00364 | 310 chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 |
285 chr18 371110 372102 993 * 4.63 5.36 3.07 2.3 8.1e-05 0.0226 | 311 chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226 |
286 ===== ====== ====== ===== ==== ==== ==== ==== ===== ======== ======== | 312 ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== |
287 | 313 |
288 Columns contain the following data: | 314 Columns contain the following data: |
289 | 315 |
290 * **1st**: Chromosome name | 316 * **1st**: Chromosome name |
291 * **2nd**: Start position of site | 317 * **2nd**: Start position of site |
305 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent | 331 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent |
306 differential analysis. | 332 differential analysis. |
307 | 333 |
308 Example: | 334 Example: |
309 | 335 |
310 ====== ====== ====== ========== ========== ========= ====== ========= ==== | 336 ===== ====== ====== ================ ================ ================ ================ |
311 ID Tissue Factor Condition Treatment Replicate Caller Intervals FRiP | 337 CHR START END MCF7_ER_1.bed MCF7_ER_2.bed BT474_ER_1.bed BT474_ER_2.bed |
312 ====== ====== ====== ========== ========== ========= ====== ========= ==== | 338 ===== ====== ====== ================ ================ ================ ================ |
313 BT4741 BT474 ER Resistant Full-Media 1 counts 2845 0.16 | 339 chr18 111567 112005 137.615208000375 59.878372946728 29.4139375878664 19.9594576489093 |
314 BT4742 BT474 ER Resistant Full-Media 2 counts 2845 0.15 | 340 chr18 189223 189652 19.9594576489093 12.6059732519427 11.5554754809475 23.110950961895 |
315 MCF71 MCF7 ER Responsive Full-Media 1 counts 2845 0.27 | 341 chr18 215232 216063 11.5554754809475 15.7574665649284 31.5149331298568 72.4843461986707 |
316 MCF72 MCF7 ER Responsive Full-Media 2 counts 2845 0.17 | 342 chr18 311530 312172 17.8584621069189 11.5554754809475 54.6258840917518 43.0704086108043 |
317 MCF73 MCF7 ER Responsive Full-Media 3 counts 2845 0.23 | 343 chr18 346464 347342 75.6358395116564 40.9694130688139 21.0099554199046 16.8079643359236 |
318 T47D1 T47D ER Responsive Full-Media 1 counts 2845 0.10 | 344 chr18 356560 357362 11.5554754809475 14.7069687939332 57.7773774047375 53.5753863207566 |
319 T47D2 T47D ER Responsive Full-Media 2 counts 2845 0.06 | 345 chr18 371110 372102 8.40398216796182 9.45447993895705 81.9388261376278 82.989323908623 |
320 MCF7r1 MCF7 ER Resistant Full-Media 1 counts 2845 0.20 | 346 chr18 394600 396513 56.7268796337423 43.0704086108043 510.541916703681 438.05757050501 |
321 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 | 347 chr18 399014 400382 156.524167878289 117.655750351465 558.864814169461 496.885445680743 |
322 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 | 348 chr18 498906 500200 767.913870597511 278.381909313735 196.443083176108 181.736114382174 |
323 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 | 349 ===== ====== ====== ================ ================ ================ ================ |
324 ====== ====== ====== ========== ========== ========= ====== ========= ==== | |
325 | 350 |
326 ----- | 351 ----- |
327 | 352 |
328 **More Information** | 353 **More Information** |
329 | 354 |
390 overview of the results of the analysis, while correlation heatmaps and PCA plots show | 415 overview of the results of the analysis, while correlation heatmaps and PCA plots show |
391 how the groups cluster based on differentially bound sites. Boxplots show the distribution | 416 how the groups cluster based on differentially bound sites. Boxplots show the distribution |
392 of reads within differentially bound sites corresponding to whether they gain or | 417 of reads within differentially bound sites corresponding to whether they gain or |
393 lose affinity between the two sample groups. A reporting mechanism enables differentially | 418 lose affinity between the two sample groups. A reporting mechanism enables differentially |
394 bound sites to be extracted for further processing, such as annotation, motif, and | 419 bound sites to be extracted for further processing, such as annotation, motif, and |
395 pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.* | 420 pathway analyses. |
396 | 421 |
397 ----- | 422 ----- |
398 | 423 |
399 **References** | 424 **References** |
400 | 425 |
401 DiffBind Authors: Rory Stark, Gordon Brown (2011) | 426 DiffBind Authors: Rory Stark, Gordon Brown (2011) |
402 Wrapper authors: Bjoern Gruening, Pavankumar Videm | 427 Wrapper authors: Bjoern Gruening, Pavankumar Videm |
428 | |
429 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
430 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
431 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf | |
432 .. _`Bioconductor post`: https://support.bioconductor.org/p/69924/ | |
403 | 433 |
404 ]]> | 434 ]]> |
405 </help> | 435 </help> |
406 <citations> | 436 <citations> |
407 <citation type="doi">doi:10.1038/nature10730</citation> | 437 <citation type="doi">doi:10.1038/nature10730</citation> |