5
|
1 <tool id="bismark_methylation_extractor" name="Bismark Meth. Extractor" version="0.10.2">
|
3
|
2 <!-- Wrapper compatible with Bismark version 0.10 -->
|
|
3 <description>Reports on methylation status of reads mapped by Bismark</description>
|
0
|
4 <!--<version_command>bismark_methylation_extractor version</version_command>-->
|
|
<requirements>
    <!-- Environment variable the tool expects to be provided. -->
    <requirement type="set_environment">SCRIPT_PATH</requirement>
    <!-- Packaged dependencies, each pinned to a fixed version. -->
    <requirement type="package" version="0.12.8">bowtie</requirement>
    <requirement type="package" version="2.1.0">bowtie2</requirement>
</requirements>
|
|
<parallelism method="basic"/>
|
|
<command interpreter="python">
<![CDATA[
    ## Cheetah template that builds the command line for the Python wrapper
    ## script. Lines starting with "##" are template comments and are not
    ## emitted into the generated command.
    bismark_methylation_extractor.py

    --infile $input

    ## Disabled option, kept for reference. It has to be a "##" template
    ## comment: a single "#" that is not followed by a directive name is
    ## passed through as plain text into the generated command line.
    ## NOTE(review): confirm whether the wrapper script still needs this option.
    ##--bismark_path \$SCRIPT_PATH

    ## Library layout; the overlap flag only exists for paired-end data.
    #if $singlePaired.sPaired == "single":
        --single-end
    #else:
        --paired-end
        $singlePaired.no_overlap
    #end if

    ## Only pass the option when a non-default value was entered.
    #if str($ignore_bps) != "0":
        --ignore $ignore_bps
    #end if

    #if $report:
        --report-file $o_report
    #end if

    #if $comprehensive:
        --comprehensive
    #end if

    #if $merge_non_cpg:
        --merge-non-cpg
    #end if

    #if $compress:
        ## one single archive instead of the individual result files
        --compress $compressed_output
    #else:
        ## The branches below select which output datasets are handed to the
        ## wrapper. They rely on the truthiness of the boolean parameters
        ## rather than on a comparison with a Python literal.
        #if not $comprehensive and not $merge_non_cpg:
            ## twelve files
            --cpg_ot $cpg_ot
            --chg_ot $chg_ot
            --chh_ot $chh_ot
            --cpg_ctot $cpg_ctot
            --chg_ctot $chg_ctot
            --chh_ctot $chh_ctot
            --cpg_ob $cpg_ob
            --chg_ob $chg_ob
            --chh_ob $chh_ob
            --cpg_ctob $cpg_ctob
            --chg_ctob $chg_ctob
            --chh_ctob $chh_ctob
        #elif $merge_non_cpg and $comprehensive:
            ## two files
            --non_cpg_context $non_cpg_context
            --cpg_context $cpg_context
        #elif $comprehensive:
            ## three files
            --cpg_context $cpg_context
            --chg_context $chg_context
            --chh_context $chh_context
        #elif $merge_non_cpg:
            ## eight files
            --non_cpg_context_ctot $non_cpg_context_ctot
            --non_cpg_context_ot $non_cpg_context_ot
            --non_cpg_context_ob $non_cpg_context_ob
            --non_cpg_context_ctob $non_cpg_context_ctob
            --cpg_ot $cpg_ot
            --cpg_ctot $cpg_ctot
            --cpg_ob $cpg_ob
            --cpg_ctob $cpg_ctob
        #end if
    ## end compress
    #end if

]]>
</command>
|
|
<inputs>
    <!-- Input Parameters -->
    <param name="input" type="data" format="sam,bam" label="SAM/BAM file from Bismark bisulfite mapper" />

    <!-- Library layout; the command template emits single-end or paired-end accordingly. -->
    <conditional name="singlePaired">
        <param name="sPaired" type="select" label="Is this library mate-paired?">
            <option value="single">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single" />
        <when value="paired">
            <!-- The true value is inserted verbatim into the command line for paired-end data. -->
            <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" />
        </when>
    </conditional>

    <!-- A value of 0 means the ignore option is not passed at all. -->
    <param name="ignore_bps" type="integer" value="0" label="Ignore the first N bp when processing the methylation call string" />

    <!-- The boolean switches below are referenced by name in the command template and in the output filters. -->
    <!-- Label kept on one line so that no line break or indentation ends up in the displayed text. -->
    <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info into context-dependent output files" help="" />
    <param name="merge_non_cpg" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all non-CpG contexts into one file" help="This will produce eight strand-specific output files, or two output files in comprehensive mode." />
    <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Short methylation summary output" />
    <param name="compress" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Compress all result files and output one single file" />

</inputs>
|
|
<outputs>
    <!--
        Strand abbreviations used in the output names:
        OT   - original top strand
        CTOT - complementary to original top strand
        OB   - original bottom strand
        CTOB - complementary to original bottom strand

        Every filter mirrors the branch of the command template that passes the
        corresponding output. Filters refer to the input parameters by their
        exact, case-sensitive names (compress, comprehensive, merge_non_cpg, report).
    -->
    <data format="tabular" name="o_report" label="${tool.name} on ${on_string}: Report file">
        <filter> ( report is True ) </filter>
    </data>

    <!--
        Strand-specific output (comprehensive switched off).
        The four CpG files are produced both in the default twelve-file mode and
        in the eight-file mode (merge_non_cpg), so their filters do not depend on
        merge_non_cpg. The CHG and CHH files only exist in the twelve-file mode.
    -->
    <data format="tabular" name="cpg_ot" label="${tool.name} on ${on_string}: CpG original top strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="chg_ot" label="${tool.name} on ${on_string}: CHG original top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ot" label="${tool.name} on ${on_string}: CHH original top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="cpg_ctot" label="${tool.name} on ${on_string}: CpG complementary to top strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="chg_ctot" label="${tool.name} on ${on_string}: CHG complementary to top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ctot" label="${tool.name} on ${on_string}: CHH complementary to top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>

    <data format="tabular" name="cpg_ob" label="${tool.name} on ${on_string}: CpG original bottom strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="chg_ob" label="${tool.name} on ${on_string}: CHG original bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ob" label="${tool.name} on ${on_string}: CHH original bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="cpg_ctob" label="${tool.name} on ${on_string}: CpG complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="chg_ctob" label="${tool.name} on ${on_string}: CHG complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ctob" label="${tool.name} on ${on_string}: CHH complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>

    <!-- Context-dependent methylation output files (comprehensive option) -->
    <data format="tabular" name="cpg_context" label="${tool.name} on ${on_string}: CpG context dependent">
        <filter> ( compress == False and comprehensive ) </filter>
    </data>
    <data format="tabular" name="chg_context" label="${tool.name} on ${on_string}: CHG context dependent">
        <filter> ( compress == False and comprehensive and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_context" label="${tool.name} on ${on_string}: CHH context dependent">
        <filter> ( compress == False and comprehensive and merge_non_cpg == False ) </filter>
    </data>

    <!-- Merged non-CpG context, comprehensive mode (two-file mode together with cpg_context) -->
    <data format="tabular" name="non_cpg_context" label="${tool.name} on ${on_string}: Non CpG context dependent">
        <filter> ( compress == False and comprehensive and merge_non_cpg ) </filter>
    </data>

    <!-- Merged non-CpG context, strand-specific (eight-file mode together with the CpG strand files) -->
    <data format="tabular" name="non_cpg_context_ot" label="${tool.name} on ${on_string}: Non CpG context dependent on original top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>
    <data format="tabular" name="non_cpg_context_ctot" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>
    <data format="tabular" name="non_cpg_context_ob" label="${tool.name} on ${on_string}: Non CpG context dependent on original bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>
    <data format="tabular" name="non_cpg_context_ctob" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>

    <!-- Single archive that replaces all individual result files when compress is selected -->
    <data format="gzipped" name="compressed_output" label="${tool.name} on ${on_string}: Result archive.">
        <filter> ( compress ) </filter>
    </data>
</outputs>
|
|
187
|
|
<tests>
    <!-- No functional tests are defined for this tool. -->
</tests>
|
|
190
|
|
191 <help>
|
4
|
192 <![CDATA[
|
0
|
193
|
|
194 **What it does**
|
|
195
|
|
196 The following is a brief description of all options to control the Bismark_
|
4
|
197 methylation extractor. The script reads in a bisulfite read alignment results file
|
0
|
198 produced by the Bismark bisulfite mapper and extracts the methylation information
|
|
199 for individual cytosines. This information is found in the methylation call field
|
|
200 which can contain the following characters:
|
|
201
|
|
202
|
|
203 - X = for methylated C in CHG context (was protected)
|
|
204 - x = for not methylated C in CHG context (was converted)
|
|
205 - H = for methylated C in CHH context (was protected)
|
|
206 - h = for not methylated C in CHH context (was converted)
|
|
207 - Z = for methylated C in CpG context (was protected)
|
|
208 - z = for not methylated C in CpG context (was converted)
|
|
209 - . = for any bases not involving cytosines
|
|
210
|
|
211
|
|
212 The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
|
|
213 context (this distinction is actually already made in Bismark itself). As the methylation
|
|
214 information for every C analysed can produce files which easily have tens or even hundreds of
|
|
215 millions of lines, file sizes can become very large and more difficult to handle. The C
|
|
216 methylation info additionally splits cytosine methylation calls up into one of the four possible
|
|
217 strands a given bisulfite read aligned against:
|
|
218
|
|
219 - OT = original top strand
|
|
220 - CTOT = complementary to original top strand
|
|
221
|
|
222 - OB = original bottom strand
|
|
223 - CTOB = complementary to original bottom strand
|
|
224
|
|
225 Thus, by default twelve individual output files are being generated per input file (unless
|
|
226 --comprehensive is specified, see below). The output files can be imported into a genome
|
|
227 viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
|
|
228 unless the bisulfite reads were generated preserving directionality it doesn't make any
|
|
229 sense to look at the data in a strand-specific manner). Strand-specific output files can
|
|
230 optionally be skipped, in which case only three output files for CpG, CHG or CHH context
|
|
231 will be generated. For both the strand-specific and comprehensive outputs there is also
|
|
232 the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
|
|
233
|
|
234
|
|
235 .. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
|
|
236
|
|
237
|
|
238 It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2.
|
|
239
|
|
240 -------
|
|
241
|
|
242 **Bismark settings**
|
|
243
|
|
244 All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin.
|
|
245
|
|
246 ------
|
|
247
|
|
248 **Outputs**
|
|
249
|
|
250 The output files are in the following format (tab delimited)::
|
|
251
|
|
252
|
|
253 Column Description
|
|
254 -------- --------------------------------------------------------
|
|
255 1 seq-ID
|
|
256 2 strand
|
|
257 3 chromosome
|
|
258 4 position
|
|
259 5 methylation call
|
|
260
|
|
261
|
|
262 * Methylated cytosines receive a '+' orientation,
|
|
263 * Unmethylated cytosines receive a '-' orientation.
|
|
264
|
|
265 ------
|
|
266
|
|
267 **OPTIONS**
|
|
268
|
|
269 Input::
|
|
270
|
|
271 -s/--single-end Input file(s) are Bismark result file(s) generated from single-end
|
|
272 read data. Specifying either --single-end or --paired-end is
|
|
273 mandatory.
|
|
274
|
|
275 -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end
|
|
276 read data. Specifying either --paired-end or --single-end is
|
|
277 mandatory.
|
|
278
|
|
279 --no_overlap For paired-end reads it is theoretically possible that read_1 and
|
|
280 read_2 overlap. This option avoids scoring overlapping methylation
|
|
281 calls twice. Whilst this removes a bias towards more methylation calls
|
|
282 towards the center of sequenced fragments it can de facto remove
|
|
283 a good proportion of the data.
|
|
284
|
|
285 --ignore INT Ignore the first INT bp at the 5' end of each read when processing the
|
|
286 methylation call string. This can remove e.g. a restriction enzyme site
|
|
287 at the start of each read.
|
|
288
|
|
289 Output::
|
|
290
|
4
|
291 --comprehensive Specifying this option will merge all four possible strand-specific
|
|
292 methylation info into context-dependent output files. The default
|
0
|
293 contexts are:
|
|
294 - CpG context
|
|
295 - CHG context
|
|
296 - CHH context
|
|
297
|
|
298 --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight
|
|
299 strand-specific output files (default) for Cs in
|
|
300 - CpG context
|
|
301 - non-CpG context
|
|
302
|
|
303 --report Prints out a short methylation summary as well as the parameters used to run
|
|
304 this script.
|
|
305
|
|
306
|
4
|
307 ]]>
|
0
|
308 </help>
|
|
309 </tool>
|