comparison MethylDackel.xml @ 3:f112bf3dd5ff draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/pileometh commit 5468fb89708be679e4e580074734e671f60a9648
author bgruening
date Mon, 13 Feb 2017 22:44:01 -0500
parents
children 906db57d5d65
comparison
equal deleted inserted replaced
2:cda51d96a9bc 3:f112bf3dd5ff
1 <tool id="pileometh" name="MethylDackel" version="0.2.1">
2 <description>A tool for processing bisulfite sequencing alignments</description>
3 <requirements>
4 <requirement type="package" version="0.2.1">methyldackel</requirement>
5 </requirements>
6 <stdio>
7 <!-- Anything other than zero (positive or negative exit code) is an error -->
8 <exit_code range="1:" />
9 <exit_code range=":-1" />
10 <!-- In case the return code has not been set properly, check stderr too -->
11 <regex match="Error:" />
12 <regex match="Exception:" />
13 </stdio>
14 <version_command><![CDATA[MethylDackel --version]]></version_command>
15 <command><![CDATA[
## Dataset paths and free-text values below are single-quoted so the shell passes each one as exactly one argument.
## The reference is linked to a fixed name (reference.fasta) regardless of where it comes from.
16 #if $reference_source.reference_source_selector == "cached":
17 ln -s '$reference_source.ref_file.fields.path' reference.fasta &&
18 #else:
19 ln -s '$reference_source.ref_file' reference.fasta &&
20 #end if
21
22 MethylDackel
23 $main_task.task
24
25 #if $main_task.task == "extract":
26 -o output
27 $main_task.mergeContext
28 #if str($main_task.OT).strip() != "":
29 --OT '$main_task.OT'
30 #end if
31 #if str($main_task.OB).strip() != "":
32 --OB '$main_task.OB'
33 #end if
34 #if str($main_task.CTOT).strip() != "":
35 --CTOT '$main_task.CTOT'
36 #end if
37 #if str($main_task.CTOB).strip() != "":
38 --CTOB '$main_task.CTOB'
39 #end if
40 #end if
41
42 #if $advanced_options.options=="yes":
43 #if $advanced_options.mbias_regionString:
44 -r '$advanced_options.mbias_regionString'
45 #end if
46 $advanced_options.keepDupes
47 $advanced_options.keepSingleton
48 $advanced_options.keepDiscordant
49 -q $advanced_options.min_mapq
50 -p $advanced_options.min_phred
51 -D $advanced_options.max_pbdepth
52 #if $main_task.task == "extract":
53 -d $advanced_options.min_pbdepth
54 --ignoreFlags $advanced_options.ignoreFlags
55 --requireFlags $advanced_options.requireFlags
56 $advanced_options.fraction
57 $advanced_options.counts
58 $advanced_options.methylKit
59 $advanced_options.logit
60 #if str($advanced_options.nOT).strip() != "":
61 --nOT '$advanced_options.nOT'
62 #end if
63 #if str($advanced_options.nOB).strip() != "":
64 --nOB '$advanced_options.nOB'
65 #end if
66 #if str($advanced_options.nCTOT).strip() != "":
67 --nCTOT '$advanced_options.nCTOT'
68 #end if
69 #if str($advanced_options.nCTOB).strip() != "":
70 --nCTOB '$advanced_options.nCTOB'
71 #end if
72 #end if
73 $advanced_options.CHG
74 $advanced_options.CHH
75 #end if
76
77 reference.fasta
78
79 '$input_sortedAlignBAM'
80
## After an mbias run, touch all four per-strand SVG names so that each declared output file exists
## (touch creates an empty file for any name that is not present yet).
81 #if $main_task.task == "mbias":
82 out_mbias &&
83 touch out_mbias_OT.svg &&
84 touch out_mbias_OB.svg &&
85 touch out_mbias_CTOT.svg &&
86 touch out_mbias_CTOB.svg
87 #end if
88 ]]></command>
89 <inputs>
90 <conditional name="reference_source">
<!-- Reference genome: either an entry of the all_fasta data table ("cached") or a FASTA dataset from the history -->
91 <param name="reference_source_selector" type="select" label="Load reference genome from">
92 <option value="cached">Local cache</option>
93 <option value="history">History</option>
94 </param>
95 <when value="cached">
96 <param name="ref_file" type="select" label="Using reference genome" help="Reference sequence">
97 <options from_data_table="all_fasta"/>
98 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
99 </param>
100 </when>
101 <when value="history">
102 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
103 </when>
104 </conditional>
105
106 <param name="input_sortedAlignBAM" type="data" format="bam" multiple="False" label="sorted_alignments.bam"/> <!-- alignment dataset; placed on the command line after reference.fasta -->
107 <conditional name="main_task">
<!-- Task selector: "extract" exposes the per-strand inclusion bounds; "mbias" has no task-specific parameters -->
108 <param name="task" type="select" label="What do you want to do?" >
109 <option value="extract">Extract methylation metrics from an alignment file in BAM/CRAM format</option>
110 <option value="mbias">Determine the position-dependent methylation bias in a dataset, producing diagnostic SVG images</option>
111 </param>
112 <when value="extract">
113 <param name="mergeContext" type="boolean" checked="false" truevalue="--mergeContext" falsevalue=""
114 label="Merge per-Cytosine metrics from CpG and CHG contexts into per-CPG or per-CHG metrics" help="(--mergeContext)" />
115 <param name="OT" type="text" value="" label="Original top strand bounds (comma-separated, no spaces)"
116 help="Inclusion bounds for methylation calls from reads/pairs
117 originating from the original top strand. Suggested values can
118 be obtained from the MBias program. Each integer represents a
119 1-based position on a read. For example --OT A,B,C,D
120 translates to, 'Include calls at positions from A through B
121 on read #1 and C through D on read #2'. If a 0 is used at any
122 position then that is translated to mean start/end of the
123 alignment, as appropriate. For example, --OT 5,0,0,0 would
124 include all but the first 4 bases on read #1. Users are
125 strongly advised to consult a methylation bias plot, for
126 example by using the MBias program." />
127 <param name="OB" type="text" value="" label="Original bottom strand bounds (comma-separated, no spaces)" />
128 <param name="CTOT" type="text" value="" label="Complementary to the original top strand bounds (comma-separated, no spaces)" />
129 <param name="CTOB" type="text" value="" label="Complementary to the original bottom strand bounds (comma-separated, no spaces)" />
130 </when>
131 <when value="mbias"/>
132 </conditional>
133 <conditional name="advanced_options">
<!-- Optional filters and output modes; the command template only reads these parameters when "yes" is selected -->
134 <param name="options" type="select" label="Advanced options">
135 <option value="">Hide advanced options</option>
136 <option value="yes">Display advanced options</option>
137 </param>
138 <when value="yes">
139 <param name="mbias_regionString" type="text" value="" label="Region string in which to extract methylation"/>
140 <param name="keepDupes" type="boolean" checked="false" truevalue="--keepDupes" falsevalue=""
141 label="By default, any alignment marked as a duplicate is ignored. This option causes them to be incorporated" />
142 <param name="keepSingleton" type="boolean" checked="false" truevalue="--keepSingleton" falsevalue=""
143 label="By default, if only one read in a pair aligns (a singleton) then it's ignored." />
144 <param name="keepDiscordant" type="boolean" checked="false" truevalue="--keepDiscordant" falsevalue=""
145 label="By default, paired-end alignments with the properly-paired bit unset in the FLAG field are ignored. Note that the definition of concordant and discordant is based on your aligner settings." />
146 <param name="min_mapq" type="integer" value="10" label="Minimum MAPQ threshold to include an alignment (default 10)"/>
<!-- min="1" enforces the "must be >0" requirement stated in the label -->
147 <param name="min_phred" type="integer" value="5" min="1" label="Minimum Phred threshold to include a base (default 5). This must be >0."/>
148 <param name="max_pbdepth" type="integer" value="2000" label="Maximum per-base depth (default 2000)"/>
149 <param name="min_pbdepth" type="integer" value="1" min="1" label="Minimum per-base depth"
150 help="Minimum per-base depth for reporting output. If you use --mergeContext (above), then this applies to the merged CpG/CHG (default 1). (-d)" />
151
152 <param name="ignoreFlags" type="integer" value="3840" label="Ignore certain alignments"
153 help="By default, any alignment marked as secondary (bit 0x100), failing QC (bit 0x200), a PCR/optical duplicate (0x400) or supplemental (0x800) is ignored. This equates to a value of 0xF00 or 3840 in decimal. If you would like to change that, you can specify a new value here." />
154 <param name="requireFlags" type="integer" value="0" label="Require alignment flags"
155 help="Require each alignment to have all bits in this value present, or else the alignment is ignored. This is equivalent to the -f option in samtools. The default is 0, which includes all alignments." />
156
157 <param name="fraction" type="boolean" checked="false" truevalue="--fraction" falsevalue=""
158 label="Extract fractional methylation (only) at each position. This is mutually exclusive with --counts, --logit, and --methylKit" />
159 <param name="counts" type="boolean" checked="false" truevalue="--counts" falsevalue=""
160 label="Extract base counts (only) at each position. This is mutually exclusive with --fraction, --logit, and --methylKit" />
161 <param name="logit" type="boolean" checked="false" truevalue="--logit" falsevalue=""
162 label="Extract logit(M/(M+U)) (only) at each position. This is mutually exclusive with --counts, --fraction, and --methylKit" />
163 <param name="methylKit" type="boolean" checked="false" truevalue="--methylKit" falsevalue=""
164 label="Output in the format required by methylKit. This is mutually exclusive with --counts, --fraction, and --logit" />
165
166 <param name="CHG" type="boolean" checked="false" truevalue="--CHG" falsevalue=""
167 label="Additional output file with CHG methylation metrics" />
168 <param name="CHH" type="boolean" checked="false" truevalue="--CHH" falsevalue=""
169 label="Additional output file with CHH methylation metrics" />
170
171 <param name="nOT" type="text" value="" label="Original top strand absolute bounds (comma-separated, no spaces)"
172 help="Like --OT, but always exclude INT bases from a given end
173 from inclusion, regardless of the length of an alignment. This
174 is useful in cases where reads may have already been trimmed
175 to different lengths, but still none-the-less contain a
176 certain length bias at one or more ends." />
177 <param name="nOB" type="text" value="" label="Original bottom strand absolute bounds (comma-separated, no spaces)" />
178 <param name="nCTOT" type="text" value="" label="Complementary to the original top strand absolute bounds (comma-separated, no spaces)" />
179 <param name="nCTOB" type="text" value="" label="Complementary to the original bottom strand absolute bounds (comma-separated, no spaces)" />
180 </when>
181 <when value=""/>
182 </conditional>
183 </inputs>
184 <outputs>
<!-- Filters that read an advanced option are guarded by advanced_options['options'] == "yes": the option keys (logit, methylKit, counts, fraction, CHG, CHH) are only defined under the "yes" case, and an unguarded lookup would raise instead of evaluating to false. -->
<!-- Default CpG bedGraph: produced for "extract" unless one of the alternative output modes is selected -->
185 <data name="outFileExtractCpG" format="bedgraph" from_work_dir="output_CpG.bedGraph"
186 label="${tool.name} on ${on_string}">
187 <filter>main_task['task'] == "extract"</filter>
188 <filter>advanced_options['options'] != "yes" or not advanced_options['logit']</filter>
189 <filter>advanced_options['options'] != "yes" or not advanced_options['methylKit']</filter>
190 <filter>advanced_options['options'] != "yes" or not advanced_options['counts']</filter>
191 <filter>advanced_options['options'] != "yes" or not advanced_options['fraction']</filter>
192 </data>
193 <data name="outFileExtractCpGLogit" format="bedgraph" from_work_dir="output_CpG.logit.bedGraph"
194 label="${tool.name} on ${on_string} (logit)">
195 <filter>main_task['task'] == "extract" and advanced_options['options'] == "yes" and advanced_options['logit']</filter>
196 </data>
<!-- NOTE(review): format="text" - Galaxy's stock plain-text extension is usually "txt"; confirm "text" is a registered datatype (the tests assert ftype="text" as well, so change both together) -->
197 <data name="outFileExtractCpGMethylKit" format="text" from_work_dir="output_CpG.methylKit"
198 label="${tool.name} on ${on_string} (MethylKit)">
199 <filter>main_task['task'] == "extract" and advanced_options['options'] == "yes" and advanced_options['methylKit']</filter>
200 </data>
201 <data name="outFileExtractCpGCounts" format="bedgraph" from_work_dir="output_CpG.counts.bedGraph"
202 label="${tool.name} on ${on_string} (counts)">
203 <filter>main_task['task'] == "extract" and advanced_options['options'] == "yes" and advanced_options['counts']</filter>
204 </data>
205 <data name="outFileExtractCpGFraction" format="bedgraph" from_work_dir="output_CpG.meth.bedGraph"
206 label="${tool.name} on ${on_string} (fraction)">
207 <filter>main_task['task'] == "extract" and advanced_options['options'] == "yes" and advanced_options['fraction']</filter>
208 </data>
<!-- CHG outputs: one per output mode, all requiring the CHG option -->
209 <data name="outFileExtractCHG" format="bedgraph" from_work_dir="output_CHG.bedGraph"
210 label="${tool.name} on ${on_string} (CHG)">
211 <filter>main_task['task'] == 'extract'</filter>
212 <filter>advanced_options['options'] == "yes"</filter>
213 <filter>advanced_options['CHG']</filter>
214 <filter>not advanced_options['logit']</filter>
215 <filter>not advanced_options['methylKit']</filter>
216 <filter>not advanced_options['counts']</filter>
217 <filter>not advanced_options['fraction']</filter>
218 </data>
219 <data name="outFileExtractCHGLogit" format="bedgraph" from_work_dir="output_CHG.logit.bedGraph"
220 label="${tool.name} on ${on_string} (CHG logit)">
221 <filter>main_task['task'] == "extract"</filter>
222 <filter>advanced_options['options'] == "yes"</filter>
223 <filter>advanced_options['CHG'] and advanced_options['logit']</filter>
224 </data>
225 <data name="outFileExtractCHGMethylKit" format="text" from_work_dir="output_CHG.methylKit"
226 label="${tool.name} on ${on_string} (CHG MethylKit)">
227 <filter>main_task['task'] == "extract"</filter>
228 <filter>advanced_options['options'] == "yes"</filter>
229 <filter>advanced_options['CHG'] and advanced_options['methylKit']</filter>
230 </data>
231 <data name="outFileExtractCHGCounts" format="bedgraph" from_work_dir="output_CHG.counts.bedGraph"
232 label="${tool.name} on ${on_string} (CHG counts)">
233 <filter>main_task['task'] == "extract"</filter>
234 <filter>advanced_options['options'] == "yes"</filter>
235 <filter>advanced_options['CHG'] and advanced_options['counts']</filter>
236 </data>
237 <data name="outFileExtractCHGFraction" format="bedgraph" from_work_dir="output_CHG.meth.bedGraph"
238 label="${tool.name} on ${on_string} (CHG fraction)">
239 <filter>main_task['task'] == "extract"</filter>
240 <filter>advanced_options['options'] == "yes"</filter>
241 <filter>advanced_options['CHG'] and advanced_options['fraction']</filter>
242 </data>
<!-- CHH outputs: same layout as the CHG outputs, requiring the CHH option -->
243 <data name="outFileExtractCHH" format="bedgraph" from_work_dir="output_CHH.bedGraph"
244 label="${tool.name} on ${on_string} (CHH)">
245 <filter>main_task['task'] == 'extract'</filter>
246 <filter>advanced_options['options'] == "yes"</filter>
247 <filter>advanced_options['CHH']</filter>
248 <filter>not advanced_options['logit']</filter>
249 <filter>not advanced_options['methylKit']</filter>
250 <filter>not advanced_options['counts']</filter>
251 <filter>not advanced_options['fraction']</filter>
252 </data>
253 <data name="outFileExtractCHHLogit" format="bedgraph" from_work_dir="output_CHH.logit.bedGraph"
254 label="${tool.name} on ${on_string} (CHH logit)">
255 <filter>main_task['task'] == "extract"</filter>
256 <filter>advanced_options['options'] == "yes"</filter>
257 <filter>advanced_options['CHH'] and advanced_options['logit']</filter>
258 </data>
259 <data name="outFileExtractCHHMethylKit" format="text" from_work_dir="output_CHH.methylKit"
260 label="${tool.name} on ${on_string} (CHH MethylKit)">
261 <filter>main_task['task'] == "extract"</filter>
262 <filter>advanced_options['options'] == "yes"</filter>
263 <filter>advanced_options['CHH'] and advanced_options['methylKit']</filter>
264 </data>
265 <data name="outFileExtractCHHCounts" format="bedgraph" from_work_dir="output_CHH.counts.bedGraph"
266 label="${tool.name} on ${on_string} (CHH counts)">
267 <filter>main_task['task'] == "extract"</filter>
268 <filter>advanced_options['options'] == "yes"</filter>
269 <filter>advanced_options['CHH'] and advanced_options['counts']</filter>
270 </data>
271 <data name="outFileExtractCHHFraction" format="bedgraph" from_work_dir="output_CHH.meth.bedGraph"
272 label="${tool.name} on ${on_string} (CHH fraction)">
273 <filter>main_task['task'] == "extract"</filter>
274 <filter>advanced_options['options'] == "yes"</filter>
275 <filter>advanced_options['CHH'] and advanced_options['fraction']</filter>
276 </data>
<!-- mbias outputs: one SVG per strand, collected from the files the command touches after the run -->
277 <data name="outFileMbiasCpGOT" format="svg" from_work_dir="out_mbias_OT.svg"
278 label="${tool.name} on ${on_string} (methylation bias, original top strand)">
279 <filter>main_task['task'] == 'mbias'</filter>
280 </data>
281 <data name="outFileMbiasCpGOB" format="svg" from_work_dir="out_mbias_OB.svg"
282 label="${tool.name} on ${on_string} (methylation bias, original bottom strand)">
283 <filter>main_task['task'] == 'mbias'</filter>
284 </data>
285 <data name="outFileMbiasCpGCTOT" format="svg" from_work_dir="out_mbias_CTOT.svg"
286 label="${tool.name} on ${on_string} (methylation bias, complementary to the original top strand)">
287 <filter>main_task['task'] == 'mbias'</filter>
288 </data>
289 <data name="outFileMbiasCpGCTOB" format="svg" from_work_dir="out_mbias_CTOB.svg"
290 label="${tool.name} on ${on_string} (methylation bias, complementary to the original bottom strand)">
291 <filter>main_task['task'] == 'mbias'</filter>
292 </data>
293 </outputs>
294 <tests>
<!-- Test 1: default CpG extraction -->
295 <test>
296 <param name="task" value="extract" />
297 <param name="min_mapq" value="2" />
298 <param name="reference_source_selector" value="history" />
299 <param name="ref_file" value="cg100.fa" ftype="fasta" />
300 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
301 <param name="mergeContext" value="false"/>
302 <param name="options" value="yes"/>
303 <output name="outFileExtractCpG" file="test_1.bedGraph" ftype="bedgraph" compare="diff"/>
304 </test>
<!-- Test 2: methylation bias plot. The output name must match a declared <data> element; the tool declares one per strand (OT/OB/CTOT/CTOB). -->
<!-- NOTE(review): test_2_output.svg is assumed to be the original-top-strand plot - confirm against the test data -->
305 <test>
306 <param name="task" value="mbias" />
307 <param name="min_mapq" value="2" />
308 <param name="options" value="yes"/>
309 <param name="reference_source_selector" value="history" />
310 <param name="ref_file" value="cg100.fa" ftype="fasta" />
311 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
312 <output name="outFileMbiasCpGOT" file="test_2_output.svg" ftype="svg" compare="diff"/>
313 </test>
<!-- Test 3: CpG extraction plus the additional CHH output -->
314 <test>
315 <param name="task" value="extract" />
316 <param name="min_mapq" value="2" />
317 <param name="options" value="yes"/>
318 <param name="CHH" value="True"/>
319 <param name="reference_source_selector" value="history" />
320 <param name="ref_file" value="cg100.fa" ftype="fasta" />
321 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
322 <param name="mergeContext" value="false"/>
324 <output name="outFileExtractCpG" file="test_1.bedGraph" ftype="bedgraph" compare="diff"/>
325 <output name="outFileExtractCHH" file="test_2.bedGraph" ftype="bedgraph" compare="diff"/>
326 </test>
<!-- Test 4: fraction output mode -->
327 <test>
328 <param name="task" value="extract" />
329 <param name="min_mapq" value="2" />
330 <param name="reference_source_selector" value="history" />
331 <param name="ref_file" value="cg100.fa" ftype="fasta" />
332 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
333 <param name="options" value="yes"/>
334 <param name="fraction" value="true" />
335 <output name="outFileExtractCpGFraction" file="test_3.bedGraph" ftype="bedgraph" compare="diff"/>
336 </test>
<!-- Test 5: logit output mode -->
337 <test>
338 <param name="task" value="extract" />
339 <param name="min_mapq" value="2" />
340 <param name="reference_source_selector" value="history" />
341 <param name="ref_file" value="cg100.fa" ftype="fasta" />
342 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
343 <param name="options" value="yes"/>
344 <param name="logit" value="true" />
345 <output name="outFileExtractCpGLogit" file="test_4.bedGraph" ftype="bedgraph" compare="diff"/>
346 </test>
<!-- Test 6: methylKit output mode -->
347 <test>
348 <param name="task" value="extract" />
349 <param name="min_mapq" value="2" />
350 <param name="reference_source_selector" value="history" />
351 <param name="ref_file" value="cg100.fa" ftype="fasta" />
352 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
353 <param name="options" value="yes"/>
354 <param name="methylKit" value="true" />
355 <output name="outFileExtractCpGMethylKit" file="test_5.methylKit" ftype="text" compare="diff"/>
356 </test>
357 </tests>
358 <help><![CDATA[
359
360 .. image:: $PATH_TO_IMAGES/MethylDackelLogo.png
361 :align: left
362
363 **What it does**
364
365 MethylDackel (formerly named PileOMeth, which was a temporary name derived due to it using a PILEup to extract METHylation metrics) will process a coordinate-sorted and indexed BAM or CRAM file containing some form of BS-seq alignments and extract per-base methylation metrics from them. MethylDackel requires an indexed fasta file containing the reference genome as well.
366
367 By default, MethylDackel will only calculate metrics for Cytosines in a CpG context, but metrics for those in CHG and CHH contexts are supported as well.
368
369 **Methylation context**
370
371 MethylDackel groups all Cytosines into one of three sequence contexts: CpG, CHG, and CHH. Here, H is the IUPAC ambiguity code for any nucleotide other than G. If an N is encountered in the reference sequence, then the context will be assigned to CHG or CHH, as appropriate (e.g., CNG would be categorized as in a CHG context and CNC as in a CHH context). If a Cytosine is close enough to the end of a chromosome/contig such that its context can't be inferred, then it is categorized as CHH (e.g., a Cytosine as the last base of a chromosome is considered as being in a CHH context).
372
373
374 **Output information**
375
376 If no methylation can be found, the output will be empty.
377
378 Otherwise a variant of bedGraph that's similar to the "coverage" file is produced. In short, each line consists of 6 tab separated columns:
379
380 1. The chromosome/contig/scaffold name
381 2. The start coordinate
382 3. The end coordinate
383 4. The methylation percentage rounded to an integer
384 5. The number of alignments/pairs reporting methylated bases
385 6. The number of alignments/pairs reporting unmethylated bases
386
387 All coordinates are 0-based half open, which conforms to the bedGraph definition. When paired-end reads are aligned, it can often occur that their alignments overlap. In such cases, MethylDackel will not count both reads of the pair in its output, as doing so would lead to incorrect downstream statistical results.
388
389 An example of the output is below::
390
391 #track type="bedGraph" description="SRR1182519.sorted CpG methylation levels"
392 #1 25115 25116 100 3 0
393 #1 29336 29337 50 1 1
394
395 Note the header line, which starts with "track". The "description" field is used as a label in programs such as IGV. Each of the subsequent lines describe single Cytosines, the 25116th and 29337th base on chromosome 1, respectively. The first position has 3 alignments (or pairs of alignments) indicating methylation and 0 indicating unmethylation (100% methylation) and the second position has 1 alignment each supporting methylation and unmethylation (50% methylation).
396
397 **Per-CpG/CHG metrics**
398
399 In many circumstances, it's desirable for metrics from individual Cytosines in a CpG to be merged, producing per-CpG metrics rather than per-Cytosine metrics. This can be accomplished with the **Merge per-Cytosine** parameter. If this is used, then this output::
400
401 #track type="bedGraph" description="SRR1182519.sorted CpG methylation levels"
402 #1 25114 25115 100 2 1
403 #1 25115 25116 100 3 0
404
405 is changed to this::
406
407 #track type="bedGraph" description="SRR1182519.sorted merged CpG methylation levels"
408 #1 25114 25116 100 5 1
409
410 This also works for CHG-level metrics. If bedGraph files containing per-Cytosine metrics already exist, they can be converted to instead contain per-CpG/CHG metrics with MethylDackel mergeContext.
411
412 **Methylation bias plotting and correction**
413
414 In an ideal experiment, we expect that the probability of observing a methylated C is constant across the length of any given read. In practice, however, there are often increases/decreases in observed methylation rate at the ends of reads and/or more global changes. These are termed methylation bias and including such regions in the extracted methylation metrics will result in noisier and less accurate data. For this reason, users are strongly encouraged to make a methylation bias plot.
415
416 Selecting the methylation bias task under **What do you want to do?** will create a methylation bias (mbias for short) plot for each of the strands for which there are valid alignments.
417 The resulting mbias graphs are in SVG format and can be viewed in most modern web browsers:
418
419 .. image:: $PATH_TO_IMAGES/example.svg
420
421
422 If you have paired-end data, both reads in the pair will be shown separately, as is the case above. The program will suggest regions for inclusion ("--OT 2,0,0,98" above) and mark them on the plot, if applicable. The format of this output is described in MethylDackel extract -h. These suggestions should not be accepted blindly; users are strongly encouraged to have a look for themselves and tweak the actual bounds as appropriate. The lines indicate the average methylation percentage at a given position and the shaded regions the 99.9% confidence interval around it. This is useful in gauging how many methylation calls a given position has relative to its neighbors. Note the spike in methylation at the end of read #2 and the corresponding dip at the beginning of read #1. This is common and these regions can be ignored with the suggested trimming bounds. Note also that the numbers refer to the first and last base that should be included during methylation extraction, not the last and first base to ignore!
423
424 **Excluding low-coverage regions**
425
426 If your downstream analysis requires an absolute minimum coverage (here, defined as the number of methylation calls kept after filtering for MAPQ, phred score, etc.), you can use the `--minDepth` option to achieve this. By default, `MethylDackel extract` will output all methylation metrics as long as the coverage is at least 1. If you use `--minDepth 10`, then only sites covered at least 10x will be output. This works in conjunction with the `--mergeContext` option, above. So if you request per-CpG context output (i.e., with `--mergeContext`) and `--minDepth 10` then only CpGs with a minimum coverage of 10 will be output.
427
428 **Logit, fraction, and counts only output**
429
430 The standard output described above can be modified if you supply the `--fraction`, `--counts`, or `--logit` options to `MethylDackel extract`.
431
432 The `--fraction` option essentially produces the first 4 columns of the standard output described above. The only other difference is that the range of the 4th column is now between 0 and 1, instead of 0 and 100. Instead of producing a file ending simply in `.bedGraph`, one ending in `.meth.bedGraph` will instead be produced.
433
434 The `--counts` option produces the first three columns of the standard output followed by a column of total coverage counts. This last column is equivalent to the sum of the 5th and 6th columns of the standard output. The resulting file ends in `.counts.bedGraph` rather than simply `.bedGraph`.
435
436 The `--logit` option produces the first three columns of the standard output followed by the logit transformed methylation fraction. The logit transformation is log(Methylation fraction/(1-Methylation fraction)). Note that log uses base e. Logit transformed methylation values range between +/- infinity, rather than [0,1]. The resulting file ends in `.logit.bedGraph` rather than simply `.bedGraph`.
437
438 Note that these options may be combined with `--mergeContext`. However, `MethylDackel mergeContext` can not be used after the fact to combine these.
439
440 **methylKit-compatible output**
441
442 methylKit has its own format, which can be produced with the `--methylKit` option. Merging Cs into CpGs or CHGs is forbidden in this format. Likewise, this option is mutually exclusive with `--logit` et al.
443
444
445 -----
446
447 **MethylDackel** is Free and Open Source Software; see more details on the MethylDackel_ website.
448
449 .. _MethylDackel: https://github.com/dpryan79/MethylDackel
450 ]]></help>
451 <citations> <!-- NOTE(review): no citation entries are declared for this tool; add one if a citable reference exists -->
452 </citations>
453 </tool>