comparison moabs.xml @ 0:26d7ec4af119 draft

"planemo upload for repository https://github.com/sunnyisgalaxy/moabs commit fca680a439f168971afc9944ccbbdd9b3b65c845"
author iuc
date Fri, 06 Sep 2019 09:54:27 -0400
parents
children 8c8cc81b34cd
comparison
equal deleted inserted replaced
-1:000000000000 0:26d7ec4af119
1 <tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@">
2 <description>MOdel based Analysis of Bisulfite Sequencing data</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
<!-- Cheetah-templated shell command. Steps, chained with the shell AND operator: (1) if the comparison is disabled, stage the user-supplied comparison file as comp.g1.vs.g2.txt; (2) run moabs with the generated config file; (3) copy the selected result files to the Galaxy outputs. -->
7 <command detect_errors="exit_code">
8 <![CDATA[
## compare_selector "0" = do not run the comparison; reuse the file supplied as compFile
9 #if str( $mcomp_advanced.doComp.compare_selector ) == "0":
10 cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt &&
11 #end if
## MMAP.p / MCALL.p / MCOMP.p are set from GALAXY_SLOTS (falling back to 4); presumably the per-step thread count, TODO confirm against the moabs documentation
12 moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' &&
## "1" and "2" are option values of the output_selector parameter
13 #if "1" in $output_selector:
14 cp -f dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr '$output1' &&
15 #end if
16 #if "2" in $output_selector:
17 cp -f comp.g1.vs.g2.txt '$output2' &&
18 #end if
## every command above ends in &&, so a final command is needed to close the chain
19 echo Done
20 ]]>
21 </command>
22 <configfiles>
<!-- cfg_file is the moabs configuration handed to moabs through its cf option; it is rendered with Cheetah from the tool parameters. -->
23 <configfile name="cfg_file">
## Resolve the reference FASTA: a history dataset is used directly, a cached genome is resolved through the path field of the selected data table entry
24 #if str( $reference_source.reference_source_selector ) == "history":
25 #set $reference_fasta_filename = $reference_source.ref_file
26 #else:
27 #set $reference_fasta_filename = $reference_source.ref_file.fields.path
28 #end if
29 [INPUT]
## Group 1: one entry per repeat element; keys are s1_r<N>_1 and s1_r<N>_2 for paired data, s1_r<N> for single-end data (N is the 1-based position in the repeat)
30 #for $i, $s in enumerate( $g1_fastq )
31 #if str( $s.fastq_input.fastq_input_selector ) == "paired":
32 s1_r${i+1}_1='$s.fastq_input.fastq_input1'
33 s1_r${i+1}_2='$s.fastq_input.fastq_input2'
34 #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
35 s1_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
36 s1_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
37 #else:
38 s1_r${i+1}='$s.fastq_input.fastq_input1'
39 #end if
40 #end for
41
## Group 2: same scheme as group 1, with the s2 prefix
42 #for $i, $s in enumerate( $g2_fastq )
43 #if str( $s.fastq_input.fastq_input_selector ) == "paired":
44 s2_r${i+1}_1='$s.fastq_input.fastq_input1'
45 s2_r${i+1}_2='$s.fastq_input.fastq_input2'
46 #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
47 s2_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
48 s2_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
49 #else:
50 s2_r${i+1}='$s.fastq_input.fastq_input1'
51 #end if
52 #end for
53
54 [TASK]
55 Program=MMAP
56 Label=g1,g2
57 Parallel=NONE
58
59 [MMAP]
60 Path=bsmap
61 d='${reference_fasta_filename}'
## v is only written when a mismatch rate or number was chosen (selector other than "0")
62 #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0":
63 v=$bsmap_advanced.bsmap_mismatch.v
64 #end if
65 n=$bsmap_advanced.n
66 r=$bsmap_advanced.r
67 R=''
68
69 [MCALL]
70 Path=mcall
71 r='${reference_fasta_filename}'
72
73 [MCOMP]
74 Path=mcomp
75 reference='${reference_fasta_filename}'
## "1" runs the comparison, "0" reuses the comparison file staged by the command block
76 doComp=$mcomp_advanced.doComp.compare_selector
77 </configfile>
78 </configfiles>
79
80 <inputs>
<!-- Reference genome: either an entry of the all_fasta data table ("cached") or a FASTA dataset from the history. Both branches expose the parameter under the same name, ref_file. -->
81 <conditional name="reference_source">
82 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a cache FASTA?" help="Cached FASTA">
83 <option value="cached">Use a cached genome FASTA</option>
84 <option value="history">Use a genome FASTA from history</option>
85 </param>
86 <when value="cached">
87 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
88 <options from_data_table="all_fasta">
89 <filter type="sort_by" column="2" />
90 <validator type="no_options" message="No genome FASTA are available" />
91 </options>
92 </param>
93 </when>
94 <when value="history">
95 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
96 </when>
97 </conditional>
<!-- Group 1 reads: at least one repeat element is required. Each element is a single dataset, a forward/reverse pair of datasets, or one paired collection. -->
98 <repeat name="g1_fastq" title="Group1: fastq files" min="1">
99 <conditional name="fastq_input">
100 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
101 <option value="single">Single</option>
102 <option value="paired">Paired</option>
103 <option value="paired_collection">Paired Collection</option>
104 </param>
105 <when value="paired">
106 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
107 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
108 </when>
109 <when value="single">
110 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
111 </when>
112 <when value="paired_collection">
113 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
114 </when>
115 </conditional>
116 </repeat>
<!-- Group 2 reads: same structure as the g1_fastq repeat (at least one element; single, paired, or paired collection). -->
117 <repeat name="g2_fastq" title="Group2: fastq files" min="1">
118 <conditional name="fastq_input">
119 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
120 <option value="single">Single</option>
121 <option value="paired">Paired</option>
122 <option value="paired_collection">Paired Collection</option>
123 </param>
124 <when value="paired">
125 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
126 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
127 </when>
128 <when value="single">
129 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
130 </when>
131 <when value="paired_collection">
132 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
133 </when>
134 </conditional>
135 </repeat>
<!-- Optional BSMAP settings. The values chosen here are written to the [MMAP] section of the moabs config file (keys v, n and r). -->
136 <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False">
137 <conditional name="bsmap_mismatch">
138 <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help="">
139 <option value="0">Do not set</option>
140 <option value="1">Set the mismatch rate</option>
141 <option value="2">Set the mismatch number</option>
142 </param>
<!-- Explicit empty branch: "Do not set" takes no extra parameters. Declaring it gives every select option a matching when block. -->
<when value="0"/>
143 <when value="1">
144 <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t to the read length"/>
145 </when>
146 <when value="2">
147 <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/>
148 </when>
149 </conditional>
150 <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+)"/>
151 <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow)">
152 <option value="0" selected="true">0</option>
153 <option value="1">1</option>
154 <option value="2">2</option>
155 </param>
156 </section>
<!-- Optional MCOMP settings. compare_selector is written to the config file as doComp; with "No" the supplied comparison file is copied to comp.g1.vs.g2.txt before moabs runs. -->
157 <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False">
158 <conditional name="doComp">
159 <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`">
160 <option value="1">Yes</option>
161 <option value="0">No</option>
162 </param>
<!-- Explicit empty branch: running the comparison takes no extra parameters. Declaring it gives every select option a matching when block. -->
<when value="1"/>
163 <when value="0">
164 <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/>
165 </when>
166 </conditional>
167 </section>
<!-- Which results to keep. The option values "1" to "4" are referenced by the command block (copy steps for "1" and "2") and by the output filters. The parameter is optional and has no preselected option. -->
168 <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help="">
169 <option value="1"> dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr </option>
170 <option value="2"> comp.g1.vs.g2.txt </option>
171 <option value="3"> BAM files </option>
172 <option value="4"> Methylation calling BED files </option>
173 </param>
174 </inputs>
<!-- Each output is created only when its option value is ticked in output_selector. output_selector is optional, so it has no value when nothing is ticked; the leading "output_selector and" makes the filter evaluate to false in that case instead of failing on the membership test. -->
175 <outputs>
176 <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr">
177 <filter> output_selector and "1" in output_selector </filter>
178 </data>
179 <data name="output2" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt">
180 <filter> output_selector and "2" in output_selector </filter>
181 </data>
<!-- Collected from the working directory: every file whose name ends in .bam -->
182 <collection name="output_collection_bam" type="list" label="BAM files">
183 <filter> output_selector and "3" in output_selector </filter>
184 <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext='bam'/>
185 </collection>
<!-- Collected from the working directory: g1.G.bed and g2.G.bed -->
186 <collection name="output_collection_bed" type="list" label="Methylation calling BED files">
187 <filter> output_selector and "4" in output_selector </filter>
188 <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext='interval'/>
189 </collection>
190 </outputs>
191 <tests>
192 <test>
193 <!-- test single-end reads: two replicates per group, reference FASTA from the history, comparison supplied as a precomputed file, all four output kinds requested and checked -->
194 <param name="reference_source_selector" value="history"/>
195 <param name="ref_file" ftype="fasta" value="chr11.fa"/>
196 <repeat name="g1_fastq">
197 <conditional name="fastq_input">
198 <param name="fastq_input_selector" value="single"/>
199 <param name="fastq_input1" value="WTPE1.fastq.gz"/>
200 </conditional>
201 </repeat>
202 <repeat name="g1_fastq">
203 <conditional name="fastq_input">
204 <param name="fastq_input_selector" value="single"/>
205 <param name="fastq_input1" value="WTPE2.fastq.gz"/>
206 </conditional>
207 </repeat>
208 <repeat name="g2_fastq">
209 <conditional name="fastq_input">
210 <param name="fastq_input_selector" value="single"/>
211 <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
212 </conditional>
213 </repeat>
214 <repeat name="g2_fastq">
215 <conditional name="fastq_input">
216 <param name="fastq_input_selector" value="single"/>
217 <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
218 </conditional>
219 </repeat>
220 <conditional name="doComp">
221 <param name="compare_selector" value="0"/>
222 <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
223 </conditional>
224 <!--
225 <conditional name="doComp">
226 <param name="compare_selector" value="1"/>
227 </conditional>
228 -->
229 <param name="output_selector" value="1,2,3,4"/>
230 <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
231 <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
232 <output_collection name="output_collection_bam" count="4">
233 <element name="g1_r1.bam" file="SE_g1_r1.bam" compare="sim_size"/>
234 <element name="g1_r2.bam" file="SE_g1_r2.bam" compare="sim_size"/>
235 <element name="g2_r1.bam" file="SE_g2_r1.bam" compare="sim_size"/>
236 <element name="g2_r2.bam" file="SE_g2_r2.bam" compare="sim_size"/>
237 </output_collection>
238 <output_collection name="output_collection_bed" count="2">
239 <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/>
240 <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/>
241 </output_collection>
242 </test>
243 <test>
244 <!-- test paired-end reads: one replicate per group given as separate forward and reverse datasets, comparison supplied as a precomputed file; only the DMR and comparison outputs are requested -->
245 <param name="reference_source_selector" value="history"/>
246 <param name="ref_file" ftype="fasta" value="seg.fa"/>
247 <repeat name="g1_fastq">
248 <conditional name="fastq_input">
249 <param name="fastq_input_selector" value="paired"/>
250 <param name="fastq_input1" value="6_all_1.fq.gz"/>
251 <param name="fastq_input2" value="6_all_2.fq.gz"/>
252 </conditional>
253 </repeat>
254 <repeat name="g2_fastq">
255 <conditional name="fastq_input">
256 <param name="fastq_input_selector" value="paired"/>
257 <param name="fastq_input1" value="8_all_1.fq.gz"/>
258 <param name="fastq_input2" value="8_all_2.fq.gz"/>
259 </conditional>
260 </repeat>
261 <conditional name="doComp">
262 <param name="compare_selector" value="0"/>
263 <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
264 </conditional>
265 <!--
266 <conditional name="doComp">
267 <param name="compare_selector" value="1"/>
268 </conditional>
269 -->
270 <param name="output_selector" value="1,2"/>
271 <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
272 <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
273 </test>
274 <test>
275 <!-- test paired collection: same read files and expected outputs as the paired-end test, with each replicate supplied as a paired collection (forward/reverse elements) -->
276 <param name="reference_source_selector" value="history"/>
277 <param name="ref_file" ftype="fasta" value="seg.fa"/>
278 <repeat name="g1_fastq">
279 <conditional name="fastq_input">
280 <param name="fastq_input_selector" value="paired_collection"/>
281 <param name="fastq_input1">
282 <collection type="paired">
283 <element name="forward" value="6_all_1.fq.gz" />
284 <element name="reverse" value="6_all_2.fq.gz" />
285 </collection>
286 </param>
287 </conditional>
288 </repeat>
289 <repeat name="g2_fastq">
290 <conditional name="fastq_input">
291 <param name="fastq_input_selector" value="paired_collection"/>
292 <param name="fastq_input1">
293 <collection type="paired">
294 <element name="forward" value="8_all_1.fq.gz" />
295 <element name="reverse" value="8_all_2.fq.gz" />
296 </collection>
297 </param>
298 </conditional>
299 </repeat>
300 <conditional name="doComp">
301 <param name="compare_selector" value="0"/>
302 <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
303 </conditional>
304 <!--
305 <conditional name="doComp">
306 <param name="compare_selector" value="1"/>
307 </conditional>
308 -->
309 <param name="output_selector" value="1,2"/>
310 <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
311 <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
312 </test>
313 <test>
314 <!-- test data table reference: same reads and expected outputs as the single-end test, with the reference chosen from the cached genomes (value chr11) instead of the history -->
315 <param name="reference_source_selector" value="cached"/>
316 <param name="ref_file" value="chr11"/>
317 <repeat name="g1_fastq">
318 <conditional name="fastq_input">
319 <param name="fastq_input_selector" value="single"/>
320 <param name="fastq_input1" value="WTPE1.fastq.gz"/>
321 </conditional>
322 </repeat>
323 <repeat name="g1_fastq">
324 <conditional name="fastq_input">
325 <param name="fastq_input_selector" value="single"/>
326 <param name="fastq_input1" value="WTPE2.fastq.gz"/>
327 </conditional>
328 </repeat>
329 <repeat name="g2_fastq">
330 <conditional name="fastq_input">
331 <param name="fastq_input_selector" value="single"/>
332 <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
333 </conditional>
334 </repeat>
335 <repeat name="g2_fastq">
336 <conditional name="fastq_input">
337 <param name="fastq_input_selector" value="single"/>
338 <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
339 </conditional>
340 </repeat>
341 <conditional name="doComp">
342 <param name="compare_selector" value="0"/>
343 <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
344 </conditional>
345 <!--
346 <conditional name="doComp">
347 <param name="compare_selector" value="1"/>
348 </conditional>
349 -->
350 <param name="output_selector" value="1,2"/>
351 <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
352 <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
353 </test>
354 </tests>
355 <help>
356 <![CDATA[
357 **MOABS: MOdel based Analysis of Bisulfite Sequencing data**
358
359 MOABS is a comprehensive, accurate and efficient solution for analysis of large
360 scale base-resolution DNA methylation data, bisulfite sequencing or single
361 molecule direct sequencing.
362
363 MOABS seamlessly integrates alignment, methylation calling, identification of
364 hypomethylation for one sample and differential methylation for multiple
365 samples, and other downstream analysis.
366
367 For more information, check https://github.com/sunnyisgalaxy/moabs.
368
369 -----
370
371 **Input files**
372
373 MOABS takes bisulfite sequencing reads from two groups of interest as input, e.g.
374 KO vs WT. Each group may combine sequencing libraries of different types, i.e.
375 single-end reads and/or paired-end reads. Multiple replicates can be specified in each group.
376
377 **Outputs**
378
379 Four output files can be selected to report, namely
380
381 1. **DMR region file** - the major result file
382 2. **Comparison file between two groups** - the intermediate comparison result
383 3. **BAM files** - intermediate BAM files
384 4. **Methylation BED files** - intermediate methylation BED files
385
386 -----
387
388 MOABS will detect differentially methylated regions (DMRs) using the input BS-Seq
389 reads. The output file is a tab-delimited text file (not strictly in BED
390 format), representing DMRs. It has 8 columns as below.
391
392 chrom<TAB>start<TAB>end<TAB>methylation_state<TAB>CpGsites<TAB>DMCcount<TAB>nonDMCcount<TAB>hidden_state
393
394 1. **chrom** - The chromosome of the region.
395 2. **start** - The start genomic locus of the region.
396 3. **end** - The end genomic locus of the region.
397 4. **methylation_state** - The methylation state of the region, "+"/"-" representing hyper- or hypo-methylation regions.
398 5. **CpGsites** - Total number of CpG sites in the region.
399 6. **DMCcount** - The number of differential methylated CpG sites (DMCs) in the region.
400 7. **nonDMCcount** - The number of non-DMCs in the region.
401 8. **hidden_state** - The hidden state predicted by Hidden Markov Model (HMM), "1"/"-1" representing hyper- or hypo-methylation states.
402
403 For example, six DMRs are identified in the following format.
404
405 @DMRExample@
406
407 -----
408
409 The intermediate comparison file summarizes methylation ratio comparison
410 results on CpG sites. It has 19 columns as below.
411
412 1. **chrom** - The chromosome of the CpG site.
413 2. **start** - The start position of the site.
414 3. **end** - The end position of the site.
415 4. **single** - The next two columns are attributes for the single position.
416 5. **totalC_0** - Total number of Cs in the first group.
417 6. **nominalRatio_0** - Nominal methylation ratio in the first group.
418 7. **ratioCI_0** - The confidence interval of the methylation ratio in the first group.
419 8. **single** - The next two columns are attributes for the single position.
420 9. **totalC_1** - Total number of Cs in the second group.
421 10. **nominalRatio_1** - Nominal methylation ratio in the second group.
422 11. **ratioCI_1** - The confidence interval of the methylation ratio in the second group.
423 12. **pair** - The next three columns are attributes for pairs of groups.
424 13. **nominalDif_1-0** - Nominal difference of methylation ratio between group 1 and group 0.
425 14. **credibleDif_1-0** - Credible methylation difference between group 1 and group 0.
426 15. **difCI_1-0** - Difference of confidence intervals between group 1 and group 0.
427 16. **p_sim** - The next column is the simulation p-value.
428 17. **p_sim_1_v_0** - Simulation p-value between group 1 and group 0.
429 18. **p_fet** - The next column is the FET p-value.
430 19. **p_fet_1_v_0** - FET p-value between group 1 and group 0.
431
432 The comparison result file can be reused for DMR calling.
433
434 -----
435
436 BAM files are intermediate mapping results of input reads to the reference
437 genome. These BAM files can be reused in downstream methylation analysis.
438
439 -----
440
441 Methylation calling BED files are intermediate methylation calling results of
442 Cs in two groups of input reads. These methylation calling results can be easily
443 reused in downstream DMR calling and visualization. The BED file has 15 columns
444 as below.
445
446 1. **chrom** - The chromosome of the site.
447 2. **start** - The start position of the site.
448 3. **end** - The end position of the site.
449 4. **ratio** - Methylation ratio in the site
450 5. **totalC** - Total number of reads in current Cs.
451 6. **methC** - Methylated Cs.
452 7. **strand** - The strand information for previous three columns.
453 8. **next** - The next base.
454 9. **Plus** - Next two columns are for forward strand.
455 10. **totalC** - Total number of Cs.
456 11. **methC** - Methylated Cs.
457 12. **Minus** - Next two columns are for reverse strand.
458 13. **totalC** - Total number of Cs.
459 14. **methC** - Methylated Cs.
460 15. **localSeq** - Local sequences.
461
462 ]]>
463 </help>
464 <expand macro="citations"/>
465 </tool>