comparison wtdbg.xml @ 0:6a060928f7ff draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/wtdbg commit c97be39112be9dc6118a3e12e51dcb15ed554274
author bgruening
date Tue, 12 Jun 2018 13:40:49 -0400
parents
children e100f3f4d80e
comparison
equal deleted inserted replaced
-1:000000000000 0:6a060928f7ff
1 <tool id="wtdbg" name="WTDBG" version="1.2.8.1">
2 <description>De novo assembler AND consensuser for long noisy sequences</description>
3 <macros> <!-- shared macro definitions are imported from macros.xml -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" /> <!-- expands the "requirements" macro (presumably defined in macros.xml) -->
7 <version_command>wtdbg -help | grep 'Version:'</version_command> <!-- keeps only the help line(s) containing 'Version:' -->
8 <command detect_errors="exit_code"><![CDATA[
9 wtdbg
10 -t \${GALAXY_SLOTS:-4}
11 -i '$i'
12 -o 'dbg'
13 #if $I:
14 -I '$I'
15 #end if
16 #if $load_alignments:
17 --load-alignments '$load_alignments'
18 #end if
19 ## Dataset paths above are single-quoted; the options below come from typed integer/float/boolean params.
20 -k $k
21 -p $p
22 -K $K
23 -E $E
24 $F
25 -S $S
26 -X $X
27 -Y $Y
28 -x $x
29 -y $y
30 -l $l
31 -m $m
32 -s $s
33 --tidy-reads $tidy_reads
34 --edge-min $edge_min
35 $rescue_low_cov_edges
36 &&
37 wtdbg-cns
38 -t \${GALAXY_SLOTS:-4}
39 -o dbg.ctg.lay.fa
40 -i dbg.ctg.lay
41 -j $cns.j
42 -k $cns.k
43 -Z $cns.Z
44 -W $cns.W
45 -H $cns.H
46 -L $cns.L
47 -c $cns.c
48 -M $cns.M
49 -X $cns.X
50 -I $cns.I
51 -D $cns.D
52 -E $cns.E
53 -m $cns.m
54 -S $cns.S
55 ## wtdbg-cns runs only if wtdbg succeeded; it reads dbg.ctg.lay and writes dbg.ctg.lay.fa.
56 ]]></command>
57 <inputs>
58 <param type="data" argument="-i" format="fasta,fasta.gz" label="Long reads sequences file"/>
59 <param type="data" argument="-I" format="fasta,fasta.gz" optional="True" label="Error-free sequences file"/>
60 <param type="data" argument="--load-alignments" name="load_alignments" format="tabular" optional="True" label="Load pre-computed alignments"/>
61 <!-- Single-letter wtdbg options: the parameter name is derived from the argument attribute (leading dash stripped) -->
62 <param argument="-k" type="integer" value="0" min="0" max="25" label="Kmer fsize" />
63 <param argument="-p" type="integer" value="21" min="0" max="25" label="Kmer psize" />
64 <param argument="-K" type="float" value="1000" min="0" max="65535" label="Filter high frequency kmers" />
65 <param argument="-E" type="integer" value="2" label="Min kmer frequency" />
66 <param argument="-F" type="boolean" truevalue="-F" falsevalue="" checked="False" label="Filter low frequency kmers by a 4G-bytes array" />
67 <param argument="-S" type="integer" value="4" label="Subsampling kmers, 1/S kmers are indexed" />
68 <param argument="-X" type="integer" value="4" label="Max number of bin (256bp) in one gap" />
69 <param argument="-Y" type="integer" value="4" label="Max number of bin (256bp) in one deviation" />
70 <param argument="-x" type="integer" value="-7" label="penalty for BIN gap" />
71 <param argument="-y" type="integer" value="-21" label="penalty for BIN deviation" />
72 <param argument="-l" type="float" value="2048" min="1" label="Min length of alignment" />
73 <param argument="-m" type="float" value="200" label="Min matched" />
74 <param argument="-s" type="float" value="0.2" label="Max length variation of two aligned fragments" />
75 <!-- Long options: the explicit name attribute is the variable used in the command template -->
76 <param argument="--tidy-reads" name="tidy_reads" type="integer" value="0" label="Filter reads less than tidy-reads" />
77 <param argument="--edge-min" name="edge_min" type="integer" value="3" label="The minimal depth of a valid edge set to" />
78 <param argument="--rescue-low-cov-edges" name="rescue_low_cov_edges" type="boolean" truevalue="--rescue-low-cov-edges"
79 falsevalue="" label="Try to rescue low coverage edges" />
80
81 <section name="cns" title="Consensus options">
82 <!-- optional inputs -->
83 <!-- <param argument="-i" type="data" format="utg.cns" label="Input file(s) *.utg.cns" /> -->
84 <!-- Parameters in this section are referenced as $cns.NAME in the command template -->
85 <param argument="-j" type="integer" value="1000" label="Expected length of node" />
86 <param argument="-k" type="integer" value="15" label="Kmer size for long reads" />
87 <param argument="-Z" type="integer" value="4" label="Z-cutoff, drop the lower" />
88 <param argument="-W" type="integer" value="48" label="W-cutoff, drop the lagger (position)" />
89 <param argument="-H" type="integer" value="1" label="High coverage bonus" />
90 <param argument="-L" type="integer" value="10" label="High coverage cutoff" />
91 <param argument="-c" type="select" label="Candidate strategy">
92 <option value="0" selected="true">best-kmers</option>
93 <option value="1" >median length</option>
94 <option value="2" >first (include)</option>
95 <option value="3" >first (exclude)</option>
96 <option value="4" >longest</option>
97 <option value="5" >shortest</option>
98 </param>
99
100 <param argument="-M" type="integer" value="2" label="Match score" />
101 <param argument="-X" type="integer" value="-7" label="Mismatch score" />
102 <param argument="-I" type="integer" value="-3" label="Insertion score" />
103 <param argument="-D" type="integer" value="-4" label="Deletion score" />
104 <param argument="-E" type="integer" value="-2" label="Gap extension score" />
105 <param argument="-m" type="select" label="Correction mode">
106 <option value="1" selected="true">DBG correction</option>
107 <option value="2" >DAG correction</option>
108 </param>
109 <param argument="-S" type="integer" value="1" label="Correct structure before error correction" />
110 </section>
111
112 </inputs>
113 <outputs>
114 <data name="output_alignments" format="tabular" label="${tool.name} alignments" from_work_dir="dbg.alignments" /> <!-- same datatype as the load_alignments input, so the file can be reused there -->
115 <data name="output_ctglay" format="txt" label="${tool.name} contigs layout" from_work_dir="dbg.ctg.lay" /> <!-- layout file that wtdbg-cns takes as its input -->
116 <data name="output_consensus" format="fasta" label="${tool.name} consensus" from_work_dir="dbg.ctg.lay.fa" /> <!-- consensus written by wtdbg-cns -->
117 </outputs>
118 <tests>
119 <test> <!-- default parameters; compares all three outputs -->
120 <param name="i" value="ecoli-reads.fa"/>
121 <output name="output_alignments" file="result1.alignments"/>
122 <output name="output_ctglay" file="result1.ctg.lay"/>
123 <output name="output_consensus" file="consensus_result1.fa"/>
124 </test>
125 <test> <!-- non-default read filtering and edge options; compares the consensus only -->
126 <param name="i" value="ecoli-reads.fa"/>
127 <param name="tidy_reads" value="5000"/>
128 <param name="edge_min" value="2"/>
129 <param name="rescue_low_cov_edges" value="True"/>
130 <output name="output_consensus" file="consensus_result2.fa"/>
131 </test>
132 <test> <!-- non-default consensus-section values; compares the consensus only -->
133 <param name="i" value="ecoli-reads.fa"/>
134 <param name="cns.c" value="1"/>
135 <param name="cns.E" value="-3"/>
136 <param name="cns.j" value="500"/>
137 <param name="cns.m" value="2"/>
138 <param name="cns.k" value="5"/>
139 <output name="output_consensus" file="consensus_result3.fa"/>
140 </test>
141 <!-- NOTE(review): the third test addresses section parameters as cns.NAME; confirm the Galaxy test framework resolves this form (the documented separator is '|'), otherwise the defaults are tested instead -->
142 </tests>
143
144 <help><![CDATA[
145 **What it does**
146
147 WTDBG is a de novo assembler for long noisy sequences, based on fuzzy Bruijn graphs (FBG).
148
149 **Alignment**
150
151 KBM (Kmer-BIN-Mapping) groups k-mers from each non-overlapping sliding 256 bp fragment in long reads into bins.
152 Bins in which most k-mers are high-frequency are filtered out as highly repetitive.
153 Then, KBM searches synteny of matched bin pairs in sequences in a dynamic programming way.
154 A matched bin pair in two sequences is defined as two bins of different origin that share a set of k-mers.
155 The resulting KBM alignments have the same features as traditional sequence alignments, except that the unit of
156 a KBM alignment is a 256 bp bin instead of a single base.
157
158 **Assembly**
159
160 FBG (Fuzzy Bruijn Graph) is composed of vertices of length 1024 bp from reads, and edges connecting vertices
161 in their order on read paths. Compared with DBG, the vertices in FBG are much bigger and thus are not
162 sensitive to small repeats. To tolerate high sequencing error rates, FBG's vertices are found using gapped
163 sequence alignments from KBM or other aligners, rather than by searching for identical k-mers as in DBG.
164
165 ]]></help>
166 <expand macro="citations" />
167 </tool>