Mercurial > repos > bgruening > flye
comparison flye.xml @ 9:276f5d8712d5 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flye commit 7c956f5b897dc366b2e5b7e37a2fea4b51a814f3"
author | bgruening |
---|---|
date | Tue, 23 Nov 2021 08:35:32 +0000 |
parents | e27815e82dd4 |
children | cb8dfd28c16f |
comparison
equal
deleted
inserted
replaced
8:e27815e82dd4 | 9:276f5d8712d5 |
---|---|
18 #set $ext = 'fasta' | 18 #set $ext = 'fasta' |
19 #end if | 19 #end if |
20 ln -s '$input' ./input_${counter}.${ext} && | 20 ln -s '$input' ./input_${counter}.${ext} && |
21 #end for | 21 #end for |
22 flye | 22 flye |
23 $mode | 23 $mode_conditional.mode |
24 #for $counter, $input in enumerate($inputs): | 24 #for $counter, $input in enumerate($inputs): |
25 ./input_${counter}.$ext | 25 ./input_${counter}.$ext |
26 #end for | 26 #end for |
27 -o out_dir | 27 -o out_dir |
28 -t \${GALAXY_SLOTS:-4} | 28 -t \${GALAXY_SLOTS:-4} |
29 -i $iterations | 29 -i $iterations |
30 #if $hifi_error: | 30 #if $mode_conditional.mode == '--pacbio-hifi' and $mode_conditional.hifi_error: |
31 --hifi-error $hifi_error | 31 --hifi-error $mode_conditional.hifi_error |
32 #end if | 32 #end if |
33 #if $min_overlap: | 33 #if $min_overlap: |
34 -m $min_overlap | 34 -m $min_overlap |
35 #end if | 35 #end if |
36 #if $asm.asm_select == 'true': | 36 #if $asm.asm_select == 'true': |
37 --asm-coverage $asm.asm_coverage | 37 --asm-coverage $asm.asm_coverage |
38 -g '${asm.genome_size}' | 38 -g '${asm.genome_size}' |
39 #end if | 39 #end if |
40 $plasmids | |
41 $meta | 40 $meta |
42 $trestle | 41 $scaffold |
43 ]]></command> | 42 ]]></command> |
44 <inputs> | 43 <inputs> |
45 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger.gz,fastqsanger" multiple="true" label="Input reads" /> | 44 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger.gz,fastqsanger" multiple="true" label="Input reads" /> |
46 <param name="mode" type="select" label="Mode"> | 45 <conditional name="mode_conditional"> |
47 <option value="--nano-raw">Nanopore raw</option> | 46 <param name="mode" type="select" label="Mode"> |
48 <option value="--nano-corr">Nanopore corrected</option> | 47 <option value="--nano-raw">Nanopore raw (--nano-raw)</option> |
49 <option value="--pacbio-hifi">PacBio HiFi</option> | 48 <option value="--nano-corr">Nanopore corrected (--nano-corr)</option> |
50 <option value="--pacbio-raw">PacBio raw</option> | 49 <option value="--nano-hq">Nanopore HQ (--nano-hq)</option> |
51 <option value="--pacbio-corr">PacBio corrected</option> | 50 <option value="--pacbio-raw">PacBio raw (--pacbio-raw)</option> |
52 <option value="--subassemblies">High-quality contig-like input</option> | 51 <option value="--pacbio-corr">PacBio corrected (--pacbio-corr)</option> |
53 </param> | 52 <option value="--pacbio-hifi">PacBio HiFi (--pacbio-hifi)</option> |
54 <param argument="--iterations" type="integer" value="0" label="Number of polishing iterations" | 53 </param> |
54 <when value="--nano-raw"/> | |
55 <when value="--nano-corr"/> | |
56 <when value="--nano-hq"/> | |
57 <when value="--pacbio-raw"/> | |
58 <when value="--pacbio-corr"/> | |
59 <when value="--pacbio-hifi"> | |
60 <param argument="--hifi-error" type="float" min="0" max="1" optional="true" label="Expected HiFi reads error rate" help="Default: 0.01"/> | |
61 </when> | |
62 </conditional> | |
63 <param argument="--iterations" type="integer" value="1" label="Number of polishing iterations" | |
55 help="Polishing is performed as the final assembly stage. By default, Flye runs one polishing iteration. Additional iterations | 64 help="Polishing is performed as the final assembly stage. By default, Flye runs one polishing iteration. Additional iterations |
56 might correct a small number of extra errors (due to improvements on how reads may align to the corrected assembly). If the | 65 might correct a small number of extra errors (due to improvements on how reads may align to the corrected assembly). If the |
57 parameter is set to 0, the polishing is not performed."/> | 66 parameter is set to 0, the polishing is not performed"/> |
58 <param argument="--min-overlap" type="integer" optional="true" label="Minimum overlap between reads" | 67 <param argument="--min-overlap" type="integer" min="1000" max="10000" optional="true" label="Minimum overlap between reads" |
59 help="This sets a minimum overlap length for two reads to be considered overlapping. By default it is chosen | 68 help="This sets a minimum overlap length for two reads to be considered overlapping. By default it is chosen |
60 automatically based on the read length distribution (reads N90) and does not require manual setting. Typical | 69 automatically based on the read length distribution (reads N90) and does not require manual setting. Typical |
61 value is 3k-5k (and down to 1k for datasets with shorter read length). Intuitively, we want to set this | 70 value is 3k-5k (and down to 1k for datasets with shorter read length). Intuitively, we want to set this |
62 parameter as high as possible, so the repeat graph is less tangled. However, higher values might lead to assembly gaps. | 71 parameter as high as possible, so the repeat graph is less tangled. However, higher values might lead to assembly gaps. |
63 In some rare cases it makes sense to manually increase minimum overlap for assemblies of big genomes with long reads and high coverage." /> | 72 In some rare cases it makes sense to manually increase minimum overlap for assemblies of big genomes with long reads and high coverage." /> |
64 <param argument="--hifi-error" type="float" min="0" max="1" optional="true" label="Expected HiFi reads error rate" help="Default: 0.01"/> | |
65 <param argument="--plasmids" type="boolean" truevalue="--plasmids" falsevalue="" checked="False" label="Rescue short unassembled plasmids" /> | |
66 <param argument="--keep-haplotypes" type="boolean" truevalue="--keep-haplotypes" falsevalue="" checked="False" label="Keep haplotypes" | 73 <param argument="--keep-haplotypes" type="boolean" truevalue="--keep-haplotypes" falsevalue="" checked="False" label="Keep haplotypes" |
67 help="By default, Flye collapses graph structures caused by alternative haplotypes (bubbles, superbubbles, roundabouts) to produce longer | 74 help="By default, Flye collapses graph structures caused by alternative haplotypes (bubbles, superbubbles, roundabouts) to produce longer |
68 consensus contigs. This option retains the alternative paths on the graph, producing less contiguous, but more detailed assembly."/> | 75 consensus contigs. This option retains the alternative paths on the graph, producing less contiguous, but more detailed assembly."/> |
69 <param argument="--trestle" type="boolean" truevalue="--trestle" falsevalue="" | 76 <param argument="--scaffold" type="boolean" truevalue="--scaffold" falsevalue="" label="Enable scaffolding using graph" |
70 checked="False" label="Enable Trestle" | 77 help="Starting from the version 2.9 Flye does not perform scaffolding by default, which guarantees that all assembled sequences do not have any gaps" /> |
71 help="Trestle is an extra module that resolves simple repeats of multipicity 2 that were not bridged by reads. Depending on the datasets, it might | |
72 resolve a few extra repeats, which is helpful for small (bacterial genomes). On large genomes, the contiguity improvements are usually minimal, | |
73 but the computation might take a lot of time" /> | |
74 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="Perform metagenomic assembly" | 78 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="Perform metagenomic assembly" |
75 help="It is designed for highly non-uniform coverage and is sensitive to underrepresented sequence at low coverage (as low as 2x). | 79 help="It is designed for highly non-uniform coverage and is sensitive to underrepresented sequence at low coverage (as low as 2x). |
76 In some examples of simple metagenomes, we observed that the normal mode assembled more contiguous bacterial | 80 In some examples of simple metagenomes, we observed that the normal mode assembled more contiguous bacterial |
77 consensus sequence, while the metagenome mode was slightly more fragmented, but revealed strain mixtures"/> | 81 consensus sequence, while the metagenome mode was slightly more fragmented, but revealed strain mixtures"/> |
78 <conditional name="asm"> | 82 <conditional name="asm"> |
79 <param name="asm_select" type="select" label="Reduced contig assembly coverage"> | 83 <param name="asm_select" type="select" label="Reduced contig assembly coverage" help="Typically, assemblies of large genomes at high coverage require hundreds of GB of RAM. For high coverage assemblies, |
84 you can reduce memory usage by using only a subset of longest reads for initial contig extension stage (usually, the memory bottleneck)"> | |
80 <option value="true">Enable reduced coverage for initial disjointing assembly</option> | 85 <option value="true">Enable reduced coverage for initial disjointing assembly</option> |
81 <option value="false" selected="true">Disable reduced coverage for initial disjointing assembly</option> | 86 <option value="false" selected="true">Disable reduced coverage for initial disjointing assembly</option> |
82 </param> | 87 </param> |
83 <when value="true"> | 88 <when value="true"> |
84 <param argument="--asm-coverage" type="integer" min="0" value="30" | 89 <param argument="--asm-coverage" type="integer" min="0" value="30" |
85 label="Reduced coverage for initial disjointing assembly" | 90 label="Reduced coverage for initial disjointing assembly" |
86 help="Typically, assemblies of large genomes at high coverage require a hundreds of RAM. For high coverage assemblies, | 91 help="This parameter specifies the target coverage of the longest reads. For a typical assembly, 30x is enough to produce good |
87 you can reduce memory usage by using only a subset of longest reads for initial contig extension stage (usually, the memory bottleneck). | |
88 The parameter --asm-coverage specifies the target coverage of the longest reads. For a typical assembly, 30x is enough to produce good | |
89 initial contigs. Regardless of this parameter, all reads will be used at the later pipeline stages."/> | 92 initial contigs. Regardless of this parameter, all reads will be used at the later pipeline stages."/> |
90 <param argument="--genome-size" type="text" optional="true" label="Estimated genome size" | 93 <param argument="--genome-size" type="text" optional="true" label="Estimated genome size" |
91 help="For example, 5m or 2.6g. No longer required as input. However, it must be used in conjunction with --asm-coverage option."> | 94 help="For example, 5m or 2.6g. No longer required as input. However, it must be used in conjunction with --asm-coverage option."> |
92 <validator type="regex" message="Genome size must be a float or integer, optionally followed by a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator> | 95 <validator type="regex" message="Genome size must be a float or integer, optionally followed by a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator> |
93 </param> | 96 </param> |
104 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string}: log"> | 107 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string}: log"> |
105 <filter>generate_log</filter> | 108 <filter>generate_log</filter> |
106 </data> | 109 </data> |
107 </outputs> | 110 </outputs> |
108 <tests> | 111 <tests> |
109 <!--Test 01--> | 112 <!--Test 01: pacbio-raw--> |
110 <test expect_num_outputs="5"> | 113 <test expect_num_outputs="5"> |
111 <param name="inputs" ftype="fastq.gz" value="ecoli_01.fastq.gz,ecoli_02.fastq.gz,ecoli_03.fastq.gz,ecoli_04.fastq.gz,ecoli_05.fastq.gz,ecoli_06.fastq.gz,ecoli_07.fastq.gz"/> | 114 <param name="inputs" ftype="fastq.gz" value="ecoli_01.fastq.gz,ecoli_02.fastq.gz,ecoli_03.fastq.gz,ecoli_04.fastq.gz,ecoli_05.fastq.gz,ecoli_06.fastq.gz,ecoli_07.fastq.gz"/> |
112 <param name="mode" value="--pacbio-raw"/> | 115 <param name="mode" value="--pacbio-raw"/> |
116 <param name="iterations" value="0"/> | |
113 <param name="generate_log" value="true"/> | 117 <param name="generate_log" value="true"/> |
114 <output name="assembly_info" file="result1_assembly_info.txt" ftype="tabular" compare="sim_size"/> | 118 <output name="assembly_info" file="result1_assembly_info.txt" ftype="tabular" compare="sim_size"/> |
115 <output name="assembly_graph" file="result1_assembly_graph.dot" ftype="graph_dot" compare="sim_size"/> | 119 <output name="assembly_graph" file="result1_assembly_graph.dot" ftype="graph_dot" compare="sim_size"/> |
116 <output name="assembly_gfa" file="result1_assembly_graph.gfa" ftype="txt" compare="sim_size"/> | 120 <output name="assembly_gfa" file="result1_assembly_graph.gfa" ftype="txt" compare="sim_size"/> |
117 <output name="consensus" file="result1_assembly.fasta" ftype="fasta" compare="sim_size"/> | 121 <output name="consensus" file="result1_assembly.fasta" ftype="fasta" compare="sim_size"/> |
118 <output name="flye_log" file="result1.log" ftype="txt" compare="sim_size"/> | 122 <output name="flye_log" file="result1.log" ftype="txt" compare="sim_size"/> |
119 </test> | 123 </test> |
120 <!--Test 02--> | 124 <!--Test 02: nano raw--> |
121 <test expect_num_outputs="4"> | 125 <test expect_num_outputs="4"> |
122 <param name="inputs" ftype="fasta.gz" value="nanopore.fasta.gz"/> | 126 <param name="inputs" ftype="fasta.gz" value="nanopore.fasta.gz"/> |
123 <param name="mode" value="--nano-raw"/> | 127 <param name="mode" value="--nano-raw"/> |
128 <param name="iterations" value="0"/> | |
124 <output name="assembly_info" ftype="tabular"> | 129 <output name="assembly_info" ftype="tabular"> |
125 <assert_contents> | 130 <assert_contents> |
126 <has_size value="95" delta="100"/> | 131 <has_size value="95" delta="100"/> |
127 </assert_contents> | 132 </assert_contents> |
128 </output> | 133 </output> |
140 <assert_contents> | 145 <assert_contents> |
141 <has_size value="35573" delta="100"/> | 146 <has_size value="35573" delta="100"/> |
142 </assert_contents> | 147 </assert_contents> |
143 </output> | 148 </output> |
144 </test> | 149 </test> |
145 <!--Test 03--> | 150 <!--Test 03: reduce coverage--> |
146 <test expect_num_outputs="4"> | 151 <test expect_num_outputs="4"> |
147 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> | 152 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> |
148 <param name="mode" value="--pacbio-hifi"/> | 153 <conditional name="mode_conditional"> |
149 <param name="iterations" value="1"/> | 154 <param name="mode" value="--nano-raw"/> |
155 </conditional> | |
150 <conditional name="asm"> | 156 <conditional name="asm"> |
151 <param name="asm_select" value="true" /> | 157 <param name="asm_select" value="true" /> |
152 <param name="asm" value="100"/> | 158 <param name="asm" value="30"/> |
153 <param name="genome_size" value="3980000"/> | 159 <param name="genome_size" value="3980000"/> |
154 </conditional> | 160 </conditional> |
155 <output name="assembly_info" ftype="tabular"> | 161 <output name="assembly_info" ftype="tabular"> |
156 <assert_contents> | 162 <assert_contents> |
157 <has_size value="286" delta="100"/> | 163 <has_size value="286" delta="100"/> |
158 </assert_contents> | 164 </assert_contents> |
159 </output> | 165 </output> |
160 <output name="assembly_graph" ftype="graph_dot"> | 166 <output name="assembly_graph" ftype="graph_dot"> |
161 <assert_contents> | 167 <assert_contents> |
162 <has_size value="2135" delta="100"/> | 168 <has_size value="1840" delta="100"/> |
163 </assert_contents> | 169 </assert_contents> |
164 </output> | 170 </output> |
165 <output name="assembly_gfa" ftype="txt"> | 171 <output name="assembly_gfa" ftype="txt"> |
166 <assert_contents> | 172 <assert_contents> |
167 <has_size value="114351" delta="100"/> | 173 <has_size value="420752" delta="100"/> |
168 </assert_contents> | 174 </assert_contents> |
169 </output> | 175 </output> |
170 <output name="consensus" ftype="fasta"> | 176 <output name="consensus" ftype="fasta"> |
171 <assert_contents> | 177 <assert_contents> |
172 <has_size value="116191" delta="100"/> | 178 <has_size value="427580" delta="100"/> |
173 </assert_contents> | 179 </assert_contents> |
174 </output> | 180 </output> |
175 </test> | 181 </test> |
176 <!--Test 04--> | 182 <!--Test 04: metagenomic mode--> |
177 <test expect_num_outputs="4"> | 183 <test expect_num_outputs="4"> |
178 <param name="inputs" ftype="fastq.gz" value="ecoli_01.fastq.gz,ecoli_02.fastq.gz,ecoli_03.fastq.gz,ecoli_04.fastq.gz,ecoli_05.fastq.gz,ecoli_06.fastq.gz,ecoli_07.fastq.gz"/> | 184 <param name="inputs" ftype="fastq.gz" value="ecoli_01.fastq.gz,ecoli_02.fastq.gz,ecoli_03.fastq.gz,ecoli_04.fastq.gz,ecoli_05.fastq.gz,ecoli_06.fastq.gz,ecoli_07.fastq.gz"/> |
179 <param name="mode" value="--pacbio-raw"/> | 185 <conditional name="mode_conditional"> |
180 <param name="iterations" value="1"/> | 186 <param name="mode" value="--pacbio-raw"/> |
187 </conditional> | |
181 <param name="meta" value="true"/> | 188 <param name="meta" value="true"/> |
182 <param name="plasmids" value="true"/> | |
183 <output name="assembly_info" ftype="tabular"> | 189 <output name="assembly_info" ftype="tabular"> |
184 <assert_contents> | 190 <assert_contents> |
185 <has_size value="95" delta="100"/> | 191 <has_size value="95" delta="100"/> |
186 </assert_contents> | 192 </assert_contents> |
187 </output> | 193 </output> |
190 <has_size value="367" delta="100"/> | 196 <has_size value="367" delta="100"/> |
191 </assert_contents> | 197 </assert_contents> |
192 </output> | 198 </output> |
193 <output name="assembly_gfa" ftype="txt"> | 199 <output name="assembly_gfa" ftype="txt"> |
194 <assert_contents> | 200 <assert_contents> |
195 <has_size value="418051" delta="100"/> | 201 <has_size value="418729" delta="100"/> |
196 </assert_contents> | 202 </assert_contents> |
197 </output> | 203 </output> |
198 <output name="consensus" ftype="fasta"> | 204 <output name="consensus" ftype="fasta"> |
199 <assert_contents> | 205 <assert_contents> |
200 <has_size value="425000" delta="100"/> | 206 <has_size value="425667" delta="100"/> |
201 </assert_contents> | 207 </assert_contents> |
202 </output> | 208 </output> |
203 </test> | 209 </test> |
204 <!--Test 05--> | 210 <!--Test 05: nanopore HQ mode--> |
205 <test expect_num_outputs="4"> | 211 <test expect_num_outputs="4"> |
206 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> | 212 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> |
207 <param name="mode" value="--pacbio-hifi"/> | 213 <conditional name="mode_conditional"> |
208 <param name="iterations" value="1"/> | 214 <param name="mode" value="--nano-hq"/> |
215 </conditional> | |
216 <param name="min_overlap" value="1000"/> | |
209 <output name="assembly_info" ftype="tabular"> | 217 <output name="assembly_info" ftype="tabular"> |
210 <assert_contents> | 218 <assert_contents> |
211 <has_size value="286" delta="100"/> | 219 <has_size value="286" delta="100"/> |
212 </assert_contents> | 220 </assert_contents> |
213 </output> | 221 </output> |
214 <output name="assembly_graph" ftype="graph_dot"> | 222 <output name="assembly_graph" ftype="graph_dot"> |
215 <assert_contents> | 223 <assert_contents> |
216 <has_size value="2135" delta="100"/> | 224 <has_size value="1248" delta="100"/> |
217 </assert_contents> | 225 </assert_contents> |
218 </output> | 226 </output> |
219 <output name="assembly_gfa" ftype="txt"> | 227 <output name="assembly_gfa" ftype="txt"> |
220 <assert_contents> | 228 <assert_contents> |
221 <has_size value="114351" delta="100"/> | 229 <has_size value="420252" delta="100"/> |
222 </assert_contents> | 230 </assert_contents> |
223 </output> | 231 </output> |
224 <output name="consensus" ftype="fasta"> | 232 <output name="consensus" ftype="fasta"> |
225 <assert_contents> | 233 <assert_contents> |
226 <has_size value="116191" delta="100"/> | 234 <has_size value="427129" delta="100"/> |
227 </assert_contents> | 235 </assert_contents> |
228 </output> | 236 </output> |
229 </test> | 237 </test> |
230 <!--Test 06--> | 238 <!--Test 06: hifi error option--> |
231 <test expect_num_outputs="4"> | 239 <test expect_num_outputs="4"> |
232 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> | 240 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> |
233 <param name="mode" value="--pacbio-hifi"/> | 241 <conditional name="mode_conditional"> |
234 <param name="iterations" value="1"/> | 242 <param name="mode" value="--pacbio-hifi"/> |
235 <param name="hifi-error" value="0.02"/> | 243 <param name="hifi_error" value="0.21"/> |
244 </conditional> | |
245 <param name="min_overlap" value="1000"/> | |
236 <output name="assembly_info" ftype="tabular"> | 246 <output name="assembly_info" ftype="tabular"> |
237 <assert_contents> | 247 <assert_contents> |
238 <has_size value="286" delta="100"/> | 248 <has_size value="286" delta="100"/> |
239 </assert_contents> | 249 </assert_contents> |
240 </output> | 250 </output> |
241 <output name="assembly_graph" ftype="graph_dot"> | 251 <output name="assembly_graph" ftype="graph_dot"> |
242 <assert_contents> | 252 <assert_contents> |
243 <has_size value="2135" delta="100"/> | 253 <has_size value="1273" delta="100"/> |
244 </assert_contents> | 254 </assert_contents> |
245 </output> | 255 </output> |
246 <output name="assembly_gfa" ftype="txt"> | 256 <output name="assembly_gfa" ftype="txt"> |
247 <assert_contents> | 257 <assert_contents> |
248 <has_size value="114351" delta="100"/> | 258 <has_size value="420252" delta="100"/> |
249 </assert_contents> | 259 </assert_contents> |
250 </output> | 260 </output> |
251 <output name="consensus" ftype="fasta"> | 261 <output name="consensus" ftype="fasta"> |
252 <assert_contents> | 262 <assert_contents> |
253 <has_size value="116191" delta="100"/> | 263 <has_size value="427129" delta="100"/> |
254 </assert_contents> | 264 </assert_contents> |
255 </output> | 265 </output> |
256 </test> | 266 </test> |
257 <!--Test 07--> | 267 <!--Test 07: keep haplotypes--> |
258 <test expect_num_outputs="4"> | 268 <test expect_num_outputs="4"> |
259 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> | 269 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> |
260 <param name="mode" value="--pacbio-hifi"/> | 270 <conditional name="mode_conditional"> |
261 <param name="iterations" value="1"/> | 271 <param name="mode" value="--pacbio-corr"/> |
272 <param name="hifi_error" value="0.21"/> | |
273 </conditional> | |
274 <param name="min_overlap" value="1000"/> | |
262 <param name="keep-haplotypes" value="true"/> | 275 <param name="keep-haplotypes" value="true"/> |
263 <output name="assembly_info" ftype="tabular"> | 276 <output name="assembly_info" ftype="tabular"> |
264 <assert_contents> | 277 <assert_contents> |
265 <has_size value="286" delta="100"/> | 278 <has_size value="286" delta="100"/> |
266 </assert_contents> | 279 </assert_contents> |
267 </output> | 280 </output> |
268 <output name="assembly_graph" ftype="graph_dot"> | 281 <output name="assembly_graph" ftype="graph_dot"> |
269 <assert_contents> | 282 <assert_contents> |
270 <has_size value="2135" delta="100"/> | 283 <has_size value="1273" delta="100"/> |
271 </assert_contents> | 284 </assert_contents> |
272 </output> | 285 </output> |
273 <output name="assembly_gfa" ftype="txt"> | 286 <output name="assembly_gfa" ftype="txt"> |
274 <assert_contents> | 287 <assert_contents> |
275 <has_size value="114351" delta="100"/> | 288 <has_size value="420252" delta="100"/> |
276 </assert_contents> | 289 </assert_contents> |
277 </output> | 290 </output> |
278 <output name="consensus" ftype="fasta"> | 291 <output name="consensus" ftype="fasta"> |
279 <assert_contents> | 292 <assert_contents> |
280 <has_size value="116191" delta="100"/> | 293 <has_size value="427129" delta="100"/> |
294 </assert_contents> | |
295 </output> | |
296 </test> | |
297 <!--Test 08: scaffolding mode--> | |
298 <test expect_num_outputs="4"> | |
299 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> | |
300 <param name="mode" value="--nano-hq"/> | |
301 <param name="min_overlap" value="1000"/> | |
302 <param name="scaffolding" value="true"/> | |
303 <output name="assembly_info" ftype="tabular"> | |
304 <assert_contents> | |
305 <has_size value="286" delta="100"/> | |
306 </assert_contents> | |
307 </output> | |
308 <output name="assembly_graph" ftype="graph_dot"> | |
309 <assert_contents> | |
310 <has_size value="1248" delta="100"/> | |
311 </assert_contents> | |
312 </output> | |
313 <output name="assembly_gfa" ftype="txt"> | |
314 <assert_contents> | |
315 <has_size value="420252" delta="100"/> | |
316 </assert_contents> | |
317 </output> | |
318 <output name="consensus" ftype="fasta"> | |
319 <assert_contents> | |
320 <has_size value="427129" delta="100"/> | |
281 </assert_contents> | 321 </assert_contents> |
282 </output> | 322 </output> |
283 </test> | 323 </test> |
284 </tests> | 324 </tests> |
285 <help><![CDATA[ | 325 <help><![CDATA[ |
299 | 339 |
300 **Quick usage** | 340 **Quick usage** |
301 | 341 |
302 Input reads can be in FASTA or FASTQ format, uncompressed or compressed with gz. Currently, PacBio (raw, corrected, HiFi) and ONT reads | 342 Input reads can be in FASTA or FASTQ format, uncompressed or compressed with gz. Currently, PacBio (raw, corrected, HiFi) and ONT reads |
303 (raw, corrected) are supported. Expected error rates are <30% for raw, <3% for corrected, and <1% for HiFi. Note that Flye was primarily | 343 (raw, corrected) are supported. Expected error rates are <30% for raw, <3% for corrected, and <1% for HiFi. Note that Flye was primarily |
304 developed to run on raw reads. Additionally, the *--subassemblies* option performs a consensus assembly of multiple sets of high-quality | 344 developed to run on raw reads. You may specify multiple files with reads (separated by spaces). Mixing different read types is not yet supported. The *--meta* |
305 contigs. You may specify multiple files with reads (separated by spaces). Mixing different read types is not yet supported. The *--meta* | |
306 option enables the mode for metagenome/uneven coverage assembly. | 345 option enables the mode for metagenome/uneven coverage assembly. |
307 | 346 |
308 Genome size estimate is no longer a required option. You need to provide an estimate if using *--asm-coverage* option. | 347 Genome size estimate is no longer a required option. You need to provide an estimate if using *--asm-coverage* option. |
309 | 348 |
310 To reduce memory consumption for large genome assemblies, you can use a subset of the longest reads for initial disjointig assembly by | 349 To reduce memory consumption for large genome assemblies, you can use a subset of the longest reads for initial disjointig assembly by |