Mercurial > repos > bgruening > hifiasm
comparison hifiasm.xml @ 5:045c7c3d8e59 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit 1e4c6c2e48f5e95beed75bb76134cbf7fa55dc8a"
author | bgruening |
---|---|
date | Wed, 06 Oct 2021 20:11:11 +0000 |
parents | 3f7be05a1597 |
children | 5bec28269d95 |
comparison
equal
deleted
inserted
replaced
4:3f7be05a1597 | 5:045c7c3d8e59 |
---|---|
1 <tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | 1 <tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> |
2 <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description> | 2 <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description> |
3 <macros> | 3 <macros> |
4 <token name="@TOOL_VERSION@">0.16.1</token> | 4 <token name="@TOOL_VERSION@">0.16.1</token> |
5 <token name="@VERSION_SUFFIX@">0</token> | 5 <token name="@VERSION_SUFFIX@">1</token> |
6 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token> | 6 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token> |
7 <xml name="reads"> | 7 <xml name="reads"> |
8 <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" /> | 8 <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" /> |
9 </xml> | 9 </xml> |
10 </macros> | 10 </macros> |
17 | 17 |
18 <![CDATA[ | 18 <![CDATA[ |
19 #set $input_files = list() | 19 #set $input_files = list() |
20 #set $hap1_inputs = list() | 20 #set $hap1_inputs = list() |
21 #set $hap2_inputs = list() | 21 #set $hap2_inputs = list() |
22 #set $hic1_inputs = list() | |
23 #set $hic2_inputs = list() | |
22 #for idx, read in enumerate($mode.reads): | 24 #for idx, read in enumerate($mode.reads): |
23 #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension) | 25 #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension) |
24 ln -s '$read' $inputfile && | 26 ln -s '$read' $inputfile && |
25 $input_files.append($inputfile) | 27 $input_files.append($inputfile) |
26 #end for | 28 #end for |
27 #set $input_filenames = ' '.join($input_files) | 29 #set $input_filenames = ' '.join($input_files) |
30 #if str($hic_partition.hic_partition_selector) == 'set' | |
31 mkdir HiCF HiCR && | |
32 #for idx, read in enumerate($hic_partition.h1): | |
33 #set $inputfile = './HiCF/input_%d.%s' % ($idx, $read.dataset.extension) | |
34 ln -s '$read' $inputfile && | |
35 $hic1_inputs.append($inputfile) | |
36 #end for | |
37 #for idx, read in enumerate($hic_partition.h2): | |
38 #set $inputfile = './HiCR/input_%d.%s' % ($idx, $read.dataset.extension) | |
39 ln -s '$read' $inputfile && | |
40 $hic2_inputs.append($inputfile) | |
41 #end for | |
42 #end if | |
28 #if str($mode.mode_selector) == 'trio': | 43 #if str($mode.mode_selector) == 'trio': |
29 #for idx, read in enumerate($mode.hap1_reads): | 44 #for idx, read in enumerate($mode.hap1_reads): |
30 #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension) | 45 #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension) |
31 ln -s '$read' $inputfile && | 46 ln -s '$read' $inputfile && |
32 $hap1_inputs.append($inputfile) | 47 $hap1_inputs.append($inputfile) |
91 --n-hap $purge_options.n_hap | 106 --n-hap $purge_options.n_hap |
92 #end if | 107 #end if |
93 #end if | 108 #end if |
94 | 109 |
95 #if str($hic_partition.hic_partition_selector) == 'set': | 110 #if str($hic_partition.hic_partition_selector) == 'set': |
96 --h1 '${ ' '.join(["%s" % $x for $x in $hic_partition.h1]) }' | 111 --h1 '${ ' '.join(["%s" % $x for $x in $hic1_inputs]) }' |
97 --h2 '${ ' '.join(["%s" % $x for $x in $hic_partition.h2]) }' | 112 --h2 '${ ' '.join(["%s" % $x for $x in $hic2_inputs]) }' |
98 #if $hic_partition.seed: | 113 #if $hic_partition.seed: |
99 --seed $hic_partition.seed | 114 --seed $hic_partition.seed |
100 #end if | 115 #end if |
101 #if $hic_partition.n_weight: | 116 #if $hic_partition.n_weight: |
102 --n-weight $hic_partition.n_weight | 117 --n-weight $hic_partition.n_weight |
134 <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" /> | 149 <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" /> |
135 <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" /> | 150 <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" /> |
136 <param name="yak_kmer_length" type="integer" min="0" max="64" value="31" label="Yak counter k-mer length" /> | 151 <param name="yak_kmer_length" type="integer" min="0" max="64" value="31" label="Yak counter k-mer length" /> |
137 </when> | 152 </when> |
138 </conditional> | 153 </conditional> |
139 <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" /> | 154 <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" /> |
155 <conditional name="assembly_options"> | |
156 <param name="assembly_selector" type="select" label="Assembly options"> | |
157 <option value="blank">Leave default</option> | |
158 <option value="set">Specify</option> | |
159 </param> | |
160 <when value="blank" /> | |
161 <when value="set"> | |
162 <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" /> | |
163 <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" /> | |
164 <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" /> | |
165 <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" /> | |
166 <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" /> | |
167 <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are below a threshold controlled by -x. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/> | |
168 <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are over a threshold controlled by -y. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/> | |
169 <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" /> | |
170 <param name="ignore_error_corrected" argument="-i" type="boolean" truevalue="-i" falsevalue="" value="False" label="Ignore error corrected reads and overlaps" help="Ignore error corrected reads and overlaps saved in prefix.*.bin files. Apart from assembly graphs, hifiasm also outputs three binary files that save alloverlap information during assembly step. With these files, hifiasm can avoid the time-consuming all-to-all overlap calculation step, and do the assembly directly and quickly. This might be helpful when users want to get an optimized assembly by multiple rounds of experiments with different parameters." /> | |
171 <param argument="--hom-cov" type="integer" optional="True" value="" label="Homozygous read coverage" /> | |
172 </when> | |
173 </conditional> | |
174 <conditional name="purge_options"> | |
175 <param name="purge_selector" type="select" label="Options for purging duplicates"> | |
176 <option value="blank">Leave default</option> | |
177 <option value="set">Specify</option> | |
178 </param> | |
179 <when value="blank" /> | |
180 <when value="set"> | |
181 <param name="purge_level" argument="-l" type="select" label="Purge level"> | |
182 <option value="0" selected="true">None (0)</option> | |
183 <option value="1">Light (1)</option> | |
184 <option value="2">Aggressive (2)</option> | |
185 <option value="3">Aggressive - high heterozygosity rate (3)</option> | |
186 </param> | |
187 <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" /> | |
188 <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" /> | |
189 <param argument="--purge-max" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" /> | |
190 <param argument="--n-hap" type="integer" min="0" value="" optional="true" label="Assumption of haplotype number" help="A haplotype is defined as the combination of alleles for different polymorphisms that occur on the same chromosome." /> | |
191 </when> | |
192 </conditional> | |
193 <conditional name="hic_partition"> | |
194 <param name="hic_partition_selector" type="select" label="Options for Hi-C-partition"> | |
195 <option value="blank">Leave default</option> | |
196 <option value="set">Specify</option> | |
197 </param> | |
198 <when value="blank" /> | |
199 <when value="set"> | |
200 <param argument="--h1" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R1 reads" /> | |
201 <param argument="--h2" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R2 reads" /> | |
202 <param argument="--seed" type="integer" min="1" value="" optional="true" label="RNG seed" /> | |
203 <param argument="--n-weight" type="integer" min="1" value="" optional="true" label="Rounds of reweighting Hi-C links. Increasing this may improve phasing results but takes longer" /> | |
204 <param argument="--n-perturb" type="integer" min="1" value="" optional="true" label="Rounds of perturbation. Increasing this may improve phasing results but takes longer" /> | |
205 <param argument="--f-perturb" type="float" min="0" max="1" value="" optional="true" label="Fraction to flip for perturbation. Increasing this may improve phasing results but takes longer" /> | |
206 <param argument="--l-msjoin" type="integer" min="0" value="500000" label="Detect misjoined unitigs of greater than or equal to specified size" help="A value of 0 disables this filter"/> | |
207 </when> | |
208 </conditional> | |
140 <conditional name="advanced_options"> | 209 <conditional name="advanced_options"> |
141 <param name="advanced_selector" type="select" label="Advanced options"> | 210 <param name="advanced_selector" type="select" label="Advanced options"> |
142 <option value="blank">Leave default</option> | 211 <option value="blank">Leave default</option> |
143 <option value="set">Specify</option> | 212 <option value="set">Specify</option> |
144 </param> | 213 </param> |
164 </sanitizer> | 233 </sanitizer> |
165 <validator type="regex">[0-9kKmMGg]+</validator> | 234 <validator type="regex">[0-9kKmMGg]+</validator> |
166 </param> | 235 </param> |
167 </when> | 236 </when> |
168 </conditional> | 237 </conditional> |
169 | |
170 <conditional name="assembly_options"> | |
171 <param name="assembly_selector" type="select" label="Assembly options"> | |
172 <option value="blank">Leave default</option> | |
173 <option value="set">Specify</option> | |
174 </param> | |
175 <when value="blank" /> | |
176 <when value="set"> | |
177 <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" /> | |
178 <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" /> | |
179 <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" /> | |
180 <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" /> | |
181 <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" /> | |
182 <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are below a threshold controlled by -x. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/> | |
183 <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are over a threshold controlled by -y. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/> | |
184 <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" /> | |
185 <param name="ignore_error_corrected" argument="-i" type="boolean" truevalue="-i" falsevalue="" value="False" label="Ignore error corrected reads and overlaps" help="Ignore error corrected reads and overlaps saved in prefix.*.bin files. Apart from assembly graphs, hifiasm also outputs three binary files that save alloverlap information during assembly step. With these files, hifiasm can avoid the time-consuming all-to-all overlap calculation step, and do the assembly directly and quickly. This might be helpful when users want to get an optimized assembly by multiple rounds of experiments with different parameters." /> | |
186 <param argument="--hom-cov" type="integer" optional="True" value="" label="Homozygous read coverage" /> | |
187 </when> | |
188 </conditional> | |
189 | |
190 <conditional name="purge_options"> | |
191 <param name="purge_selector" type="select" label="Options for purging duplicates"> | |
192 <option value="blank">Leave default</option> | |
193 <option value="set">Specify</option> | |
194 </param> | |
195 <when value="blank" /> | |
196 <when value="set"> | |
197 <param name="purge_level" argument="-l" type="select" label="Purge level"> | |
198 <option value="0" selected="true">None (0)</option> | |
199 <option value="1">Light (1)</option> | |
200 <option value="2">Aggressive (2)</option> | |
201 <option value="3">Aggressive - high heterozygosity rate (3)</option> | |
202 </param> | |
203 <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" /> | |
204 <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" /> | |
205 <param argument="--purge-max" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" /> | |
206 <param argument="--n-hap" type="integer" min="0" value="" optional="true" label="Assumption of haplotype number" help="A haplotype is defined as the combination of alleles for different polymorphisms that occur on the same chromosome." /> | |
207 </when> | |
208 </conditional> | |
209 | |
210 <conditional name="hic_partition"> | |
211 <param name="hic_partition_selector" type="select" label="Options for Hi-C-partition"> | |
212 <option value="blank">Leave default</option> | |
213 <option value="set">Specify</option> | |
214 </param> | |
215 <when value="blank" /> | |
216 <when value="set"> | |
217 <param argument="--h1" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R1 reads" /> | |
218 <param argument="--h2" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R2 reads" /> | |
219 <param argument="--seed" type="integer" min="1" value="" optional="true" label="RNG seed" /> | |
220 <param argument="--n-weight" type="integer" min="1" value="" optional="true" label="Rounds of reweighting Hi-C links" /> | |
221 <param argument="--n-perturb" type="integer" min="1" value="" optional="true" label="Rounds of perturbation" /> | |
222 <param argument="--f-perturb" type="float" min="0" max="1" value="" optional="true" label="Fraction to flip for perturbation" /> | |
223 <param argument="--l-msjoin" type="integer" min="0" value="500000" label="Detect misjoined unitigs of greater than or equal to specified size" help="A value of 0 disables this filter"/> | |
224 </when> | |
225 </conditional> | |
226 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> | 238 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> |
227 </inputs> | 239 </inputs> |
228 <outputs> | 240 <outputs> |
229 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}, haplotype-resolved raw unitig graph"> | 241 <!--Standard mode--> |
230 <filter>mode['mode_selector'] == 'standard'</filter> | 242 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph"> |
231 </data> | 243 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
232 <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}, haplotype-resolved raw unitig graph"> | 244 </data> |
233 <filter>mode['mode_selector'] == 'trio'</filter> | 245 <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph"> |
234 </data> | 246 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
235 <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}, processed unitig graph"> | 247 </data> |
236 <filter>mode['mode_selector'] == 'standard'</filter> | 248 <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}: primary assembly contig graph"> |
237 </data> | 249 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
238 <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}, primary assembly contig graph"> | 250 </data> |
239 <filter>mode['mode_selector'] == 'standard'</filter> | 251 <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}: alternate assembly contig graph"> |
240 </data> | 252 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
241 <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}, alternate assembly contig graph"> | 253 </data> |
242 <filter>mode['mode_selector'] == 'standard'</filter> | 254 <!--Trio outputs without Hi-c reads--> |
243 </data> | 255 <data name="hap1_contigs" format="gfa1" from_work_dir="output.dip.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}: hap1.p_ctg contig graph"> |
244 <data name="hap1_contigs" format="gfa1" from_work_dir="output.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}, hap1.p_ctg contig graph"> | 256 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
245 <filter>mode['mode_selector'] == 'trio'</filter> | 257 </data> |
246 </data> | 258 <data name="hap2_contigs" format="gfa1" from_work_dir="output.dip.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}: hap2.p_ctg contig graph"> |
247 <data name="hap2_contigs" format="gfa1" from_work_dir="output.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}, hap2.p_ctg contig graph"> | 259 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
248 <filter>mode['mode_selector'] == 'trio'</filter> | 260 </data> |
249 </data> | 261 <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph"> |
250 <!-- Hi-C partition outputs --> | 262 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
251 <data name="hic_contig_graph" format="gfa1" from_work_dir="output.hic.p_ctg.gfa" label="${tool.name} ${on_string}, HI-C contig graph"> | 263 </data> |
252 <filter>hic_partition['hic_partition_selector'] == 'set'</filter> | 264 <data name="processed_unitigs_trio" format="gfa1" from_work_dir="output.dip.p_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved processed unitig graph"> |
253 </data> | 265 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter> |
254 <data name="hic_balanced_contig_hap1_graph" format="gfa1" from_work_dir="output.bp.hap1.p_ctg.gfa" label="${tool.name} ${on_string}, HI-C hap1 balanced contig graph hap1"> | 266 </data> |
255 <filter>hic_partition['hic_partition_selector'] == 'set'</filter> | 267 <!-- Standard mode with Hi-C partition outputs --> |
256 </data> | 268 <data name="hic_pcontig_graph" format="gfa1" from_work_dir="output.hic.p_ctg.gfa" label="${tool.name} ${on_string}: Hi-C primary contig graph"> |
257 <data name="hic_balanced_contig_hap2_graph" format="gfa1" from_work_dir="output.bp.hap2.p_ctg.gfa" label="${tool.name} ${on_string}, HI-C hap2 balanced contig graph hap2"> | 269 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter> |
258 <filter>hic_partition['hic_partition_selector'] == 'set'</filter> | 270 </data> |
259 </data> | 271 <data name="hic_acontig_graph" format="gfa1" from_work_dir="output.hic.a_ctg.gfa" label="${tool.name} ${on_string}: Hi-C alternate contig graph"> |
260 <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} ${on_string}, log file"> | 272 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter> |
273 </data> | |
274 <data name="hic_balanced_contig_hap1_graph" format="gfa1" from_work_dir="output.bp.hap1.p_ctg.gfa" label="${tool.name} ${on_string}: Hi-C hap1 balanced contig graph hap1"> | |
275 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter> | |
276 </data> | |
277 <data name="hic_balanced_contig_hap2_graph" format="gfa1" from_work_dir="output.bp.hap2.p_ctg.gfa" label="${tool.name} ${on_string}: Hi-C hap2 balanced contig graph hap2"> | |
278 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter> | |
279 </data> | |
280 <!--Trio outputs with Hi-c reads--> | |
281 <data name="hap1_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.r_utg.gfa" label="${tool.name} on ${on_string}: raw unitig graph"> | |
282 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter> | |
283 </data> | |
284 <data name="hap2_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph"> | |
285 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter> | |
286 </data> | |
287 <!--Log output--> | |
288 <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} ${on_string}: log file"> | |
261 <filter>log_out</filter> | 289 <filter>log_out</filter> |
262 </data> | 290 </data> |
263 </outputs> | 291 </outputs> |
264 <tests> | 292 <tests> |
265 <test> | 293 <test expect_num_outputs="4"> |
266 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 294 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
267 <param name="filter_bits" value="0" /> | 295 <param name="filter_bits" value="0" /> |
268 <param name="mode_selector" value="standard" /> | 296 <param name="mode_selector" value="standard" /> |
269 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> | 297 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> |
270 <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" /> | 298 <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" /> |
273 <assert_contents> | 301 <assert_contents> |
274 <has_size value="0"/> | 302 <has_size value="0"/> |
275 </assert_contents> | 303 </assert_contents> |
276 </output> | 304 </output> |
277 </test> | 305 </test> |
278 <test> | 306 <test expect_num_outputs="4"> |
279 <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz" /> | 307 <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz" /> |
280 <param name="filter_bits" value="0" /> | 308 <param name="filter_bits" value="0" /> |
281 <param name="mode_selector" value="standard" /> | 309 <param name="mode_selector" value="standard" /> |
282 <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1" /> | 310 <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1" /> |
283 <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1" /> | 311 <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1" /> |
287 <has_size value="0"/> | 315 <has_size value="0"/> |
288 </assert_contents> | 316 </assert_contents> |
289 </output> | 317 </output> |
290 </test> | 318 </test> |
291 <!-- Test logfile out--> | 319 <!-- Test logfile out--> |
292 <test> | 320 <test expect_num_outputs="5"> |
293 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 321 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
294 <param name="filter_bits" value="0" /> | 322 <param name="filter_bits" value="0" /> |
295 <param name="mode_selector" value="standard" /> | 323 <param name="mode_selector" value="standard" /> |
296 <param name="log_out" value="yes"/> | 324 <param name="log_out" value="yes"/> |
297 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> | 325 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> |
306 <assert_contents> | 334 <assert_contents> |
307 <has_line line="[M::main] CMD: hifiasm -t 1 -o output -f 0 --primary input_0.fasta.gz"/> | 335 <has_line line="[M::main] CMD: hifiasm -t 1 -o output -f 0 --primary input_0.fasta.gz"/> |
308 </assert_contents> | 336 </assert_contents> |
309 </output> | 337 </output> |
310 </test> | 338 </test> |
311 | 339 <!--Test Hi-C reads--> |
312 <!-- Test Hi-C | 340 <test expect_num_outputs="4"> |
313 <test> | |
314 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 341 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
315 <param name="filter_bits" value="0" /> | 342 <param name="filter_bits" value="0" /> |
316 <param name="mode_selector" value="standard" /> | 343 <param name="mode_selector" value="standard" /> |
317 <conditional name="hic_partition"> | 344 <conditional name="hic_partition"> |
318 <param name="hic_partition_selector" value="set"/> | 345 <param name="hic_partition_selector" value="set"/> |
319 <param name="h1" value="r1_1.fq"/> | 346 <param name="h1" value="hic_1.fastq.gz"/> |
320 <param name="h2" value="r2_1.fq"/> | 347 <param name="h2" value="hic_2.fastq.gz"/> |
321 </conditional> | 348 <param name="n_weight" value="1"/> |
322 <output name="raw_unitigs" file="hifiasm-out3-raw.gfa" ftype="gfa1" /> | 349 <param name="n_perturb" value="1"/> |
323 <output name="processed_unitigs" file="hifiasm-out3-processed.gfa" ftype="gfa1" /> | 350 <param name="l_perturb" value="0"/> |
324 <output name="primary_contig_graph" file="hifiasm-out3-primary.gfa" ftype="gfa1" /> | 351 <param name="l_msjoin" value="0"/> |
325 <output name="alternate_contig_graph" ftype="gfa1"> | 352 </conditional> |
326 <assert_contents> | 353 <output name="hic_pcontig_graph" file="hifiasm-out-hifi-p.gfa" ftype="gfa1" /> |
327 <has_size value="0"/> | 354 <output name="hic_acontig_graph" file="hifiasm-out-hifi-a.gfa" ftype="gfa1" /> |
328 </assert_contents> | 355 <output name="hic_balanced_contig_hap1_graph" ftype="gfa1"> |
329 </output> | 356 <assert_contents> |
330 </test> | 357 <has_size value="0"/> |
331 --> | 358 </assert_contents> |
359 </output> | |
360 <output name="hic_balanced_contig_hap1_graph" ftype="gfa1" > | |
361 <assert_contents> | |
362 <has_size value="0"/> | |
363 </assert_contents> | |
364 </output> | |
365 </test> | |
332 <!-- Test trio mode --> | 366 <!-- Test trio mode --> |
333 <test> | 367 <test expect_num_outputs="4"> |
334 <param name="filter_bits" value="0"/> | 368 <param name="filter_bits" value="0"/> |
335 <conditional name="mode"> | 369 <conditional name="mode"> |
336 <param name="mode_selector" value="trio"/> | 370 <param name="mode_selector" value="trio"/> |
337 <param name="reads" value="child.fasta.gz"/> | 371 <param name="reads" value="child.fasta.gz"/> |
338 <param name="hap1_reads" value="paternal.fasta.gz"/> | 372 <param name="hap1_reads" value="paternal.fasta.gz"/> |
343 <output name="raw_unitigs_trio" ftype="gfa1"> | 377 <output name="raw_unitigs_trio" ftype="gfa1"> |
344 <assert_contents> | 378 <assert_contents> |
345 <has_size value="0"/> | 379 <has_size value="0"/> |
346 </assert_contents> | 380 </assert_contents> |
347 </output> | 381 </output> |
382 <output name="processed_unitigs_trio" ftype="gfa1"> | |
383 <assert_contents> | |
384 <has_size value="0"/> | |
385 </assert_contents> | |
386 </output> | |
348 <output name="hap1_contigs"> | 387 <output name="hap1_contigs"> |
349 <assert_contents> | 388 <assert_contents> |
350 <has_size value="0"/> | 389 <has_size value="0"/> |
351 </assert_contents> | 390 </assert_contents> |
352 </output> | 391 </output> |
355 <has_size value="0"/> | 394 <has_size value="0"/> |
356 </assert_contents> | 395 </assert_contents> |
357 </output> | 396 </output> |
358 </test> | 397 </test> |
359 <!-- Test ignore-error-corrected option --> | 398 <!-- Test ignore-error-corrected option --> |
360 <test> | 399 <test expect_num_outputs="4"> |
361 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 400 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
362 <param name="filter_bits" value="0" /> | 401 <param name="filter_bits" value="0" /> |
363 <param name="mode_selector" value="standard" /> | 402 <param name="mode_selector" value="standard" /> |
364 <conditional name="assembly_options"> | 403 <conditional name="assembly_options"> |
365 <param name="assembly_selector" value="set"/> | 404 <param name="assembly_selector" value="set"/> |
373 <has_size value="0"/> | 412 <has_size value="0"/> |
374 </assert_contents> | 413 </assert_contents> |
375 </output> | 414 </output> |
376 </test> | 415 </test> |
377 <!-- Test expected haplotype number --> | 416 <!-- Test expected haplotype number --> |
378 <test> | 417 <test expect_num_outputs="4"> |
379 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 418 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
380 <param name="filter_bits" value="0" /> | 419 <param name="filter_bits" value="0" /> |
381 <param name="mode_selector" value="standard" /> | 420 <param name="mode_selector" value="standard" /> |
382 <conditional name="purge_options"> | 421 <conditional name="purge_options"> |
383 <param name="purge_selector" value="set"/> | 422 <param name="purge_selector" value="set"/> |
391 <has_size value="0"/> | 430 <has_size value="0"/> |
392 </assert_contents> | 431 </assert_contents> |
393 </output> | 432 </output> |
394 </test> | 433 </test> |
395 <!-- Test min_hist_cnt option --> | 434 <!-- Test min_hist_cnt option --> |
396 <test> | 435 <test expect_num_outputs="4"> |
397 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 436 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
398 <param name="filter_bits" value="0" /> | 437 <param name="filter_bits" value="0" /> |
399 <param name="mode_selector" value="standard" /> | 438 <param name="mode_selector" value="standard" /> |
400 <conditional name="advanced_options"> | 439 <conditional name="advanced_options"> |
401 <param name="advanced_selector" value="set"/> | 440 <param name="advanced_selector" value="set"/> |
409 <has_size value="0"/> | 448 <has_size value="0"/> |
410 </assert_contents> | 449 </assert_contents> |
411 </output> | 450 </output> |
412 </test> | 451 </test> |
413 <!-- Test max_kooc option --> | 452 <!-- Test max_kooc option --> |
414 <test> | 453 <test expect_num_outputs="4"> |
415 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 454 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
416 <param name="filter_bits" value="0" /> | 455 <param name="filter_bits" value="0" /> |
417 <param name="mode_selector" value="standard" /> | 456 <param name="mode_selector" value="standard" /> |
418 <conditional name="advanced_options"> | 457 <conditional name="advanced_options"> |
419 <param name="advanced_selector" value="set"/> | 458 <param name="advanced_selector" value="set"/> |
427 <has_size value="0"/> | 466 <has_size value="0"/> |
428 </assert_contents> | 467 </assert_contents> |
429 </output> | 468 </output> |
430 </test> | 469 </test> |
431 <!-- Test hg-size option --> | 470 <!-- Test hg-size option --> |
432 <test> | 471 <test expect_num_outputs="4"> |
433 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 472 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
434 <param name="filter_bits" value="0" /> | 473 <param name="filter_bits" value="0" /> |
435 <param name="mode_selector" value="standard" /> | 474 <param name="mode_selector" value="standard" /> |
436 <conditional name="advanced_options"> | 475 <conditional name="advanced_options"> |
437 <param name="advanced_selector" value="set"/> | 476 <param name="advanced_selector" value="set"/> |
445 <has_size value="0"/> | 484 <has_size value="0"/> |
446 </assert_contents> | 485 </assert_contents> |
447 </output> | 486 </output> |
448 </test> | 487 </test> |
449 <!-- Test ignore-error-corrected option --> | 488 <!-- Test ignore-error-corrected option --> |
450 <test> | 489 <test expect_num_outputs="4"> |
451 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | 490 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> |
452 <param name="filter_bits" value="0" /> | 491 <param name="filter_bits" value="0" /> |
453 <param name="mode_selector" value="standard" /> | 492 <param name="mode_selector" value="standard" /> |
454 <conditional name="assembly_options"> | 493 <conditional name="assembly_options"> |
455 <param name="assembly_selector" value="set"/> | 494 <param name="assembly_selector" value="set"/> |
464 </assert_contents> | 503 </assert_contents> |
465 </output> | 504 </output> |
466 </test> | 505 </test> |
467 </tests> | 506 </tests> |
468 <help><![CDATA[ | 507 <help><![CDATA[ |
469 *********************************** | 508 .. class:: infomark |
470 HiFiASM - a fast de novo assembler | 509 |
471 *********************************** | 510 **HiFiASM - a fast de novo assembler** |
511 | |
472 | 512 |
473 Hifiasm is a fast haplotype-resolved de novo assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data. | 513 Hifiasm is a fast haplotype-resolved de novo assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data. |
474 | 514 |
475 #### Assembly mode | 515 ---- |
516 | |
517 .. class:: infomark | |
518 | |
519 **Assembly mode** | |
520 | |
476 - *Standard* | 521 - *Standard* |
477 - *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning. | 522 - *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning. |
478 | 523 |
479 #### Trio Options | 524 ---- |
480 - *Haplotype 1 reads* : list of hap1/paternal read names | 525 |
481 - *Haplotype 2 reads* : list of hap2/maternal read names | 526 .. class:: infomark |
482 - *Lower bound of the binned k-mer's frequency* | 527 |
483 - *Upper bound of the binned k-mer's frequency* | 528 **Outputs** |
484 | 529 |
485 *Bits for bloom filter* (-f) - A value of 0 disables the bloom filter for small genomes. For genomes much larger than human, applying -f 38 or even -f 39 is preferred to save memory on k-mer counting. | 530 Non Trio assembly: |
486 | 531 |
487 #### Advanced options | |
488 - *Length of adapters to be removed* Old HiFi reads may contain short adapter sequences at the ends of reads. You can specify 20 to trim both ends of reads by 20bp. | |
489 - *K-mer length* (must be <64) | |
490 - *Minimizer window size* | |
491 - *Drop K-mers* K-mers that occur more than this value multiplied by the coverage will be discarded | |
492 - *Maximum overlaps to consider* consider up to max(-D*coverage,-N) overlaps for each oriented read | |
493 - *Correction rounds* round of correction | |
494 | |
495 #### Assembly options | |
496 - *Cleaning rounds* round of assembly cleaning | |
497 - *Minimum contig bubble size* Pop contig graph bubbles smaller than this value | |
498 - *Minimum unitig bubble size* Pop unitig graph bubbles smaller than this value | |
499 - *Tip unitigs* Keep only tip unitigs with a number of reads greater than or equal to this value | |
500 - *Maximum overlap drop ratio* | |
501 - *Minimum overlap drop ratio* | |
502 - *Skip post join contigs step* disable post join contigs step which may improve N50 | |
503 | |
504 #### Options for purging duplicates | |
505 - *Purge level* 0: no purging; 1: light; 2: aggressive [0 for trio; 2 for unzip] | |
506 - *Similarity threshold for duplicate haplotigs* | |
507 - *Minimum overlapped reads for duplicate haplotigs* | |
508 - *Coverage upper bound* If not set, this will be determined automatically | |
509 - *Experimental high-heterozygosity mode* enable this mode for high heterozygosity sample NB: May be unstable | |
510 | |
511 #### Hi-C-partition options | |
512 | |
513 - *RNG seed* | |
514 - *Rounds of reweighting Hi-C links* : increasing this may improve phasing results but takes more time. | |
515 - *Rounds of perturbation* : increasing this may improve phasing results but takes more time. | |
516 - *Fraction to flip for perturbation* : increasing this may improve phasing results but takes more time. | |
517 | |
518 ### Outputs | |
519 | |
520 Non Trio assembly | |
521 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors. | 532 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors. |
522 - Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information. | 533 - Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information. |
523 - Primary assembly contig graph : This graph collapses different haplotypes. | 534 - Primary assembly contig graph : This graph collapses different haplotypes. |
524 - Alternate assembly contig graph : This graph consists of all assemblies that are discarded in primary contig graph. | 535 - Alternate assembly contig graph : This graph consists of all assemblies that are discarded in primary contig graph. |
525 | 536 |
526 | 537 |
527 Trio assembly | 538 Trio assembly: |
539 | |
528 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information. | 540 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information. |
529 - Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly. | 541 - Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly. |
530 - Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly. | 542 - Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly. |
531 | |
532 | 543 |
533 | 544 |
534 ]]></help> | 545 ]]></help> |
535 <citations> | 546 <citations> |
536 <citation type="doi">10.1038/s41592-020-01056-5</citation> | 547 <citation type="doi">10.1038/s41592-020-01056-5</citation> |