Mercurial > repos > bgruening > hifiasm
comparison hifiasm.xml @ 0:bf0a4667e3ce draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit a118fad06e94683813e214af107fedd5fd80500a"
author | bgruening |
---|---|
date | Tue, 02 Mar 2021 20:23:49 +0000 |
parents | |
children | 6505bd37670d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bf0a4667e3ce |
---|---|
1 <tool id="hifiasm" name="Hifiasm" version="@VERSION@"> | |
2 <description>de novo assembler</description> | |
3 <macros> | |
4 <token name="@VERSION@">0.14</token> | |
5 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token> | |
6 <xml name="reads"> | |
7 <param name="reads" type="data" format="@FORMATS@" label="Input reads" /> | |
8 </xml> | |
9 </macros> | |
10 <requirements> | |
11 <requirement type="package" version="@VERSION@">hifiasm</requirement> | |
12 <requirement type="package" version="0.1">yak</requirement> | |
13 </requirements> | |
14 <version_command>hifiasm --version</version_command> | |
15 <command detect_errors="exit_code"> | |
16 <![CDATA[ | |
17 ln -s '$mode.reads' input.${mode.reads.extension} && | |
18 #if str($mode.mode_selector) == 'trio': | |
19 ## The HiFi reads above are linked in both modes (hifiasm always reads input.<ext>); trio mode additionally stages the parental reads and counts their k-mers with yak. | |
20 ln -s '$mode.hap1_reads' hap1_reads.${mode.hap1_reads.extension} && | |
21 ln -s '$mode.hap2_reads' hap2_reads.${mode.hap2_reads.extension} && | |
22 yak count -k31 -b37 -t\${GALAXY_SLOTS:-1} -o hap1.yak hap1_reads.${mode.hap1_reads.extension} && | |
23 yak count -k31 -b37 -t\${GALAXY_SLOTS:-1} -o hap2.yak hap2_reads.${mode.hap2_reads.extension} && | |
24 #end if | |
25 hifiasm -i input.${mode.reads.extension} -t \${GALAXY_SLOTS:-1} -o output -f $filter_bits | |
26 #if str($advanced_options.advanced_selector) == 'set': | |
27 -z $advanced_options.adapter_length | |
28 -k $advanced_options.kmer_length | |
29 -w $advanced_options.window_size | |
30 -D $advanced_options.drop_kmers | |
31 -N $advanced_options.max_overlaps | |
32 -r $advanced_options.correction_rounds | |
33 #end if | |
34 #if str($assembly_options.assembly_selector) == 'set': | |
35 -a $assembly_options.cleaning_rounds | |
36 -m $assembly_options.pop_contigs | |
37 -p $assembly_options.pop_unitigs | |
38 -n $assembly_options.remove_tips | |
39 -x $assembly_options.max_overlap | |
40 -y $assembly_options.min_overlap | |
41 $assembly_options.disable_post_join | |
42 --pb-range $assembly_options.pb_range | |
43 #end if | |
44 #if str($mode.mode_selector) == 'trio': | |
45 -1 hap1.yak | |
46 -2 hap2.yak | |
47 -c $mode.max_kmers | |
48 -d $mode.min_kmers | |
49 #end if | |
50 #if str($purge_options.purge_selector) == 'set': | |
51 -l $purge_options.purge_level | |
52 -s $purge_options.similarity_threshold | |
53 -O $purge_options.minimum_overlap | |
54 #if $purge_options.purge_cov: | |
55 --purge-cov $purge_options.purge_cov | |
56 #end if | |
57 $purge_options.high_het | |
58 #end if | |
59 ]]> | |
60 </command> | |
61 <inputs> | |
62 <conditional name="mode"> | |
63 <param name="mode_selector" type="select" label="Assembly mode"> | |
64 <option value="standard">Standard</option> | |
65 <option value="trio">Trio mode</option> | |
66 </param> | |
67 <when value="standard"> | |
68 <expand macro="reads" /> | |
69 </when> | |
70 <when value="trio"> | |
71 <expand macro="reads" /> | |
72 <param name="hap1_reads" type="data" format="fastq,fastq.gz" label="Haplotype 1 reads" /> | |
73 <param name="hap2_reads" type="data" format="fastq,fastq.gz" label="Haplotype 2 reads" /> | |
74 <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" /> | |
75 <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" /> | |
76 </when> | |
77 </conditional> | |
78 <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" /> | |
79 <conditional name="advanced_options"> | |
80 <param name="advanced_selector" type="select" label="Advanced options"> | |
81 <option value="blank">Leave default</option> | |
82 <option value="set">Specify</option> | |
83 </param> | |
84 <when value="blank" /> | |
85 <when value="set"> | |
86 <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" /> | |
87 <param name="kmer_length" argument="-k" type="integer" min="0" max="63" value="51" label="K-mer length" help="Must be less than 64" /> | |
88 <param name="window_size" argument="-w" type="integer" min="0" value="51" label="Minimizer window size" /> | |
89 <param name="drop_kmers" argument="-D" type="float" value="5.0" label="Drop k-mers" help="K-mers that occur more than this value multiplied by the coverage will be discarded" /> | |
90 <param name="max_overlaps" argument="-N" type="integer" value="100" label="Maximum overlaps to consider" help="The software selects the larger of this value and the k-mer count multiplied by coverage" /> | |
91 <param name="correction_rounds" argument="-r" type="integer" value="3" label="Correction rounds" /> | |
92 </when> | |
93 </conditional> | |
94 <conditional name="assembly_options"> | |
95 <param name="assembly_selector" type="select" label="Assembly options"> | |
96 <option value="blank">Leave default</option> | |
97 <option value="set">Specify</option> | |
98 </param> | |
99 <when value="blank" /> | |
100 <when value="set"> | |
101 <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" /> | |
102 <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" /> | |
103 <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" /> | |
104 <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" /> | |
105 <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" /> | |
106 <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" /> | |
107 <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" /> | |
108 <param argument="--pb-range" type="integer" min="0" max="100" value="0" label="Inconsistent contigs" help="If this option is set to a value greater than zero the tool generates an additional BED dataset containing regions that are >= x% inconsistent." /> | |
109 </when> | |
110 </conditional> | |
111 <conditional name="purge_options"> | |
112 <param name="purge_selector" type="select" label="Options for purging duplicates"> | |
113 <option value="blank">Leave default</option> | |
114 <option value="set">Specify</option> | |
115 </param> | |
116 <when value="blank" /> | |
117 <when value="set"> | |
118 <param name="purge_level" argument="-l" type="select" label="Purge level"> | |
119 <option value="0" selected="true">None</option> | |
120 <option value="1">Light</option> | |
121 <option value="2">Aggressive</option> | |
122 </param> | |
123 <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" /> | |
124 <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" /> | |
125 <param argument="--purge-cov" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" /> | |
126 <param argument="--high-het" type="boolean" truevalue="--high-het" falsevalue="" label="Experimental high-heterozygosity mode" help="NB: May be unstable" /> | |
127 </when> | |
128 </conditional> | |
129 </inputs> | |
130 <outputs> | |
131 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string} Haplotype-resolved raw unitig graph"> | |
132 <filter>mode['mode_selector'] == 'standard'</filter> | |
133 </data> | |
134 <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string} Haplotype-resolved raw unitig graph"> | |
135 <filter>mode['mode_selector'] == 'trio'</filter> | |
136 </data> | |
137 <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string} Processed unitig graph"> | |
138 <filter>mode['mode_selector'] == 'standard'</filter> | |
139 </data> | |
140 <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string} Primary assembly contig graph"> | |
141 <filter>mode['mode_selector'] == 'standard'</filter> | |
142 </data> | |
143 <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string} Alternate assembly contig graph"> | |
144 <filter>mode['mode_selector'] == 'standard'</filter> | |
145 </data> | |
146 <data name="hap1_contigs" format="gfa1" from_work_dir="output.hap1.p_ctg.gfa" label="${tool.name} ${mode.hap1_reads.name} contig graph"> | |
147 <filter>mode['mode_selector'] == 'trio'</filter> | |
148 </data> | |
149 <data name="hap2_contigs" format="gfa1" from_work_dir="output.hap2.p_ctg.gfa" label="${tool.name} ${mode.hap2_reads.name} contig graph"> | |
150 <filter>mode['mode_selector'] == 'trio'</filter> | |
151 </data> | |
152 </outputs> | |
153 <tests> | |
154 <test> | |
155 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> | |
156 <param name="filter_bits" value="0" /> | |
157 <param name="mode_selector" value="standard" /> | |
158 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> | |
159 <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" /> | |
160 <output name="primary_contig_graph" file="hifiasm-out1-primary.gfa" ftype="gfa1" /> | |
161 <output name="alternate_contig_graph" file="hifiasm-out1-alternate.gfa" ftype="gfa1" /> | |
162 </test> | |
163 </tests> | |
164 <help><![CDATA[ | |
165 *********************************** | |
166 HiFiASM - a fast de novo assembler | |
167 *********************************** | |
168 | |
169 Hifiasm is a fast haplotype-resolved de novo assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data. | |
170 | |
171 #### Assembly mode | |
172 - *Standard* | |
173 - *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning. | |
174 | |
175 #### Trio Options | |
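176 - *Haplotype 1 reads* : hap1/paternal short reads; their k-mers are counted with yak and used for trio binning | |
177 - *Haplotype 2 reads* : hap2/maternal short reads; their k-mers are counted with yak and used for trio binning | |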
178 - *Lower bound of the binned k-mer's frequency* | |
179 - *Upper bound of the binned k-mer's frequency* | |
180 | |
181 *Bits for bloom filter* (-f) - A value of 0 disables the bloom filter, which is suitable for small genomes. For genomes much larger than human, applying -f 38 or even -f 39 is preferred to save memory on k-mer counting. | |
182 | |
183 #### Advanced options | |
184 - *Length of adapters to be removed* Old HiFi reads may contain short adapter sequences at the ends of reads. You can specify 20 to trim both ends of reads by 20bp. | |
185 - *K-mer length* (must be <64) | |
186 - *Minimizer window size* | |
187 - *Drop K-mers* K-mers that occur more than this value multiplied by the coverage will be discarded | |
188 - *Maximum overlaps to consider* consider up to max(-D*coverage,-N) overlaps for each oriented read | |
189 - *Correction rounds* Number of read correction rounds | |
190 | |
191 #### Assembly options | |
192 - *Cleaning rounds* Number of assembly cleaning rounds | |
193 - *Minimum contig bubble size* Pop contig graph bubbles smaller than this value | |
194 - *Minimum unitig bubble size* Pop unitig graph bubbles smaller than this value | |
195 - *Tip unitigs* Keep only tip unitigs with a number of reads greater than or equal to this value | |
196 - *Maximum overlap drop ratio* | |
197 - *Minimum overlap drop ratio* | |
198 - *Skip post join contigs step* disable post join contigs step which may improve N50 | |
199 - *Inconsistent contigs* If this option is set to a value greater than zero the tool generates an additional BED dataset containing regions that are >= x% inconsistent. | |
200 | |
201 #### Options for purging duplicates | |
202 - *Purge level* 0: no purging; 1: light; 2: aggressive [0 for trio; 2 for unzip] | |
203 - *Similarity threshold for duplicate haplotigs* | |
204 - *Minimum overlapped reads for duplicate haplotigs* | |
205 - *Coverage upper bound* If not set, this will be determined automatically | |
206 - *Experimental high-heterozygosity mode* Enable this mode for highly heterozygous samples. NB: May be unstable | |
207 | |
208 | |
209 ### Outputs | |
210 | |
211 Non Trio assembly | |
212 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors. | |
213 - Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information. | |
214 - Primary assembly contig graph : This graph collapses different haplotypes. | |
215 - Alternate assembly contig graph : This graph consists of all assemblies that are discarded in primary contig graph. | |
216 | |
217 | |
218 Trio assembly | |
219 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information. | |
220 - Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly. | |
221 - Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly. | |
222 | |
223 | |
224 | |
225 ]]></help> | |
226 <citations> | |
227 <citation type="doi">10.1038/s41592-020-01056-5</citation> | |
228 </citations> | |
229 </tool> |