comparison merqury.xml @ 0:9d79beb19ac3 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/merqury commit 561c3bcec49ecef9384b565861ccee11a377f7cd"
author iuc
date Sun, 18 Apr 2021 23:47:56 +0000
parents
children 39edec572bae
comparison
equal deleted inserted replaced
-1:000000000000 0:9d79beb19ac3
1 <tool id="merqury" name="Merqury" version="@TOOL_VERSION@" profile="20.01">
2 <description>evaluate the assembly quality</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam_ontology"/>
7 <expand macro="requirements"/>
8 <version_command>merqury --version</version_command>
9 <!-- Stages the inputs (untars the meryl databases, gunzips or symlinks the assemblies), runs merqury.sh and moves the results into output_files for dataset discovery. Every dataset path is single-quoted so that paths containing spaces or shell metacharacters cannot break the command line; the label is left bare because its sanitizer only lets letters, digits and underscores through. --> <command detect_errors="exit_code"><![CDATA[
10 mkdir -p output_files
11 #if $mode.options == 'default'
12 && mkdir -p read-db.meryl
13 && tar -zxf '${mode.meryldb_F1}' -C read-db.meryl --strip-components=1
14 #if $mode.assembly_options.number_assemblies == 'one'
15 #if $mode.assembly_options.assembly_01.ext.endswith(".gz")
16 && gunzip -c '$mode.assembly_options.assembly_01' > assembly.fasta
17 #else
18 && ln -s '$mode.assembly_options.assembly_01' assembly.fasta
19 #end if
20 && merqury.sh read-db.meryl assembly.fasta ${label}
21 && mv ${label}* output_files
22 && find . -maxdepth 1 -name 'assembly_only*' -type f -print0 | xargs -0r mv -t output_files
23 #else
24 #if $mode.assembly_options.assembly_01.ext.endswith(".gz")
25 && gunzip -c '$mode.assembly_options.assembly_01' > assembly_01.fasta
26 #else
27 && ln -s '$mode.assembly_options.assembly_01' assembly_01.fasta
28 #end if
29 #if $mode.assembly_options.assembly_02.ext.endswith(".gz")
30 && gunzip -c '$mode.assembly_options.assembly_02' > assembly_02.fasta
31 #else
32 && ln -s '$mode.assembly_options.assembly_02' assembly_02.fasta
33 #end if
34 && merqury.sh read-db.meryl assembly_01.fasta assembly_02.fasta ${label}
35 && mv ${label}* output_files
36 && find . -maxdepth 1 -name 'assembly_only*' -type f -print0 | xargs -0r mv -t output_files
37 #end if
38 #else
39 && mkdir -p read-db.meryl pat.meryl mat.meryl
40 && tar -zxf '${mode.meryldb_F1}' -C read-db.meryl --strip-components=1
41 && tar -zxf '${mode.meryldb_PAT}' -C pat.meryl --strip-components=1
42 && tar -zxf '${mode.meryldb_MAT}' -C mat.meryl --strip-components=1
43 #if $mode.assembly_options.number_assemblies == 'one'
44 #if $mode.assembly_options.assembly_01.ext.endswith(".gz")
45 && gunzip -c '$mode.assembly_options.assembly_01' > assembly.fasta
46 #else
47 && ln -s '$mode.assembly_options.assembly_01' assembly.fasta
48 #end if
49 && merqury.sh read-db.meryl pat.meryl mat.meryl assembly.fasta ${label}
50 && mv ${label}* output_files
51 && find . -maxdepth 1 -name 'assembly_only*' -type f -print0 | xargs -0r mv -t output_files
52 #else
53 #if $mode.assembly_options.assembly_01.ext.endswith(".gz")
54 && gunzip -c '$mode.assembly_options.assembly_01' > assembly_01.fasta
55 #else
56 && ln -s '$mode.assembly_options.assembly_01' assembly_01.fasta
57 #end if
58 #if $mode.assembly_options.assembly_02.ext.endswith(".gz")
59 && gunzip -c '$mode.assembly_options.assembly_02' > assembly_02.fasta
60 #else
61 && ln -s '$mode.assembly_options.assembly_02' assembly_02.fasta
62 #end if
63 && merqury.sh read-db.meryl pat.meryl mat.meryl assembly_01.fasta assembly_02.fasta ${label}
64 && mv ${label}* output_files
65 && find . -maxdepth 1 -name 'assembly_only*' -type f -print0 | xargs -0r mv -t output_files
66 #end if
67 #end if
68 ]]> </command>
69 <inputs>
70 <conditional name="mode"> <!-- Selects which inputs are shown: default mode takes one k-mer database, trio mode additionally takes paternal and maternal databases. -->
71 <param name="options" type="select"
72 label="Evaluation mode" help="Merqury allows two operational modes: default and trio mode.">
73 <option value="default">Default mode</option>
74 <option value="trio">Trio mode</option>
75 </param>
76 <when value="default">
77 <param name="meryldb_F1" type="data" format="meryldb"
78 label="K-mer counts database"
79 help="Select the k-mer counts of the read set."/>
80 <conditional name="assembly_options"> <!-- One or two assemblies; assembly_02 only exists in the "two" branch. -->
81 <param name="number_assemblies" type="select"
82 label="Number of assemblies" help="Select the number of assemblies.">
83 <option value="one">One assembly (pseudo-haplotype or mixed-haplotype)</option>
84 <option value="two">Two assemblies (diploid)</option>
85 </param>
86 <when value="one">
87 <param name="assembly_01" type="data" format="fasta,fasta.gz"
88 label="Genome assembly"
89 help="Select the assembled genome (FASTA)."/>
90 </when>
91 <when value="two">
92 <param name="assembly_01" type="data" format="fasta,fasta.gz"
93 label="First genome assembly"
94 help="Select the first assembled genome (FASTA)."/>
95 <param name="assembly_02" type="data" format="fasta,fasta.gz"
96 label="Second genome assembly"
97 help="Select the second assembled genome (FASTA)."/>
98 </when>
99 </conditional>
100 </when>
101 <when value="trio">
102 <param name="meryldb_F1" type="data" format="meryldb"
103 label="F1 k-mer counts"
104 help="Select the k-mer counts of the read set (meryldb)."/>
105 <param name="meryldb_PAT" type="data" format="meryldb"
106 label="Paternal hap-mer database"
107 help="K-mer counts of the paternal haplotype (meryldb)."/>
108 <param name="meryldb_MAT" type="data" format="meryldb"
109 label="Maternal hap-mer database"
110 help="K-mer counts of the maternal haplotype (meryldb)."/>
111 <conditional name="assembly_options"> <!-- Same assembly selection as in default mode; the command template reads it through mode.assembly_options in both modes. -->
112 <param name="number_assemblies" type="select"
113 label="Number of assemblies" help="Select the number of assemblies.">
114 <option value="one">One assembly (pseudo-haplotype or mixed-haplotype)</option>
115 <option value="two">Two assemblies (diploid)</option>
116 </param>
117 <when value="one">
118 <param name="assembly_01" type="data" format="fasta,fasta.gz"
119 label="Genome assembly"
120 help="Select the assembled genome (FASTA)."/>
121 </when>
122 <when value="two">
123 <param name="assembly_01" type="data" format="fasta,fasta.gz"
124 label="First genome assembly"
125 help="Select the first assembled genome (FASTA)."/>
126 <param name="assembly_02" type="data" format="fasta,fasta.gz"
127 label="Second genome assembly"
128 help="Select the second assembled genome (FASTA)."/>
129 </when>
130 </conditional>
131 </when>
132 </conditional>
133 <param name="label" type="text" value="output_merqury" label="Identification label" help="Assign an identification label."> <!-- Used as the merqury.sh output prefix and in the "mv ${label}*" glob of the command template, so it must not be empty. -->
134 <sanitizer invalid_char="">
135 <valid initial="string.letters,string.digits">
136 <add value="_" />
137 </valid>
138 </sanitizer> <validator type="empty_field" message="The identification label must not be empty."/>
139 </param>
140 </inputs>
141 <outputs> <!-- One list collection per file extension; each is filled by discovering matching files in output_files, with the element name taken from the "name" group of the pattern. -->
142 <collection name="bed_files" label="${tool.name} on ${on_string}: bed" type="list">
143 <discover_datasets directory="output_files" pattern="(?P&lt;name&gt;.+)\.bed" format="bed"/>
144 </collection>
145 <collection name="wig_files" label="${tool.name} on ${on_string}: wig" type="list">
146 <discover_datasets directory="output_files" pattern="(?P&lt;name&gt;.+)\.wig" format="wig"/>
147 </collection>
148 <collection name="qv_files" label="${tool.name} on ${on_string}: qv" type="list">
149 <discover_datasets directory="output_files" pattern="(?P&lt;name&gt;.+)\.qv" format="tabular"/>
150 </collection>
151 <collection name="png_files" label="${tool.name} on ${on_string}: png" type="list">
152 <discover_datasets directory="output_files" pattern="(?P&lt;name&gt;.+)\.png" format="png"/>
153 </collection>
154 <collection name="sizes_files" label="${tool.name} on ${on_string}: size files" type="list">
155 <discover_datasets directory="output_files" pattern="(?P&lt;name&gt;.+)\.sizes" format="tabular"/>
156 </collection>
157 <collection name="stats_files" label="${tool.name} on ${on_string}: stats" type="list">
158 <discover_datasets directory="output_files" pattern="(?P&lt;name&gt;.+)\.stats" format="tabular"/>
159 </collection>
160 </outputs>
161 <tests>
162 <test> <!-- Trio mode with a single gzipped assembly and label output_01; checks the png, stats, wig, sizes and bed collections. -->
163 <conditional name="mode">
164 <param name="options" value="trio"/>
165 <param name="meryldb_F1" value="child.meryldb" ftype="meryldb"/>
166 <param name="meryldb_PAT" value="pat.meryldb" ftype="meryldb"/>
167 <param name="meryldb_MAT" value="mat.meryldb" ftype="meryldb"/>
168 <conditional name="assembly_options">
169 <param name="number_assemblies" value="one"/>
170 <param name="assembly_01" value="assembly.fasta.gz" ftype="fasta.gz"/>
171 </conditional>
172 </conditional>
173 <param name="label" value="output_01"/>
174 <output_collection name="png_files" type="list" count="3">
175 <element name="output_01.assembly.mat.spectra-cn.ln" file="output_01.assembly.spectra-cn.ln.png" ftype="png">
176 <assert_contents>
177 <has_size value="2733" delta="100"/>
178 </assert_contents>
179 </element>
180 <element name="output_01.assembly.pat.spectra-cn.ln" file="output_01.assembly.spectra-cn.ln.png" ftype="png">
181 <assert_contents>
182 <has_size value="2733" delta="100"/>
183 </assert_contents>
184 </element>
185 <element name="output_01.hapmers.blob" file="output_01.hapmers.blob.png" ftype="png">
186 <assert_contents>
187 <has_size value="10764" delta="100"/>
188 </assert_contents>
189 </element>
190 </output_collection>
191 <output_collection name="stats_files" type="list" count="2">
192 <element name="output_01.assembly.100_20000.phased_block" ftype="tabular"/>
193 <element name="output_01.completeness" ftype="tabular"/>
194 </output_collection>
195 <output_collection name="wig_files" type="list" count="2">
196 <element name="output_01.assembly.pat" ftype="wig"/>
197 <element name="output_01.assembly.mat" file="output_01.assembly.mat.wig" ftype="wig"/>
198 </output_collection>
199 <output_collection name="sizes_files" type="list" count="2">
200 <element name="output_01.assembly.100_20000.phased_block" ftype="tabular"/>
201 <element name="output_01.assembly.contig" file="output_01.assembly.contig.tabular" ftype="tabular"/>
202 </output_collection>
203 <output_collection name="bed_files" type="list" count="3">
204 <element name="output_01.assembly.100_20000.phased_block" ftype="bed"/>
205 <element name="output_01.assembly.100_20000.switch" ftype="bed"/>
206 <element name="output_01.assembly.sort"/>
207 </output_collection>
208 </test>
209 <test> <!-- Default mode with a single gzipped assembly and label output_02; checks the png, stats, qv, wig and bed collections. -->
210 <conditional name="mode">
211 <param name="options" value="default"/>
212 <param name="meryldb_F1" value="read-db.meryldb" ftype="meryldb"/>
213 <conditional name="assembly_options">
214 <param name="number_assemblies" value="one"/>
215 <param name="assembly_01" value="assembly.fasta.gz" ftype="fasta.gz"/>
216 </conditional>
217 </conditional>
218 <param name="label" value="output_02"/>
219 <output_collection name="png_files" type="list" count="6">
220 <element name="output_02.assembly.spectra-cn.fl" file="output_02.assembly.spectra-cn.fl.png" ftype="png">
221 <assert_contents>
222 <has_size value="58059" delta="100"/>
223 </assert_contents>
224 </element>
225 <element name="output_02.assembly.spectra-cn.ln" file="output_02.assembly.spectra-cn.ln.png" ftype="png">
226 <assert_contents>
227 <has_size value="57763" delta="100"/>
228 </assert_contents>
229 </element>
230 <element name="output_02.assembly.spectra-cn.st" file="output_02.assembly.spectra-cn.st.png" ftype="png">
231 <assert_contents>
232 <has_size value="61814" delta="100"/>
233 </assert_contents>
234 </element>
235 <element name="output_02.spectra-asm.fl" file="output_02.spectra-asm.fl.png" ftype="png">
236 <assert_contents>
237 <has_size value="64445" delta="100"/>
238 </assert_contents>
239 </element>
240 <element name="output_02.spectra-asm.ln" file="output_02.spectra-asm.ln.png" ftype="png">
241 <assert_contents>
242 <has_size value="64795" delta="100"/>
243 </assert_contents>
244 </element>
245 <element name="output_02.spectra-asm.st" file="output_02.spectra-asm.st.png" ftype="png">
246 <assert_contents>
247 <has_size value="66317" delta="100"/>
248 </assert_contents>
249 </element>
250 </output_collection>
251 <output_collection name="stats_files" type="list" count="1">
252 <element name="output_02.completeness" file="output_02.completeness.stats.tabular" ftype="tabular"/>
253 </output_collection>
254 <output_collection name="qv_files" type="list" count="2">
255 <element name="output_02.assembly" ftype="tabular"/>
256 <element name="output_02" file="output_02_qv.tabular" ftype="tabular"/>
257 </output_collection>
258 <output_collection name="wig_files" type="list" count="1">
259 <element name="assembly_only" ftype="wig">
260 <assert_contents>
261 <has_size value="29909811" delta="100"/>
262 </assert_contents>
263 </element>
264 </output_collection>
265 <output_collection name="bed_files" type="list" count="1">
266 <element name="assembly_only" ftype="bed">
267 <assert_contents>
268 <has_size value="73516121" delta="100"/>
269 </assert_contents>
270 </element>
271 </output_collection>
272 </test>
273 <test> <!-- Default mode with two gzipped assemblies and label output_03; checks the png, stats and qv collections. -->
274 <conditional name="mode">
275 <param name="options" value="default"/>
276 <param name="meryldb_F1" value="read-db.meryldb" ftype="meryldb"/>
277 <conditional name="assembly_options">
278 <param name="number_assemblies" value="two"/>
279 <param name="assembly_01" value="assembly.fasta.gz" ftype="fasta.gz"/>
280 <param name="assembly_02" value="assembly_02.fasta.gz" ftype="fasta.gz"/>
281 </conditional>
282 </conditional>
283 <param name="label" value="output_03"/>
284 <output_collection name="png_files" type="list" count="10">
285 <element name="output_03.assembly_01.spectra-cn.fl" file="output_03.assembly_01.spectra-cn.fl.png" ftype="png">
286 <assert_contents>
287 <has_size value="58059" delta="100"/>
288 </assert_contents>
289 </element>
290 <element name="output_03.assembly_01.spectra-cn.ln" file="output_03.assembly_01.spectra-cn.ln.png" ftype="png">
291 <assert_contents>
292 <has_size value="57763" delta="100"/>
293 </assert_contents>
294 </element>
295 <element name="output_03.assembly_01.spectra-cn.st" file="output_03.assembly_01.spectra-cn.st.png" ftype="png">
296 <assert_contents>
297 <has_size value="61814" delta="100"/>
298 </assert_contents>
299 </element>
300 <element name="output_03.assembly_02.spectra-cn.fl" file="output_03.assembly_02.spectra-cn.fl.png" ftype="png">
301 <assert_contents>
302 <has_size value="61579" delta="100"/>
303 </assert_contents>
304 </element>
305 <element name="output_03.assembly_02.spectra-cn.ln" file="output_03.assembly_02.spectra-cn.ln.png" ftype="png">
306 <assert_contents>
307 <has_size value="61538" delta="100"/>
308 </assert_contents>
309 </element>
310 <element name="output_03.assembly_02.spectra-cn.st" file="output_03.assembly_02.spectra-cn.st.png" ftype="png">
311 <assert_contents>
312 <has_size value="67297" delta="100"/>
313 </assert_contents>
314 </element>
315 <element name="output_03.spectra-asm.ln" file="output_03.spectra-asm.ln.png" ftype="png">
316 <assert_contents>
317 <has_size value="2733" delta="100"/>
318 </assert_contents>
319 </element>
320 <element name="output_03.spectra-cn.fl" file="output_03.spectra-cn.fl.png" ftype="png">
321 <assert_contents>
322 <has_size value="61397" delta="100"/>
323 </assert_contents>
324 </element>
325 <element name="output_03.spectra-cn.ln" file="output_03.spectra-cn.ln.png" ftype="png">
326 <assert_contents>
327 <has_size value="61050" delta="100"/>
328 </assert_contents>
329 </element>
330 <element name="output_03.spectra-cn.st" file="output_03.spectra-cn.st.png" ftype="png">
331 <assert_contents>
332 <has_size value="65243" delta="100"/>
333 </assert_contents>
334 </element>
335 </output_collection>
336 <output_collection name="stats_files" type="list" count="1">
337 <element name="output_03.completeness" file="output_03.completeness.stats.tabular" ftype="tabular"/>
338 </output_collection>
339 <output_collection name="qv_files" type="list" count="3">
340 <element name="output_03.assembly_01" ftype="tabular"/>
341 <element name="output_03.assembly_02" ftype="tabular"/>
342 <element name="output_03" file="output_03_qv.tabular" ftype="tabular"/>
343 </output_collection>
344 </test>
345 <test> <!-- Trio mode with two gzipped assemblies and label output_04; checks the png, stats, qv, wig, sizes and bed collections. -->
346 <conditional name="mode">
347 <param name="options" value="trio"/>
348 <param name="meryldb_F1" value="child.meryldb" ftype="meryldb"/>
349 <param name="meryldb_PAT" value="pat.meryldb" ftype="meryldb"/>
350 <param name="meryldb_MAT" value="mat.meryldb" ftype="meryldb"/>
351 <conditional name="assembly_options">
352 <param name="number_assemblies" value="two"/>
353 <param name="assembly_01" value="assembly.fasta.gz" ftype="fasta.gz"/>
354 <param name="assembly_02" value="assembly_02.fasta.gz" ftype="fasta.gz"/>
355 </conditional>
356 </conditional>
357 <param name="label" value="output_04"/>
358 <output_collection name="png_files" type="list" count="5">
359 <element name="output_04.assembly_01.mat.spectra-cn.ln" file="output_04.assembly_01.mat.spectra-cn.ln.png" ftype="png">
360 <assert_contents>
361 <has_size value="2733" delta="100"/>
362 </assert_contents>
363 </element>
364 <element name="output_04.assembly_01.pat.spectra-cn.ln" file="output_04.assembly_01.pat.spectra-cn.ln.png" ftype="png">
365 <assert_contents>
366 <has_size value="2733" delta="100"/>
367 </assert_contents>
368 </element>
369 <element name="output_04.assembly_02.mat.spectra-cn.ln" file="output_04.assembly_02.mat.spectra-cn.ln.png" ftype="png">
370 <assert_contents>
371 <has_size value="2733" delta="100"/>
372 </assert_contents>
373 </element>
374 <element name="output_04.assembly_02.pat.spectra-cn.ln" file="output_04.assembly_02.pat.spectra-cn.ln.png" ftype="png">
375 <assert_contents>
376 <has_size value="2733" delta="100"/>
377 </assert_contents>
378 </element>
379 <element name="output_04.hapmers.blob" file="output_04.hapmers.blob.png" ftype="png">
380 <assert_contents>
381 <has_size value="10764" delta="100"/>
382 </assert_contents>
383 </element>
384 </output_collection>
385 <output_collection name="stats_files" type="list" count="3">
386 <element name="output_04.assembly_01.100_20000.phased_block" ftype="tabular"/>
387 <element name="output_04.assembly_02.100_20000.phased_block" file="output_04.assembly_02.100_20000.phased_block.stats.tabular" ftype="tabular"/>
388 <element name="output_04.completeness"/>
389 </output_collection>
390 <output_collection name="qv_files" type="list" count="1">
391 <element name="output_04" file="output_04_qv.tabular" ftype="tabular"/>
392 </output_collection>
393 <output_collection name="wig_files" type="list" count="4">
394 <element name="output_04.assembly_01.pat" ftype="wig"/>
395 <element name="output_04.assembly_01.mat" ftype="wig"/>
396 <element name="output_04.assembly_02.pat" ftype="wig"/>
397 <element name="output_04.assembly_02.mat" ftype="wig"/>
398 </output_collection>
399 <output_collection name="sizes_files" type="list" count="4">
400 <element name="output_04.assembly_01.100_20000.phased_block"/>
401 <element name="output_04.assembly_01.contig" file="output_04.assembly_01.contig.tabular" ftype="tabular"/>
402 <element name="output_04.assembly_02.100_20000.phased_block"/>
403 <element name="output_04.assembly_02.contig" file="output_04.assembly_02.contig.tabular" ftype="tabular"/>
404 </output_collection>
405 <output_collection name="bed_files" type="list" count="6">
406 <element name="output_04.assembly_01.100_20000.phased_block" ftype="bed"/>
407 <element name="output_04.assembly_01.100_20000.switch" file="output_04.assembly_01.100_20000.switch.bed" ftype="bed"/>
408 <element name="output_04.assembly_01.sort" file="output_04.assembly_01.sort.bed" ftype="bed"/>
409 <element name="output_04.assembly_02.100_20000.phased_block" ftype="bed"/>
410 <element name="output_04.assembly_02.100_20000.switch" ftype="bed"/>
411 <element name="output_04.assembly_02.sort" ftype="bed"/>
412 </output_collection>
413 </test>
414 </tests>
415 <help><![CDATA[
416 .. class:: infomark
417
418 **Purpose**
419
420 Merqury allows a reference-free assembly evaluation based on efficient k-mer set operations. By comparing k-mers in a de novo
421 assembly to those found in unassembled high-accuracy reads, Merqury estimates base-level accuracy and completeness.
422
423 For trios, Merqury can also evaluate haplotype-specific accuracy, completeness, phase block continuity, and switch errors.
424
425 ----
426
427 .. class:: infomark
428
429 **Input**
430
431 Merqury requires two types of inputs: meryldbs and the genome assemblies.
432 There is no need to run Merqury separately for each assembly. If two assemblies are provided, Merqury generates statistics for each assembly and for the two combined.
433
434 ----
435
436 .. class:: infomark
437
438 **Output**
439
440 The generated metrics include consensus quality and k-mer completeness, and when parental genomic sequences are available (either assembled or unassembled), Merqury can output haplotype completeness, phase block statistics, switch error rates, and visual representations of phase consistency for the child’s genome. This includes TDF (or BED) features that can be displayed in a genome browser for visualizing the presence of k-mer classes across a genome (e.g., the k-mers inherited from a parental genome).
441
442 ]]> </help>
443 <expand macro="citations"/>
444 </tool>