comparison hifiasm.xml @ 12:da9d8bf98802 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit 2bb01c64e79df856fbcb12afde62f7c14a5f59fa
author bgruening
date Fri, 24 Feb 2023 17:34:21 +0000
parents cd7936c5a9a5
children ec9e21e9c71b
comparison
equal deleted inserted replaced
11:cd7936c5a9a5 12:da9d8bf98802
1 <tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> 1 <tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description> 2 <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">0.18.8</token> 4 <token name="@TOOL_VERSION@">0.18.8</token>
5 <token name="@VERSION_SUFFIX@">0</token> 5 <token name="@VERSION_SUFFIX@">1</token>
6 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token> 6 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token>
7 <xml name="reads"> 7 <xml name="reads">
8 <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" /> 8 <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" />
9 </xml> 9 </xml>
10 </macros> 10 </macros>
148 --primary 148 --primary
149 $input_filenames 149 $input_filenames
150 #if $log_out: 150 #if $log_out:
151 2> output.log 151 2> output.log
152 #end if 152 #end if
153 && mkdir noseq_files && mv *.noseq.gfa noseq_files 153
154 && mkdir noseq_files && mv *.noseq.gfa noseq_files
155
156 #if $bins_out:
157 && mkdir bin_files && mv *.bin bin_files
158 #end if
154 ]]> 159 ]]>
155 </command> 160 </command>
156 <inputs> 161 <inputs>
157 <conditional name="mode"> 162 <conditional name="mode">
158 <param name="mode_selector" type="select" label="Assembly mode"> 163 <param name="mode_selector" type="select" label="Assembly mode">
266 </sanitizer> 271 </sanitizer>
267 <validator type="regex">[0-9kKmMGg]+</validator> 272 <validator type="regex">[0-9kKmMGg]+</validator>
268 </param> 273 </param>
269 </when> 274 </when>
270 </conditional> 275 </conditional>
271 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> 276 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no" />
277 <param name="bins_out" type="boolean" label="Output .bin files (used for development and debugging)?" truevalue="yes" falsevalue="no" />
272 </inputs> 278 </inputs>
273 <outputs> 279 <outputs>
274 <!--Standard mode--> 280 <!--Standard mode-->
275 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph for pseudohaplotype assembly"> 281 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph for pseudohaplotype assembly">
276 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter> 282 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
318 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter> 324 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
319 </data> 325 </data>
320 <data name="hap2_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.p_utg.gfa" label="${tool.name} on ${on_string}: processsed unitig graph"> 326 <data name="hap2_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.p_utg.gfa" label="${tool.name} on ${on_string}: processsed unitig graph">
321 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter> 327 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
322 </data> 328 </data>
323 <!--Log output--> 329 <!--Log, noseq, and bin output-->
324 <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} ${on_string}: log file"> 330 <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} ${on_string}: log file">
325 <filter>log_out</filter> 331 <filter>log_out</filter>
326 </data> 332 </data>
327 <collection name="noseq_files" type="list" label="${tool.name} on ${on_string}: noseq files"> 333 <collection name="noseq_files" type="list" label="${tool.name} on ${on_string}: noseq files">
328 <discover_datasets pattern="__name_and_ext__" format="gfa1" directory="noseq_files" /> 334 <discover_datasets pattern="__name_and_ext__" format="gfa1" directory="noseq_files" />
335 </collection>
336 <collection name="bin_files" type="list" label="${tool.name} on ${on_string}: bin files">
337 <filter>bins_out</filter>
338 <discover_datasets pattern="__name_and_ext__" format="gfa1" directory="bin_files" />
329 </collection> 339 </collection>
330 </outputs> 340 </outputs>
331 <tests> 341 <tests>
332 <!-- TEST 1 --> 342 <!-- TEST 1 -->
333 <test expect_num_outputs="5"> 343 <test expect_num_outputs="5">
494 </test> 504 </test>
495 <!-- TEST 12: test nanopore input --> 505 <!-- TEST 12: test nanopore input -->
496 <test expect_num_outputs="5"> 506 <test expect_num_outputs="5">
497 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 507 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
498 <param name="mode_selector" value="standard" /> 508 <param name="mode_selector" value="standard" />
499 <param name="filter_bits" value="0" /> 509 <param name="filter_bits" value="0" />
500 <conditional name="ont_integration"> 510 <conditional name="ont_integration">
501 <param name="ont_integration_selector" value="set" /> 511 <param name="ont_integration_selector" value="set" />
502 <param name="ul" value="nanopore.fasta.gz" /> 512 <param name="ul" value="nanopore.fasta.gz" />
503 <param name="ul_tip" value="1" /> 513 <param name="ul_tip" value="1" />
504 </conditional> 514 </conditional>
507 </test> 517 </test>
508 <!-- TEST 13: test multi-file nanopore input --> 518 <!-- TEST 13: test multi-file nanopore input -->
509 <test expect_num_outputs="6"> 519 <test expect_num_outputs="6">
510 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 520 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
511 <param name="mode_selector" value="standard" /> 521 <param name="mode_selector" value="standard" />
512 <param name="filter_bits" value="0" /> 522 <param name="filter_bits" value="0" />
513 <param name="log_out" value="yes" /> 523 <param name="log_out" value="yes" />
514 <conditional name="ont_integration"> 524 <conditional name="ont_integration">
515 <param name="ont_integration_selector" value="set" /> 525 <param name="ont_integration_selector" value="set" />
516 <param name="ul" value="nanopore.fasta.gz,nanopore.fasta.gz" /> 526 <param name="ul" value="nanopore.fasta.gz,nanopore.fasta.gz" />
517 <param name="ul_tip" value="1" /> 527 <param name="ul_tip" value="1" />
520 <assert_contents> 530 <assert_contents>
521 <has_text text="--ul ./ultralong/input_0.fasta.gz,./ultralong/input_1.fasta.gz"/> 531 <has_text text="--ul ./ultralong/input_0.fasta.gz,./ultralong/input_1.fasta.gz"/>
522 </assert_contents> 532 </assert_contents>
523 </output> 533 </output>
524 </test> 534 </test>
535 <!-- TEST 14: test bin files -->
536 <test expect_num_outputs="6">
537 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
538 <param name="filter_bits" value="0" />
539 <param name="mode_selector" value="standard" />
540 <param name="bins_out" value="yes" />
541 <output_collection name="bin_files" type="list" count="3" />
542 </test>
525 </tests> 543 </tests>
526 <help><![CDATA[ 544 <help><![CDATA[
527 .. class:: infomark 545 .. class:: infomark
528 546
529 **HiFiASM - a fast de novo assembler** 547 **HiFiASM - a fast de novo assembler**
530 548
531 549
532 Hifiasm is a fast haplotype-resolved de novo assembler for PacBio HiFi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data. 550 Hifiasm is a fast haplotype-resolved *de novo* assembler for PacBio HiFi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data.
533 551
534 ---- 552 ----
535 553
536 .. class:: infomark 554 .. class:: infomark
537 555
538 **Assembly mode** 556 **Assembly mode**
539 557
540 - *Standard* 558 - *Standard*: Standard assembly can be run in pseudohaplotype mode, or with Hi-C phasing using Hi-C reads from the same individual.
541 - *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning. 559 - *Trio*: When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning.
542 560
543 ---- 561 ----
544 562
545 .. class:: infomark 563 .. class:: infomark
546 564
547 **Outputs** 565 **Outputs**
548 566
549 Non Trio assembly: 567 Non-Trio assembly:
550 568
551 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors. 569 - Haplotype-resolved raw unitig graph: This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors.
552 - Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information. 570 - Haplotype-resolved processed unitig graph without small bubbles: This graph 'pops' small bubbles in the raw unitig graph; small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information.
553 - Primary assembly contig graph : This graph collapses different haplotypes. 571 - Primary assembly contig graph: This graph includes a complete assembly with long stretches of phased blocks, though there may be some haplotype collapse.
554 - Alternate assembly contig graph : This graph consists of all assemblies that are discarded in primary contig graph. 572 - Alternate assembly contig graph: This graph consists of all contigs that are discarded from the primary contig graph.
573 - [hap1]/[hap2] contig graph: Each graph consists of phased contigs (output only with Hi-C phasing enabled).
555 574
556 575
557 Trio assembly: 576 Trio assembly:
558 577
559 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information. 578 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information.