Mercurial > repos > bgruening > hifiasm
view hifiasm.xml @ 20:5161f204ba63 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit a6a1392294b6c3f57fa6080630bc35e35417054a
author | iuc |
---|---|
date | Wed, 23 Oct 2024 13:21:47 +0000 |
parents | 284a1cb82b61 |
children |
line wrap: on
line source
<tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description>
    <macros>
        <token name="@TOOL_VERSION@">0.20.0</token>
        <token name="@VERSION_SUFFIX@">1</token>
        <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token>
        <xml name="reads">
            <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads"/>
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">hifiasm</requirement>
        <requirement type="package" version="0.1">yak</requirement>
    </requirements>
    <version_command>hifiasm --version</version_command>
    <!--
        Command layout:
          1. symlink every input dataset to a name that carries its datatype extension,
          2. in trio mode with parental reads, build hap1.yak / hap2.yak with yak count,
          3. run hifiasm with the selected option groups,
          4. move the noseq graphs (and optionally the bin files) into directories
             that the output collections discover.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        ## One list of link names per input group; filled by the loops below.
        #set $input_files = list()
        #set $hap1_inputs = list()
        #set $hap2_inputs = list()
        #set $hic1_inputs = list()
        #set $hic2_inputs = list()
        #set $ultralong_inputs = list()

        ## The bare append placeholders below only record the link name.
        #for idx, read in enumerate($mode.reads):
            #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension)
            ln -s '$read' $inputfile &&
            $input_files.append($inputfile)
        #end for
        #set $input_filenames = ' '.join($input_files)

        #if str($hic_partition.hic_partition_selector) == 'set'
            mkdir HiCF HiCR &&
            #for idx, read in enumerate($hic_partition.h1):
                #set $inputfile = './HiCF/input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                $hic1_inputs.append($inputfile)
            #end for
            #for idx, read in enumerate($hic_partition.h2):
                #set $inputfile = './HiCR/input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                $hic2_inputs.append($inputfile)
            #end for
        #end if

        #if str($ont_integration.ont_integration_selector) == 'set'
            mkdir ultralong &&
            #for idx, read in enumerate($ont_integration.ul):
                #set $inputfile = './ultralong/input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                $ultralong_inputs.append($inputfile)
            #end for
        #end if

        #if str($mode.mode_selector) == 'trio':
            #if str($mode.trioinput.trio_input_selector) == 'reads':
                #for idx, read in enumerate($mode.trioinput.hap1_reads):
                    #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension)
                    ln -s '$read' $inputfile &&
                    $hap1_inputs.append($inputfile)
                #end for
                #for idx, read in enumerate($mode.trioinput.hap2_reads):
                    #set $inputfile = 'hap2_input_%d.%s' % ($idx, $read.dataset.extension)
                    ln -s '$read' $inputfile &&
                    $hap2_inputs.append($inputfile)
                #end for
                #set $hap1_filenames = ' '.join($hap1_inputs)
                #set $hap2_filenames = ' '.join($hap2_inputs)
                yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap1.yak $hap1_filenames &&
                yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap2.yak $hap2_filenames &&
            #end if
            #if str($mode.trioinput.trio_input_selector) == 'lists':
                #set $hap1_filenames = $mode.trioinput.hap1_list
                #set $hap2_filenames = $mode.trioinput.hap2_list
            #end if
        #end if

        hifiasm
        -t \${GALAXY_SLOTS:-1}
        -o output
        -f $filter_bits

        ## Optional numeric parameters are compared with the empty string instead of
        ## being tested for truthiness, so that an explicitly entered 0 is still passed on.
        #if str($advanced_options.advanced_selector) == 'set':
            -k $advanced_options.hifiasm_kmer_length
            -w $advanced_options.window_size
            -D $advanced_options.drop_kmers
            -N $advanced_options.max_overlaps
            -r $advanced_options.correction_rounds
            #if str($advanced_options.min_hist_cnt) != '':
                --min-hist-cnt $advanced_options.min_hist_cnt
            #end if
            --max-kocc $advanced_options.max_kocc
            #if $advanced_options.hg_size
                --hg-size $advanced_options.hg_size
            #end if
        #end if

        #if str($assembly_options.assembly_selector) == 'set':
            -a $assembly_options.cleaning_rounds
            -z $assembly_options.adapter_length
            -m $assembly_options.pop_contigs
            -p $assembly_options.pop_unitigs
            -n $assembly_options.remove_tips
            -x $assembly_options.max_overlap
            -y $assembly_options.min_overlap
            $assembly_options.disable_post_join
            $assembly_options.ignore_error_corrected
            #if str($assembly_options.hom_cov) != '':
                --hom-cov $assembly_options.hom_cov
            #end if
        #end if

        #if str($mode.mode_selector) == 'trio':
            #if str($mode.trioinput.trio_input_selector) == 'reads':
                -1 hap1.yak
                -2 hap2.yak
            #end if
            #if str($mode.trioinput.trio_input_selector) == 'lists':
                ## Dataset paths are quoted like every other dataset path in this command.
                -3 '$hap1_filenames'
                -4 '$hap2_filenames'
            #end if
            -c $mode.max_kmers
            -d $mode.min_kmers
            $mode.trio_dual
        #end if

        #if str($purge_options.purge_selector) == 'set':
            -l $purge_options.purge_level
            -s $purge_options.similarity_threshold
            -O $purge_options.minimum_overlap
            #if str($purge_options.purge_max) != '':
                --purge-max $purge_options.purge_max
            #end if
            #if str($purge_options.n_hap) != '':
                --n-hap $purge_options.n_hap
            #end if
        #end if

        #if str($scaffolding_options.scaffold_selector) == 'set':
            --dual-scaf
            #if str($scaffolding_options.scaf_gap) != '':
                --scaf-gap $scaffolding_options.scaf_gap
            #end if
        #end if

        #if str($hic_partition.hic_partition_selector) == 'set':
            --h1 ${ ','.join(["'%s'" % $x for $x in $hic1_inputs]) }
            --h2 ${ ','.join(["'%s'" % $x for $x in $hic2_inputs]) }
            #if str($hic_partition.seed) != '':
                --seed $hic_partition.seed
            #end if
            #if str($hic_partition.n_weight) != '':
                --n-weight $hic_partition.n_weight
            #end if
            #if str($hic_partition.n_perturb) != '':
                --n-perturb $hic_partition.n_perturb
            #end if
            #if str($hic_partition.f_perturb) != '':
                --f-perturb $hic_partition.f_perturb
            #end if
            --l-msjoin $hic_partition.l_msjoin
        #end if

        #if str($ont_integration.ont_integration_selector) == 'set':
            --ul ${ ','.join(["'%s'" % $x for $x in $ultralong_inputs]) }
            #if str($ont_integration.ul_rate) != '':
                --ul-rate $ont_integration.ul_rate
            #end if
            #if str($ont_integration.ul_tip) != '':
                --ul-tip $ont_integration.ul_tip
            #end if
        #end if

        ## Changed the default outputs of hifiasm. Hifiasm outputs a primary assembly and two balanced haplotypes in default. Incorporated the option '--primary' to output primary assembly and alternate assembly.
        --primary
        $input_filenames
        #if $log_out:
            2> output.log
        #end if
        && mkdir noseq_files
        && mv *.noseq.gfa noseq_files
        #if $bins_out:
            && mkdir bin_files
            && mv *.bin bin_files
        #end if
    ]]>
    </command>
    <inputs>
        <conditional name="mode">
            <param name="mode_selector" type="select" label="Assembly mode">
                <option value="standard">Standard</option>
                <option value="trio">Trio mode</option>
            </param>
            <when value="standard">
                <expand macro="reads"/>
            </when>
            <when value="trio">
                <expand macro="reads"/>
                <conditional name="trioinput">
                    <param name="trio_input_selector" type="select" label="What parental information are you using?">
                        <option value="reads">Parental reads (FASTQ files, gzipped or otherwise)</option>
                        <option value="lists">Lists of reads assigned by parent (text files)</option>
                    </param>
                    <when value="reads">
                        <param name="hap1_reads" type="data" format="fastq,fastq.gz" multiple="true" label="Haplotype 1 reads"/>
                        <param name="hap2_reads" type="data" format="fastq,fastq.gz" multiple="true" label="Haplotype 2 reads"/>
                    </when>
                    <when value="lists">
                        <param name="hap1_list" type="data" format="txt,tabular" label="Haplotype 1 read list"/>
                        <param name="hap2_list" type="data" format="txt,tabular" label="Haplotype 2 read list"/>
                    </when>
                </conditional>
                <!-- NOTE(review): the names read inverted (max_kmers is the lower bound, min_kmers the upper bound);
                     the flags and labels agree with each other, and the names are kept so saved workflows still load. -->
                <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency"/>
                <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency"/>
                <param name="yak_kmer_length" type="integer" min="0" max="64" value="31" label="Yak counter k-mer length"/>
                <param argument="--trio-dual" type="boolean" truevalue="--trio-dual" falsevalue="" label="Utilize homology information to correct trio-phasing errors"/>
            </when>
        </conditional>
        <!-- Also passed to yak count as its bloom filter size when parental reads are given. -->
        <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter"/>
        <conditional name="assembly_options">
            <param name="assembly_selector" type="select" label="Assembly options">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank"/>
            <when value="set">
                <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds"/>
                <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed"/>
                <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value"/>
                <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value"/>
                <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value"/>
                <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are below a threshold controlled by -x. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
                <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are over a threshold controlled by -y. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
                <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50"/>
                <param name="ignore_error_corrected" argument="-i" type="boolean" truevalue="-i" falsevalue="" value="False" label="Ignore error corrected reads and overlaps" help="Ignore error corrected reads and overlaps saved in prefix.*.bin files. Apart from assembly graphs, hifiasm also outputs three binary files that save all overlap information during assembly step. With these files, hifiasm can avoid the time-consuming all-to-all overlap calculation step, and do the assembly directly and quickly. This might be helpful when users want to get an optimized assembly by multiple rounds of experiments with different parameters."/>
                <param argument="--hom-cov" type="integer" optional="True" value="" label="Homozygous read coverage"/>
            </when>
        </conditional>
        <conditional name="purge_options">
            <param name="purge_selector" type="select" label="Options for purging duplicates">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank"/>
            <when value="set">
                <param name="purge_level" argument="-l" type="select" label="Purge level">
                    <option value="0" selected="true">None (0)</option>
                    <option value="1">Light (1)</option>
                    <option value="2">Aggressive (2)</option>
                    <option value="3">Aggressive - high heterozygosity rate (3)</option>
                </param>
                <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs"/>
                <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs"/>
                <param argument="--purge-max" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically"/>
                <param argument="--n-hap" type="integer" min="0" value="" optional="true" label="Assumption of haplotype number" help="A haplotype is defined as the combination of alleles for different polymorphisms that occur on the same chromosome."/>
            </when>
        </conditional>
        <conditional name="hic_partition">
            <param name="hic_partition_selector" type="select" label="Options for Hi-C-partition">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank"/>
            <when value="set">
                <param argument="--h1" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R1 reads"/>
                <param argument="--h2" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R2 reads"/>
                <param argument="--seed" type="integer" min="1" value="" optional="true" label="RNG seed"/>
                <param argument="--n-weight" type="integer" min="1" value="" optional="true" label="Rounds of reweighting Hi-C links. Increasing this may improve phasing results but takes longer time"/>
                <param argument="--n-perturb" type="integer" min="1" value="" optional="true" label="Rounds of perturbation. Increasing this may improve phasing results but takes longer time"/>
                <param argument="--f-perturb" type="float" min="0" max="1" value="" optional="true" label="Fraction to flip for perturbation. Increasing this may improve phasing results but takes longer time"/>
                <param argument="--l-msjoin" type="integer" min="0" value="500000" label="Detect misjoined unitigs of greater than or equal to specified size" help="A value of 0 disables this filter"/>
            </when>
        </conditional>
        <conditional name="ont_integration">
            <param name="ont_integration_selector" type="select" label="Options for ONT integration">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank"/>
            <when value="set">
                <param argument="--ul" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Ultra-long (>100kb) reads"/>
                <param argument="--ul-rate" type="float" value="0.2" optional="true" label="error rate of ultra-long reads (default 0.2)"/>
                <param argument="--ul-tip" type="integer" value="6" optional="true" label="remove tip unitigs composed of less than (INT) reads for the UL assembly (default 6)"/>
            </when>
        </conditional>
        <conditional name="advanced_options">
            <param name="advanced_selector" type="select" label="Advanced options">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank"/>
            <when value="set">
                <param name="hifiasm_kmer_length" argument="-k" type="integer" min="0" max="64" value="51" label="Hifiasm k-mer length"/>
                <param name="window_size" argument="-w" type="integer" min="0" value="51" label="Minimizer window size"/>
                <param name="drop_kmers" argument="-D" type="float" value="5.0" label="Drop k-mers" help="K-mers that occur more than this value multiplied by the coverage will be discarded"/>
                <param name="max_overlaps" argument="-N" type="integer" value="100" label="Maximum overlaps to consider" help="The software selects the larger of this value and the k-mer count multiplied by coverage"/>
                <param name="correction_rounds" argument="-r" type="integer" value="3" label="Correction rounds"/>
                <param argument="--min-hist-cnt" type="integer" min="0" value="" optional="true" label="Minimum count threshold" help="When analyzing the k-mer spectrum, ignore counts below this value"/>
                <param argument="--max-kocc" type="integer" min="0" value="20000" label="Maximum k-mer occurrence" help="Employ k-mers occurring less than INT times to rescue repetitive overlaps"/>
                <!-- The sanitizer limits the value to digits and the size suffixes k/K, m/M, g/G. -->
                <param argument="--hg-size" type="text" value="" optional="true" label="Estimated haploid genome size" help="Estimated haploid genome size used for inferring read coverage. If not provided, this parameter will be inferred by hifiasm. Common suffixes are required, for example, 100m or 3g">
                    <sanitizer invalid_char="">
                        <valid initial="string.digits">
                            <add value="k"/>
                            <add value="K"/>
                            <add value="m"/>
                            <add value="M"/>
                            <add value="G"/>
                            <add value="g"/>
                        </valid>
                    </sanitizer>
                    <validator type="regex">[0-9kKmMGg]+</validator>
                </param>
            </when>
        </conditional>
        <conditional name="scaffolding_options">
            <param name="scaffold_selector" type="select" label="Options for hifiasm scaffolding">
                <option value="blank">No scaffolding (default)</option>
                <option value="set">Turn on hifiasm scaffolding</option>
            </param>
            <when value="blank"/>
            <when value="set">
                <param argument="--scaf-gap" type="integer" min="1" value="3000000" optional="true" label="Max scaffolding gap size" help="Set the max gap size of scaffolds (default is 3,000,000)"/>
            </when>
        </conditional>
        <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
        <param name="bins_out" type="boolean" label="Output .bin files (used for development and debugging)?" truevalue="yes" falsevalue="no"/>
    </inputs>
    <outputs>
        <!--Standard mode-->
        <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph for pseudohaplotype assembly">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph for pseudohaplotype assembly">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}: primary assembly contig graph for pseudohaplotype assembly">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}: alternate assembly contig graph for pseudohaplotype assembly">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <!--Trio outputs without Hi-c reads-->
        <data name="hap1_contigs" format="gfa1" from_work_dir="output.dip.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}: hap1.p_ctg contig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="hap2_contigs" format="gfa1" from_work_dir="output.dip.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}: hap2.p_ctg contig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph for trio-binned assembly">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <data name="processed_unitigs_trio" format="gfa1" from_work_dir="output.dip.p_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved processed unitig graph for trio-binned assembly">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
        </data>
        <!-- Standard mode with Hi-C partition outputs -->
        <data name="hic_pcontig_graph" format="gfa1" from_work_dir="output.hic.p_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C primary contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_acontig_graph" format="gfa1" from_work_dir="output.hic.a_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C alternate contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_balanced_contig_hap1_graph" format="gfa1" from_work_dir="output.hic.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C hap1 balanced contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hic_balanced_contig_hap2_graph" format="gfa1" from_work_dir="output.hic.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}: Hi-C hap2 balanced contig graph">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <!-- The output name keeps its historical spelling so existing workflows still resolve it. -->
        <data name="hic_raw_initig" format="gfa1" from_work_dir="output.hic.r_utg.gfa" label="${tool.name} on ${on_string}: Hi-C raw unitig">
            <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <!--Trio outputs with Hi-c reads-->
        <!-- NOTE(review): despite their names, these two outputs collect the raw and processed unitig graphs. -->
        <data name="hap1_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.r_utg.gfa" label="${tool.name} on ${on_string}: raw unitig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <data name="hap2_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph">
            <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
        </data>
        <!--Log, noseq, and bin output-->
        <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: log file">
            <filter>log_out</filter>
        </data>
        <collection name="noseq_files" type="list" label="${tool.name} on ${on_string}: noseq files">
            <discover_datasets pattern="__name_and_ext__" format="gfa1" directory="noseq_files"/>
        </collection>
        <!-- NOTE(review): format="gfa1" looks copied from the noseq collection; confirm which datatype the bin files should get. -->
        <collection name="bin_files" type="list" label="${tool.name} on ${on_string}: bin files">
            <filter>bins_out</filter>
            <discover_datasets pattern="__name_and_ext__" format="gfa1" directory="bin_files"/>
        </collection>
    </outputs>
    <tests>
        <!-- TEST 1 -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out1-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 2 -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out2-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 3: Test logfile out-->
        <test expect_num_outputs="6">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <param name="log_out" value="yes"/>
            <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out1-primary.gfa" ftype="gfa1"/>
            <output name="log_file" ftype="txt">
                <assert_contents>
                    <has_text text="-o output -f 0 --primary input_0.fasta.gz"/>
                </assert_contents>
            </output>
        </test>
        <!--TEST 4: Test Hi-C reads-->
        <!-- NOTE(review): l_perturb below matches no declared input (the closest is f_perturb).
             It is left untouched because substituting a real parameter could change the Hi-C outputs
             this test compares against. The hap2 balanced contig graph is not asserted; the previous
             second assertion was a duplicate of the hap1 one. -->
        <test expect_num_outputs="6">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="hic_partition">
                <param name="hic_partition_selector" value="set"/>
                <param name="h1" value="hic_1.fastq.gz"/>
                <param name="h2" value="hic_2.fastq.gz"/>
                <param name="n_weight" value="1"/>
                <param name="n_perturb" value="1"/>
                <param name="l_perturb" value="0"/>
                <param name="l_msjoin" value="0"/>
            </conditional>
            <output name="hic_pcontig_graph" file="hifiasm-out-hifi-p.gfa" ftype="gfa1"/>
            <output name="hic_acontig_graph" file="hifiasm-out-hifi-a.gfa" ftype="gfa1"/>
            <output name="hic_balanced_contig_hap1_graph" ftype="gfa1">
                <assert_contents>
                    <has_text_matching expression="^S"/>
                    <has_size value="83914" delta="500"/>
                </assert_contents>
            </output>
            <output name="hic_raw_initig" ftype="gfa1">
                <assert_contents>
                    <has_text_matching expression="^S"/>
                    <has_size value="83904" delta="500"/>
                </assert_contents>
            </output>
        </test>
        <!-- TEST 5: Test trio mode -->
        <test expect_num_outputs="5">
            <param name="filter_bits" value="0"/>
            <conditional name="mode">
                <param name="mode_selector" value="trio"/>
                <param name="reads" value="child.fasta.gz"/>
                <conditional name="trioinput">
                    <param name="trio_input_selector" value="reads"/>
                    <param name="hap1_reads" value="paternal.fasta.gz"/>
                    <param name="hap2_reads" value="maternal.fasta.gz"/>
                </conditional>
                <param name="max_kmers" value="2"/>
                <param name="min_kmers" value="5"/>
            </conditional>
            <assert_command>
                <has_text text="-1 hap1.yak"/>
                <has_text text="-2 hap2.yak"/>
                <has_text text="--primary"/>
            </assert_command>
        </test>
        <!-- TEST 6: Test ignore-error-corrected option -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="assembly_options">
                <param name="assembly_selector" value="set"/>
                <param name="ignore_error_corrected" value="True"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out3-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out3-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out3-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 7: Test expected haplotype number -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="purge_options">
                <param name="purge_selector" value="set"/>
                <param name="n_hap" value="1"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out4-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out4-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out4-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 8: Test min_hist_cnt option -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="advanced_options">
                <param name="advanced_selector" value="set"/>
                <param name="min_hist_cnt" value="1"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out5-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out5-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out5-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 9: Test max_kocc option -->
        <!-- NOTE(review): the parameter was previously misspelled and so never applied;
             re-check the expected files now that the value reaches hifiasm. -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="advanced_options">
                <param name="advanced_selector" value="set"/>
                <param name="max_kocc" value="21000"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out6-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out6-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out6-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 10: Test hg-size option -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="advanced_options">
                <param name="advanced_selector" value="set"/>
                <param name="hg_size" value="1k"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out7-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out7-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out7-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 11: Test hom-cov option -->
        <!-- NOTE(review): the parameter was previously written with a hyphen and so never applied;
             re-check the expected files now that the value reaches hifiasm. -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <conditional name="assembly_options">
                <param name="assembly_selector" value="set"/>
                <param name="hom_cov" value="1000"/>
            </conditional>
            <output name="raw_unitigs" file="hifiasm-out8-raw.gfa" ftype="gfa1"/>
            <output name="processed_unitigs" file="hifiasm-out8-processed.gfa" ftype="gfa1"/>
            <output name="primary_contig_graph" file="hifiasm-out8-primary.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 12: test nanopore input -->
        <test expect_num_outputs="5">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="mode_selector" value="standard"/>
            <param name="filter_bits" value="0"/>
            <conditional name="ont_integration">
                <param name="ont_integration_selector" value="set"/>
                <param name="ul" value="nanopore.fasta.gz"/>
                <param name="ul_tip" value="1"/>
            </conditional>
            <output name="primary_contig_graph" file="hifiasm-out11-primary.gfa" ftype="gfa1"/>
            <output name="raw_unitigs" file="hifiasm-out11-raw.gfa" ftype="gfa1"/>
        </test>
        <!-- TEST 13: test multi-file nanopore input -->
        <test expect_num_outputs="6">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="mode_selector" value="standard"/>
            <param name="filter_bits" value="0"/>
            <param name="log_out" value="yes"/>
            <conditional name="ont_integration">
                <param name="ont_integration_selector" value="set"/>
                <param name="ul" value="nanopore.fasta.gz,nanopore.fasta.gz"/>
                <param name="ul_tip" value="1"/>
            </conditional>
            <output name="log_file" ftype="txt">
                <assert_contents>
                    <has_text text="--ul ./ultralong/input_0.fasta.gz,./ultralong/input_1.fasta.gz"/>
                </assert_contents>
            </output>
        </test>
        <!-- TEST 14: test bin files -->
        <test expect_num_outputs="6">
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz"/>
            <param name="filter_bits" value="0"/>
            <param name="mode_selector" value="standard"/>
            <param name="bins_out" value="yes"/>
            <output_collection name="bin_files" type="list" count="3"/>
        </test>
        <!-- TEST 15: Test trio LIST mode -->
        <test expect_num_outputs="6">
            <param name="filter_bits" value="0"/>
            <param name="log_out" value="yes"/>
            <conditional name="mode">
                <param name="mode_selector" value="trio"/>
                <param name="reads" value="child.fasta.gz"/>
                <conditional name="trioinput">
                    <param name="trio_input_selector" value="lists"/>
                    <param name="hap1_list" value="maternal.headers.txt"/>
                    <param name="hap2_list" value="paternal.headers.txt"/>
                </conditional>
                <param name="max_kmers" value="2"/>
                <param name="min_kmers" value="5"/>
            </conditional>
            <output name="log_file" ftype="txt">
                <assert_contents>
                    <has_text text="flagged 100 reads, out of 100 lines in file"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**HiFiASM - a fast de novo assembler**

Hifiasm is a fast haplotype-resolved *de novo* assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data.

----

.. class:: infomark

**Assembly mode**

- *Standard*: Standard assembly can be run in pseudohaplotype mode, or with Hi-C phasing using Hi-C reads from the same individual.
- *Trio*: When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning.

----

.. class:: infomark

**Outputs**

Non-Trio assembly:

- Haplotype-resolved raw unitig graph: This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors.
- Haplotype-resolved processed unitig graph without small bubbles: This graph 'pops' small bubbles in the raw unitig graph; small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information.
- Primary assembly contig graph: This graph includes a complete assembly with long stretches of phased blocks, though there may be some haplotype collapse.
- Alternate assembly contig graph: This graph consists of all contigs that are discarded from the primary contig graph.
- [hap1]/[hap2] contig graph: Each graph consists of phased contigs (output only with Hi-C phasing enabled).

Trio assembly:

- Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information.
- Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly.
- Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly.
    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41592-020-01056-5</citation>
    </citations>
</tool>