Mercurial > repos > bgruening > hifiasm
view hifiasm.xml @ 1:6505bd37670d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit 1122ef1b9a407a781d1416d07b272016d16a6a85"
author | bgruening |
---|---|
date | Wed, 03 Mar 2021 16:16:42 +0000 |
parents | bf0a4667e3ce |
children | f3c89da3af16 |
line wrap: on
line source
<tool id="hifiasm" name="Hifiasm" version="@VERSION@+galaxy0">
    <description>de novo assembler</description>
    <macros>
        <token name="@VERSION@">0.14</token>
        <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token>
        <!-- Shared definition of the main read input, expanded in both assembly modes. -->
        <xml name="reads">
            <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" />
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="@VERSION@">hifiasm</requirement>
        <!-- yak is only invoked in trio mode, to build the parental k-mer dumps. -->
        <requirement type="package" version="0.1">yak</requirement>
    </requirements>
    <version_command>hifiasm --version</version_command>
    <command detect_errors="exit_code">
        <![CDATA[
        ## Link every input dataset into the working directory under a name that
        ## carries the datatype extension of the dataset.
        #set $input_files = list()
        #set $hap1_inputs = list()
        #set $hap2_inputs = list()
        #for idx, read in enumerate($mode.reads):
            #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension)
            ln -s '$read' $inputfile &&
            #silent $input_files.append($inputfile)
        #end for
        #set $input_filenames = ' '.join($input_files)

        ## Trio mode: count the k-mers of each parental read set with yak before assembling.
        #if str($mode.mode_selector) == 'trio':
            #for idx, read in enumerate($mode.hap1_reads):
                #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                #silent $hap1_inputs.append($inputfile)
            #end for
            #for idx, read in enumerate($mode.hap2_reads):
                #set $inputfile = 'hap2_input_%d.%s' % ($idx, $read.dataset.extension)
                ln -s '$read' $inputfile &&
                #silent $hap2_inputs.append($inputfile)
            #end for
            #set $hap1_filenames = ' '.join($hap1_inputs)
            #set $hap2_filenames = ' '.join($hap2_inputs)
            yak count -k31 -b37 -t\${GALAXY_SLOTS:-1} -o hap1.yak $hap1_filenames &&
            yak count -k31 -b37 -t\${GALAXY_SLOTS:-1} -o hap2.yak $hap2_filenames &&
        #end if

        ## NOTE(review): hifiasm appears to take the read files as positional arguments and to
        ## use -i as a flag (ignore saved read correction and overlaps) rather than as an
        ## input option. The flag is kept exactly where the wrapper always had it; confirm
        ## against the hifiasm usage text before removing it.
        hifiasm
            -i $input_filenames
            -t \${GALAXY_SLOTS:-1}
            -o output
            -f $filter_bits
        #if str($advanced_options.advanced_selector) == 'set':
            -z $advanced_options.adapter_length
            -k $advanced_options.kmer_length
            -w $advanced_options.window_size
            -D $advanced_options.drop_kmers
            -N $advanced_options.max_overlaps
            -r $advanced_options.correction_rounds
        #end if
        #if str($assembly_options.assembly_selector) == 'set':
            -a $assembly_options.cleaning_rounds
            -m $assembly_options.pop_contigs
            -p $assembly_options.pop_unitigs
            -n $assembly_options.remove_tips
            -x $assembly_options.max_overlap
            -y $assembly_options.min_overlap
            $assembly_options.disable_post_join
        #end if
        ## The parental k-mer dumps are passed exactly once, together with the binning bounds.
        #if str($mode.mode_selector) == 'trio':
            -1 hap1.yak
            -2 hap2.yak
            -c $mode.max_kmers
            -d $mode.min_kmers
        #end if
        #if str($purge_options.purge_selector) == 'set':
            -l $purge_options.purge_level
            -s $purge_options.similarity_threshold
            -O $purge_options.minimum_overlap
            #if $purge_options.purge_cov:
                --purge-cov $purge_options.purge_cov
            #end if
            $purge_options.high_het
        #end if
        ]]>
    </command>
    <inputs>
        <conditional name="mode">
            <param name="mode_selector" type="select" label="Assembly mode">
                <option value="standard">Standard</option>
                <option value="trio">Trio mode</option>
            </param>
            <when value="standard">
                <expand macro="reads" />
            </when>
            <when value="trio">
                <expand macro="reads" />
                <param name="hap1_reads" type="data" format="fastq,fastq.gz" multiple="true" label="Haplotype 1 reads" />
                <param name="hap2_reads" type="data" format="fastq,fastq.gz" multiple="true" label="Haplotype 2 reads" />
                <!-- The two parameter names below read as swapped relative to their labels
                     (max_kmers feeds -c, the lower bound; min_kmers feeds -d, the upper bound).
                     The names are kept unchanged so that existing workflows keep working. -->
                <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" />
                <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" />
            </when>
        </conditional>
        <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" />
        <conditional name="advanced_options">
            <param name="advanced_selector" type="select" label="Advanced options">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" />
                <!-- The help section states that the k-mer length must be below 64, hence max="63". -->
                <param name="kmer_length" argument="-k" type="integer" min="0" max="63" value="51" label="K-mer length" />
                <param name="window_size" argument="-w" type="integer" min="0" value="51" label="Minimizer window size" />
                <param name="drop_kmers" argument="-D" type="float" value="5.0" label="Drop k-mers" help="K-mers that occur more than this value multiplied by the coverage will be discarded" />
                <param name="max_overlaps" argument="-N" type="integer" value="100" label="Maximum overlaps to consider" help="The software selects the larger of this value and the k-mer count multiplied by coverage" />
                <param name="correction_rounds" argument="-r" type="integer" value="3" label="Correction rounds" />
            </when>
        </conditional>
        <conditional name="assembly_options">
            <param name="assembly_selector" type="select" label="Assembly options">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" />
                <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" />
                <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" />
                <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" />
                <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" />
                <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" />
                <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" />
            </when>
        </conditional>
        <conditional name="purge_options">
            <param name="purge_selector" type="select" label="Options for purging duplicates">
                <option value="blank">Leave default</option>
                <option value="set">Specify</option>
            </param>
            <when value="blank" />
            <when value="set">
                <param name="purge_level" argument="-l" type="select" label="Purge level">
                    <option value="0" selected="true">None</option>
                    <option value="1">Light</option>
                    <option value="2">Aggressive</option>
                </param>
                <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" />
                <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" />
                <param argument="--purge-cov" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" />
                <param argument="--high-het" type="boolean" truevalue="--high-het" falsevalue="" label="Experimental high-heterozygosity mode" help="NB: May be unstable" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Standard mode outputs -->
        <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}, haplotype-resolved raw unitig graph">
            <filter>mode['mode_selector'] == 'standard'</filter>
        </data>
        <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}, processed unitig graph">
            <filter>mode['mode_selector'] == 'standard'</filter>
        </data>
        <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}, primary assembly contig graph">
            <filter>mode['mode_selector'] == 'standard'</filter>
        </data>
        <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}, alternate assembly contig graph">
            <filter>mode['mode_selector'] == 'standard'</filter>
        </data>
        <!-- Trio mode outputs. Output names must be unique within a tool, so the trio raw
             unitig graph carries its own name instead of reusing "raw_unitigs". -->
        <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}, haplotype-resolved raw unitig graph">
            <filter>mode['mode_selector'] == 'trio'</filter>
        </data>
        <!-- hap1_reads and hap2_reads accept multiple datasets, so the labels are built from
             on_string rather than from a single dataset name. -->
        <data name="hap1_contigs" format="gfa1" from_work_dir="output.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}, haplotype 1 contig graph">
            <filter>mode['mode_selector'] == 'trio'</filter>
        </data>
        <data name="hap2_contigs" format="gfa1" from_work_dir="output.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}, haplotype 2 contig graph">
            <filter>mode['mode_selector'] == 'trio'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Standard mode, single input file -->
        <test>
            <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out1-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" file="hifiasm-out1-alternate.gfa" ftype="gfa1" />
        </test>
        <!-- Standard mode, several input files -->
        <test>
            <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz" />
            <param name="filter_bits" value="0" />
            <param name="mode_selector" value="standard" />
            <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1" />
            <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1" />
            <output name="primary_contig_graph" file="hifiasm-out2-primary.gfa" ftype="gfa1" />
            <output name="alternate_contig_graph" file="hifiasm-out2-alternate.gfa" ftype="gfa1" />
        </test>
    </tests>
    <help><![CDATA[
***********************************
Hifiasm - a fast de novo assembler
***********************************

Hifiasm is a fast haplotype-resolved de novo assembler for PacBio HiFi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data.

**Assembly mode**

- *Standard*
- *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning.

**Trio options**

- *Haplotype 1 reads* : hap1/paternal reads. Their k-mers are counted with yak and the resulting k-mer dump is passed to hifiasm (-1).
- *Haplotype 2 reads* : hap2/maternal reads. Their k-mers are counted with yak and the resulting k-mer dump is passed to hifiasm (-2).
- *Lower bound of the binned k-mer's frequency* (-c)
- *Upper bound of the binned k-mer's frequency* (-d)

**Bits for bloom filter** (-f)

A value of 0 disables the bloom filter, which is suitable for small genomes. For genomes much larger than human, applying -f 38 or even -f 39 is preferred to save memory on k-mer counting.

**Advanced options**

- *Length of adapters to be removed* Old HiFi reads may contain short adapter sequences at the ends of reads. You can specify 20 to trim both ends of reads by 20bp.
- *K-mer length* (must be <64)
- *Minimizer window size*
- *Drop K-mers* K-mers that occur more than this value multiplied by the coverage will be discarded
- *Maximum overlaps to consider* Consider up to ``max(-D*coverage, -N)`` overlaps for each oriented read
- *Correction rounds* Number of rounds of correction

**Assembly options**

- *Cleaning rounds* Number of rounds of assembly cleaning
- *Minimum contig bubble size* Pop contig graph bubbles smaller than this value
- *Minimum unitig bubble size* Pop unitig graph bubbles smaller than this value
- *Tip unitigs* Keep only tip unitigs with a number of reads greater than or equal to this value
- *Maximum overlap drop ratio*
- *Minimum overlap drop ratio*
- *Skip post join contigs step* Disable the post join contigs step, which may improve N50

**Options for purging duplicates**

- *Purge level* 0: no purging; 1: light; 2: aggressive [0 for trio; 2 for unzip]
- *Similarity threshold for duplicate haplotigs*
- *Minimum overlapped reads for duplicate haplotigs*
- *Coverage upper bound* If not set, this will be determined automatically
- *Experimental high-heterozygosity mode* Enable this mode for samples with high heterozygosity. NB: May be unstable

**Outputs**

Non-trio assembly

- Haplotype-resolved raw unitig graph in GFA format : This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors.
- Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information.
- Primary assembly contig graph : This graph collapses different haplotypes.
- Alternate assembly contig graph : This graph consists of all assemblies that are discarded in the primary contig graph.

Trio assembly

- Haplotype-resolved raw unitig graph in GFA format : This graph keeps all haplotype information.
- Phased paternal/haplotype1 contig graph : This graph keeps the phased paternal/haplotype1 assembly.
- Phased maternal/haplotype2 contig graph : This graph keeps the phased maternal/haplotype2 assembly.
    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41592-020-01056-5</citation>
    </citations>
</tool>