view merqury.xml @ 0:9d79beb19ac3 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/merqury commit 561c3bcec49ecef9384b565861ccee11a377f7cd"
author iuc
date Sun, 18 Apr 2021 23:47:56 +0000
parents
children 39edec572bae
line wrap: on
line source

<tool id="merqury" name="Merqury" version="@TOOL_VERSION@" profile="20.01">
    <description>evaluate the assembly quality</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <!-- Command used to capture the installed tool version.
         NOTE(review): the job command in this wrapper invokes merqury.sh; confirm that the
         requirement package also ships a plain "merqury" executable that accepts a version flag. -->
    <version_command>merqury --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Unpack the meryl database archive(s), stage the assembly FASTA file(s)
        ## under fixed names, run merqury.sh and gather its result files in
        ## output_files/, the directory the output collections are discovered from.
        ## Every dataset path is single-quoted so the shell never splits or expands it.

        ## The read-set database is needed in both modes.
        mkdir -p output_files read-db.meryl
        && tar -zxf '${mode.meryldb_F1}' -C read-db.meryl --strip-components=1
        #if $mode.options == 'default'
            #set $meryl_dbs = 'read-db.meryl'
        #else
            ## Trio mode additionally passes the paternal and maternal databases.
            #set $meryl_dbs = 'read-db.meryl pat.meryl mat.meryl'
            && mkdir -p pat.meryl mat.meryl
            && tar -zxf '${mode.meryldb_PAT}' -C pat.meryl --strip-components=1
            && tar -zxf '${mode.meryldb_MAT}' -C mat.meryl --strip-components=1
        #end if

        ## Staged file names become part of the result file names, so they are
        ## kept fixed: assembly.fasta for one assembly, assembly_01/_02.fasta for two.
        #if $mode.assembly_options.number_assemblies == 'one'
            #set $fasta_names = ['assembly.fasta']
            #set $fasta_inputs = [$mode.assembly_options.assembly_01]
        #else
            #set $fasta_names = ['assembly_01.fasta', 'assembly_02.fasta']
            #set $fasta_inputs = [$mode.assembly_options.assembly_01, $mode.assembly_options.assembly_02]
        #end if
        #for $fasta_name, $fasta_input in zip($fasta_names, $fasta_inputs)
            #if $fasta_input.ext.endswith(".gz")
                && gunzip -c '$fasta_input' > $fasta_name
            #else
                && ln -s '$fasta_input' $fasta_name
            #end if
        #end for

        && merqury.sh $meryl_dbs ${' '.join($fasta_names)} ${label}

        ## Collect everything whose name starts with the label. The destination
        ## directory itself is excluded: a label such as "output" also matches
        ## "output_files", and moving a directory into itself makes mv fail.
        ## xargs is run without -r on purpose so that a run that produced no
        ## label-prefixed result at all still ends with a non-zero exit code.
        && find . -maxdepth 1 -name '${label}*' ! -name output_files -print0 | xargs -0 mv -t output_files
        ## Files named after the assembly rather than the label are optional.
        && find . -maxdepth 1 -name 'assembly_only*' -type f -print0 | xargs -0r mv -t output_files
    ]]></command>
    <inputs>
        <!-- Evaluation mode. "default" takes the read-set k-mer database only;
             "trio" additionally takes the paternal and maternal hap-mer databases.
             Both modes accept either one or two assemblies. -->
        <conditional name="mode">
            <param name="options" type="select"
                label="Evaluation mode" help="Merqury allows two operational modes: default and trio mode.">
                <option value="default">Default mode</option>
                <option value="trio">Trio mode</option>
            </param>
            <when value="default">
                <param name="meryldb_F1" type="data" format="meryldb"
                    label="K-mer counts database"
                    help="Select the k-mer counts of the read set."/>
                <conditional name="assembly_options">
                    <param name="number_assemblies" type="select"
                        label="Number of assemblies" help="Select the number of assemblies.">
                        <option value="one">One assembly (pseudo-haplotype or mixed-haplotype)</option>
                        <option value="two">Two assemblies (diploid)</option>
                    </param>
                    <when value="one">
                        <param name="assembly_01" type="data" format="fasta,fasta.gz"
                            label="Genome assembly"
                            help="Select the assembled genome (FASTA)."/>
                    </when>
                    <when value="two">
                        <param name="assembly_01" type="data" format="fasta,fasta.gz"
                            label="First genome assembly"
                            help="Select the first assembled genome (FASTA)."/>
                        <param name="assembly_02" type="data" format="fasta,fasta.gz"
                            label="Second genome assembly"
                            help="Select the second assembled genome (FASTA)."/>
                    </when>
                </conditional>
            </when>
            <when value="trio">
                <param name="meryldb_F1" type="data" format="meryldb"
                    label="F1 k-mer counts"
                    help="Select the k-mer counts of the read set (meryldb )."/>
                <param name="meryldb_PAT" type="data" format="meryldb"
                    label="Paternal hap-mer database"
                    help="K-mer counts of the paternal haplotype (meryldb)."/>
                <param name="meryldb_MAT" type="data" format="meryldb"
                    label="Maternal hap-mer database"
                    help="K-mer counts of the maternal haplotype (meryldb)."/>
                <conditional name="assembly_options">
                    <param name="number_assemblies" type="select"
                        label="Number of assemblies" help="Select the number of assemblies.">
                        <option value="one">One assembly (pseudo-haplotype or mixed-haplotype)</option>
                        <option value="two">Two assemblies (diploid)</option>
                    </param>
                    <when value="one">
                        <param name="assembly_01" type="data" format="fasta,fasta.gz"
                            label="Genome assembly"
                            help="Select the assembled genome (FASTA)."/>
                    </when>
                    <when value="two">
                        <param name="assembly_01" type="data" format="fasta,fasta.gz"
                            label="First genome assembly"
                            help="Select the first assembled genome (FASTA)."/>
                        <param name="assembly_02" type="data" format="fasta,fasta.gz"
                            label="Second genome assembly"
                            help="Select the second assembled genome (FASTA)."/>
                    </when>
                </conditional>
            </when>
        </conditional>
        <!-- The label is handed to merqury.sh as its last argument and is used as the
             file-name prefix when results are collected, so only letters, digits and
             underscores are let through. -->
        <param name="label" type="text" value="output_merqury" label="Identification label" help="Assign an identification label.">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                </valid>
            </sanitizer>
            <!-- An empty label would leave merqury.sh without its output-prefix argument
                 and turn the label-based collection glob into a match-everything pattern. -->
            <validator type="empty_field" message="Please provide an identification label."/>
        </param>
    </inputs>
    <outputs>
        <!-- One list collection per result type. Each collection picks up the files in
             output_files/ whose name matches its pattern; the part captured by the
             "name" group becomes the element identifier. -->
        <collection type="list" name="bed_files" label="${tool.name} on ${on_string}: bed">
            <discover_datasets directory="output_files" format="bed" pattern="(?P&lt;name&gt;.+)\.bed"/>
        </collection>
        <collection type="list" name="wig_files" label="${tool.name} on ${on_string}: wig">
            <discover_datasets directory="output_files" format="wig" pattern="(?P&lt;name&gt;.+)\.wig"/>
        </collection>
        <collection type="list" name="qv_files" label="${tool.name} on ${on_string}: qv">
            <discover_datasets directory="output_files" format="tabular" pattern="(?P&lt;name&gt;.+)\.qv"/>
        </collection>
        <collection type="list" name="png_files" label="${tool.name} on ${on_string}: png">
            <discover_datasets directory="output_files" format="png" pattern="(?P&lt;name&gt;.+)\.png"/>
        </collection>
        <collection type="list" name="sizes_files" label="${tool.name} on ${on_string}: size files">
            <discover_datasets directory="output_files" format="tabular" pattern="(?P&lt;name&gt;.+)\.sizes"/>
        </collection>
        <collection type="list" name="stats_files" label="${tool.name} on ${on_string}: stats">
            <discover_datasets directory="output_files" format="tabular" pattern="(?P&lt;name&gt;.+)\.stats"/>
        </collection>
    </outputs>
    <tests>
        <test>
            <!-- Trio mode with a single gzipped assembly, label "output_01". -->
            <conditional name="mode">
                <param name="options" value="trio"/>
                <param name="meryldb_F1" value="child.meryldb" ftype="meryldb"/>
                <param name="meryldb_PAT" value="pat.meryldb" ftype="meryldb"/>
                <param name="meryldb_MAT" value="mat.meryldb" ftype="meryldb"/>
                <conditional name="assembly_options">
                    <param name="number_assemblies" value="one"/>
                    <param name="assembly_01" value="assembly.fasta.gz" ftype="fasta.gz"/>
                </conditional>
            </conditional>
            <param name="label" value="output_01"/>
            <output_collection name="png_files" type="list" count="3">
                <!-- NOTE(review): the mat and pat plots are both compared against the same
                     reference file; confirm that this is intended. -->
                <element name="output_01.assembly.mat.spectra-cn.ln" file="output_01.assembly.spectra-cn.ln.png" ftype="png">
                    <assert_contents>
                        <has_size value="2733" delta="100"/>
                    </assert_contents>
                </element>
                <element name="output_01.assembly.pat.spectra-cn.ln" file="output_01.assembly.spectra-cn.ln.png" ftype="png">
                    <assert_contents>
                        <has_size value="2733" delta="100"/>
                    </assert_contents>
                </element>
                <element name="output_01.hapmers.blob" file="output_01.hapmers.blob.png" ftype="png">
                    <assert_contents>
                        <has_size value="10764" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="stats_files" type="list" count="2">
                <element name="output_01.assembly.100_20000.phased_block" ftype="tabular"/>
                <element name="output_01.completeness" ftype="tabular"/>
            </output_collection>
            <output_collection name="wig_files" type="list" count="2">
                <element name="output_01.assembly.pat" ftype="wig"/>
                <element name="output_01.assembly.mat" file="output_01.assembly.mat.wig" ftype="wig"/>
            </output_collection>
            <output_collection name="sizes_files" type="list" count="2">
                <element name="output_01.assembly.100_20000.phased_block" ftype="tabular"/>
                <element name="output_01.assembly.contig" file="output_01.assembly.contig.tabular" ftype="tabular"/>
            </output_collection>
            <output_collection name="bed_files" type="list" count="3">
                <element name="output_01.assembly.100_20000.phased_block" ftype="bed"/>
                <element name="output_01.assembly.100_20000.switch" ftype="bed"/>
                <!-- Datatype asserted like the sibling elements of this collection. -->
                <element name="output_01.assembly.sort" ftype="bed"/>
            </output_collection>
        </test>
        <test>
            <!-- Default mode with a single gzipped assembly, label "output_02". -->
            <conditional name="mode">
                <param name="options" value="default"/>
                <param name="meryldb_F1" ftype="meryldb" value="read-db.meryldb"/>
                <conditional name="assembly_options">
                    <param name="number_assemblies" value="one"/>
                    <param name="assembly_01" ftype="fasta.gz" value="assembly.fasta.gz"/>
                </conditional>
            </conditional>
            <param name="label" value="output_02"/>
            <!-- Copy-number spectra of the assembly, followed by the assembly spectra. -->
            <output_collection name="png_files" type="list" count="6">
                <element name="output_02.assembly.spectra-cn.fl" ftype="png" file="output_02.assembly.spectra-cn.fl.png">
                    <assert_contents>
                        <has_size delta="100" value="58059"/>
                    </assert_contents>
                </element>
                <element name="output_02.assembly.spectra-cn.ln" ftype="png" file="output_02.assembly.spectra-cn.ln.png">
                    <assert_contents>
                        <has_size delta="100" value="57763"/>
                    </assert_contents>
                </element>
                <element name="output_02.assembly.spectra-cn.st" ftype="png" file="output_02.assembly.spectra-cn.st.png">
                    <assert_contents>
                        <has_size delta="100" value="61814"/>
                    </assert_contents>
                </element>
                <element name="output_02.spectra-asm.fl" ftype="png" file="output_02.spectra-asm.fl.png">
                    <assert_contents>
                        <has_size delta="100" value="64445"/>
                    </assert_contents>
                </element>
                <element name="output_02.spectra-asm.ln" ftype="png" file="output_02.spectra-asm.ln.png">
                    <assert_contents>
                        <has_size delta="100" value="64795"/>
                    </assert_contents>
                </element>
                <element name="output_02.spectra-asm.st" ftype="png" file="output_02.spectra-asm.st.png">
                    <assert_contents>
                        <has_size delta="100" value="66317"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="stats_files" type="list" count="1">
                <element name="output_02.completeness" ftype="tabular" file="output_02.completeness.stats.tabular"/>
            </output_collection>
            <output_collection name="qv_files" type="list" count="2">
                <element name="output_02.assembly" ftype="tabular"/>
                <element name="output_02" ftype="tabular" file="output_02_qv.tabular"/>
            </output_collection>
            <!-- The assembly_only tracks are checked by size only. -->
            <output_collection name="wig_files" type="list" count="1">
                <element name="assembly_only" ftype="wig">
                    <assert_contents>
                        <has_size delta="100" value="29909811"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="bed_files" type="list" count="1">
                <element name="assembly_only" ftype="bed">
                    <assert_contents>
                        <has_size delta="100" value="73516121"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <!-- Default mode with two gzipped assemblies, label "output_03". -->
            <conditional name="mode">
                <param name="options" value="default"/>
                <param name="meryldb_F1" ftype="meryldb" value="read-db.meryldb"/>
                <conditional name="assembly_options">
                    <param name="number_assemblies" value="two"/>
                    <param name="assembly_01" ftype="fasta.gz" value="assembly.fasta.gz"/>
                    <param name="assembly_02" ftype="fasta.gz" value="assembly_02.fasta.gz"/>
                </conditional>
            </conditional>
            <param name="label" value="output_03"/>
            <!-- Per-assembly copy-number spectra first, then the combined plots. -->
            <output_collection name="png_files" type="list" count="10">
                <element name="output_03.assembly_01.spectra-cn.fl" ftype="png" file="output_03.assembly_01.spectra-cn.fl.png">
                    <assert_contents>
                        <has_size delta="100" value="58059"/>
                    </assert_contents>
                </element>
                <element name="output_03.assembly_01.spectra-cn.ln" ftype="png" file="output_03.assembly_01.spectra-cn.ln.png">
                    <assert_contents>
                        <has_size delta="100" value="57763"/>
                    </assert_contents>
                </element>
                <element name="output_03.assembly_01.spectra-cn.st" ftype="png" file="output_03.assembly_01.spectra-cn.st.png">
                    <assert_contents>
                        <has_size delta="100" value="61814"/>
                    </assert_contents>
                </element>
                <element name="output_03.assembly_02.spectra-cn.fl" ftype="png" file="output_03.assembly_02.spectra-cn.fl.png">
                    <assert_contents>
                        <has_size delta="100" value="61579"/>
                    </assert_contents>
                </element>
                <element name="output_03.assembly_02.spectra-cn.ln" ftype="png" file="output_03.assembly_02.spectra-cn.ln.png">
                    <assert_contents>
                        <has_size delta="100" value="61538"/>
                    </assert_contents>
                </element>
                <element name="output_03.assembly_02.spectra-cn.st" ftype="png" file="output_03.assembly_02.spectra-cn.st.png">
                    <assert_contents>
                        <has_size delta="100" value="67297"/>
                    </assert_contents>
                </element>
                <element name="output_03.spectra-asm.ln" ftype="png" file="output_03.spectra-asm.ln.png">
                    <assert_contents>
                        <has_size delta="100" value="2733"/>
                    </assert_contents>
                </element>
                <element name="output_03.spectra-cn.fl" ftype="png" file="output_03.spectra-cn.fl.png">
                    <assert_contents>
                        <has_size delta="100" value="61397"/>
                    </assert_contents>
                </element>
                <element name="output_03.spectra-cn.ln" ftype="png" file="output_03.spectra-cn.ln.png">
                    <assert_contents>
                        <has_size delta="100" value="61050"/>
                    </assert_contents>
                </element>
                <element name="output_03.spectra-cn.st" ftype="png" file="output_03.spectra-cn.st.png">
                    <assert_contents>
                        <has_size delta="100" value="65243"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="stats_files" type="list" count="1">
                <element name="output_03.completeness" ftype="tabular" file="output_03.completeness.stats.tabular"/>
            </output_collection>
            <output_collection name="qv_files" type="list" count="3">
                <element name="output_03.assembly_01" ftype="tabular"/>
                <element name="output_03.assembly_02" ftype="tabular"/>
                <element name="output_03" ftype="tabular" file="output_03_qv.tabular"/>
            </output_collection>
        </test>
        <test>
            <!-- Trio mode with two gzipped assemblies, label "output_04". -->
            <conditional name="mode">
                <param name="options" value="trio"/>
                <param name="meryldb_F1" value="child.meryldb" ftype="meryldb"/>
                <param name="meryldb_PAT" value="pat.meryldb" ftype="meryldb"/>
                <param name="meryldb_MAT" value="mat.meryldb" ftype="meryldb"/>
                <conditional name="assembly_options">
                    <param name="number_assemblies" value="two"/>
                    <param name="assembly_01" value="assembly.fasta.gz" ftype="fasta.gz"/>
                    <param name="assembly_02" value="assembly_02.fasta.gz" ftype="fasta.gz"/>
                </conditional>
            </conditional>
            <param name="label" value="output_04"/>
            <output_collection name="png_files" type="list" count="5">
                <element name="output_04.assembly_01.mat.spectra-cn.ln" file="output_04.assembly_01.mat.spectra-cn.ln.png" ftype="png">
                    <assert_contents>
                        <has_size value="2733" delta="100"/>
                    </assert_contents>
                </element>
                <element name="output_04.assembly_01.pat.spectra-cn.ln" file="output_04.assembly_01.pat.spectra-cn.ln.png" ftype="png">
                    <assert_contents>
                        <has_size value="2733" delta="100"/>
                    </assert_contents>
                </element>
                <element name="output_04.assembly_02.mat.spectra-cn.ln" file="output_04.assembly_02.mat.spectra-cn.ln.png" ftype="png">
                    <assert_contents>
                        <has_size value="2733" delta="100"/>
                    </assert_contents>
                </element>
                <element name="output_04.assembly_02.pat.spectra-cn.ln" file="output_04.assembly_02.pat.spectra-cn.ln.png" ftype="png">
                    <assert_contents>
                        <has_size value="2733" delta="100"/>
                    </assert_contents>
                </element>
                <element name="output_04.hapmers.blob" file="output_04.hapmers.blob.png" ftype="png">
                    <assert_contents>
                        <has_size value="10764" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- Elements without a reference file still assert their datatype, matching
                 the other elements and tests in this file. -->
            <output_collection name="stats_files" type="list" count="3">
                <element name="output_04.assembly_01.100_20000.phased_block" ftype="tabular"/>
                <element name="output_04.assembly_02.100_20000.phased_block" file="output_04.assembly_02.100_20000.phased_block.stats.tabular" ftype="tabular"/>
                <element name="output_04.completeness" ftype="tabular"/>
            </output_collection>
            <output_collection name="qv_files" type="list" count="1">
                <element name="output_04" file="output_04_qv.tabular" ftype="tabular"/>
            </output_collection>
            <output_collection name="wig_files" type="list" count="4">
                <element name="output_04.assembly_01.pat" ftype="wig"/>
                <element name="output_04.assembly_01.mat" ftype="wig"/>
                <element name="output_04.assembly_02.pat" ftype="wig"/>
                <element name="output_04.assembly_02.mat" ftype="wig"/>
            </output_collection>
            <output_collection name="sizes_files" type="list" count="4">
                <element name="output_04.assembly_01.100_20000.phased_block" ftype="tabular"/>
                <element name="output_04.assembly_01.contig" file="output_04.assembly_01.contig.tabular" ftype="tabular"/>
                <element name="output_04.assembly_02.100_20000.phased_block" ftype="tabular"/>
                <element name="output_04.assembly_02.contig" file="output_04.assembly_02.contig.tabular" ftype="tabular"/>
            </output_collection>
            <output_collection name="bed_files" type="list" count="6">
                <element name="output_04.assembly_01.100_20000.phased_block" ftype="bed"/>
                <element name="output_04.assembly_01.100_20000.switch" file="output_04.assembly_01.100_20000.switch.bed" ftype="bed"/>
                <element name="output_04.assembly_01.sort" file="output_04.assembly_01.sort.bed" ftype="bed"/>
                <element name="output_04.assembly_02.100_20000.phased_block" ftype="bed"/>
                <element name="output_04.assembly_02.100_20000.switch" ftype="bed"/>
                <element name="output_04.assembly_02.sort" ftype="bed"/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Purpose**

Merqury allows a reference-free assembly evaluation based on efficient k-mer set operations. By comparing k-mers in a de novo
assembly to those found in unassembled high-accuracy reads, Merqury estimates base-level accuracy and completeness. 

For trios, Merqury can also evaluate haplotype-specific accuracy, completeness, phase block continuity, and switch errors.                    

----
                    
.. class:: infomark
                    
**Input**

Merqury requires two types of inputs: meryldbs and the genome assemblies.
There is no need to run Merqury separately for each assembly. If two assemblies are provided, Merqury generates stats for each one and for both combined.

----
                    
.. class:: infomark
                    
**Output**

The generated metrics include consensus quality and k-mer completeness, and when parental genomic sequences are available (either assembled or unassembled), Merqury can output haplotype completeness, phase block statistics, switch error rates, and visual representations of phase consistency for the child’s genome. This includes TDF (or BED) features that can be displayed in a genome browser for visualizing the presence of k-mer classes across a genome (e.g., the k-mers inherited from a parental genome).                    

    ]]>    </help>
    <expand macro="citations"/>
</tool>