Mercurial > repos > iuc > genomescope

<tool id="genomescope" name="GenomeScope" version="@VERSION@" profile="20.01">
    <description>Analyze unassembled short reads</description>
    <macros>
        <!-- Version token; expanded in the tool's version attribute and in the package requirement below. -->
        <token name="@VERSION@">2.0</token>
    </macros>
    <requirements>
        <!-- Package dependency for the genomescope2 command, pinned to the version token. -->
        <requirement type="package" version="@VERSION@">genomescope2</requirement>
    </requirements>
    <!-- Command run to report the version of the underlying program. -->
    <version_command>genomescope2 --version</version_command>
    <!--
        Builds the genomescope2 command line.
        * The input histogram, the output directory (the current working
          directory, from which the outputs are collected) and the k-mer
          length are always passed.
        * The three boolean parameters expand to their flag text or to an
          empty string, as declared by their truevalue / falsevalue.
        * Every optional numeric option is emitted only when the user supplied
          a value. The check is made on the string form of the parameter so
          that an explicit 0 is still forwarded; a plain truthiness test would
          treat 0 the same as "not set" and silently drop the option.
    -->
    <command detect_errors="exit_code"><![CDATA[
        genomescope2
            --input '$input'
            --output .
            --kmer_length $kmer_length
            $no_unique_sequence $testing $trace_flag
            #if str($ploidy) != '':
                --ploidy $ploidy
            #end if
            #if str($lambda) != '':
                --lambda $lambda
            #end if
            #if str($max_kmercov) != '':
                --max_kmercov $max_kmercov
            #end if
            #if str($topology) != '':
                --topology $topology
            #end if
            #if str($initial_repetitiveness) != '':
                --initial_repetitiveness $initial_repetitiveness
            #end if
            #if str($initial_heterozygosities) != '':
                --initial_heterozygosities $initial_heterozygosities
            #end if
            #if str($transform_exp) != '':
                --transform_exp $transform_exp
            #end if
            #if str($true_params) != '':
                --true_params $true_params
            #end if
            #if str($num_rounds) != '':
                --num_rounds $num_rounds
            #end if
        ]]>
    </command>
    <inputs>
        <!-- Required input: the k-mer count histogram. -->
        <param argument="--input"
               type="data"
               format="tabular"
               label="Input histogram file"
               help="This file is a two column tabular file for example generated with the histo function of Jellyfish."/>

        <!-- Switches referenced by the output filters; they are not passed to the command line. -->
        <param name="model_output"
               type="boolean"
               label="Add the model parameters to your history"/>
        <param name="summary_output"
               type="boolean"
               label="Output a summary of the analysis"/>
        <param name="progress_output"
               type="boolean"
               label="Additional information for each optimization round"/>

        <!-- Model settings. Optional values are only added to the command line when set. -->
        <param argument="--ploidy"
               type="integer"
               optional="true"
               label="Ploidy for model to use"
               help="Default: 2"/>
        <param argument="--kmer_length"
               type="integer"
               value="21"
               optional="false"
               label="K-mer length used to calculate k-mer spectra"/>
        <param argument="--lambda"
               type="integer"
               optional="true"
               label="Optional initial kmercov estimate for model to use"/>
        <param argument="--max_kmercov"
               type="integer"
               optional="true"
               label="Optional maximum k-mer coverage threshold"
               help="K-mers with coverage greater than max_kmercov are ignored by the model"/>
        <param argument="--no_unique_sequence"
               type="boolean"
               truevalue="--no_unique_sequence"
               falsevalue=""
               label="Turn off yellow unique sequence line in plots"/>
        <param argument="--topology"
               type="integer"
               optional="true"
               label="Flag for topology for model to use"/>
        <param argument="--initial_repetitiveness"
               type="integer"
               optional="true"
               label="Initial value for repetitiveness"/>
        <param argument="--initial_heterozygosities"
               type="integer"
               optional="true"
               label="Initial values for nucleotide heterozygosity rates"/>
        <param argument="--transform_exp"
               type="integer"
               optional="true"
               label="Parameter for the exponent when fitting a transformed (x**transform_exp*y vs. x) k-mer histogram"
               help="Default: 1"/>

        <!-- Testing and diagnostics. -->
        <param argument="--testing"
               type="boolean"
               truevalue="--testing"
               falsevalue=""
               label="Create testing.tsv file with model parameters"/>
        <param argument="--true_params"
               type="integer"
               optional="true"
               label="Flag to state true simulated parameters for testing mode"/>
        <param argument="--trace_flag"
               type="boolean"
               truevalue="--trace_flag"
               falsevalue=""
               label="Turn on printing of iteration progress of nlsLM function"/>
        <param argument="--num_rounds"
               type="integer"
               min="1"
               optional="true"
               label="Number of optimization rounds"/>
    </inputs>
    <outputs>
        <!-- Plots: always collected from the job working directory. -->
        <data name="linear_plot"
              format="png"
              from_work_dir="linear_plot.png"
              label="${tool.name} on ${on_string} Linear plot"/>
        <data name="log_plot"
              format="png"
              from_work_dir="log_plot.png"
              label="${tool.name} on ${on_string} Log plot"/>
        <data name="transformed_linear_plot"
              format="png"
              from_work_dir="transformed_linear_plot.png"
              label="${tool.name} on ${on_string} Transformed linear plot"/>
        <data name="transformed_log_plot"
              format="png"
              from_work_dir="transformed_log_plot.png"
              label="${tool.name} on ${on_string} Transformed log plot"/>

        <!-- Optional reports: each is kept only when the boolean input named in its filter is enabled. -->
        <data name="model"
              format="txt"
              from_work_dir="model.txt"
              label="${tool.name} on ${on_string} Model">
            <filter>model_output</filter>
        </data>
        <data name="summary"
              format="txt"
              from_work_dir="summary.txt"
              label="${tool.name} on ${on_string} Summary">
            <filter>summary_output</filter>
        </data>
        <data name="progress"
              format="txt"
              from_work_dir="progress.txt"
              label="${tool.name} on ${on_string} Progress">
            <filter>progress_output</filter>
        </data>

        <!-- Only present when the testing flag is enabled. -->
        <data name="model_params"
              format="tabular"
              from_work_dir="SIMULATED_testing.tsv"
              label="${tool.name} on ${on_string} Model parameters">
            <filter>testing</filter>
        </data>
    </outputs>
    <tests>
        <!--
            Enables every optional output (model, summary, progress and the
            testing table) so that all eight declared datasets are produced
            and compared.
        -->
        <test expect_num_outputs="8">
            <param name="input" value="genomescope-in1.tab"/>
            <param name="kmer_length" value="21"/>
            <param name="testing" value="true"/>
            <param name="model_output" value="true"/>
            <param name="summary_output" value="true"/>
            <param name="progress_output" value="true"/>
            <output name="linear_plot" file="genomescope-out1-1.png" ftype="png"/>
            <output name="log_plot" file="genomescope-out1-2.png" ftype="png"/>
            <output name="transformed_linear_plot" file="genomescope-out1-3.png" ftype="png"/>
            <output name="transformed_log_plot" file="genomescope-out1-4.png" ftype="png" compare="sim_size"/>
            <output name="model" file="genomescope-out1-1.txt" ftype="txt" lines_diff="2"/>
            <output name="summary" file="genomescope-out1-2.txt" ftype="txt" lines_diff="2"/>
            <output name="progress" file="genomescope-out1-3.txt" ftype="txt" lines_diff="2"/>
            <!-- The dataset enabled by the "testing" input is declared as "model_params" in the outputs section. -->
            <output name="model_params" file="genomescope-out1-1.tab" ftype="tabular"/>
        </test>
    </tests>
    <help><![CDATA[

GenomeScope 2.0: Reference-free profiling of polyploid genomes
==============================================================

GenomeScope 2.0 applies classical insights from combinatorial theory to establish
a detailed mathematical model of how k-mer frequencies will be distributed in
heterozygous and polyploid genomes. GenomeScope 2.0 employs a polyploid-aware
mixture model that, within seconds, accurately infers genome properties from
unassembled sequencing data. GenomeScope 2.0 uses the k-mer count distribution,
e.g. from KMC or Jellyfish, and produces a report and several informative plots
describing the genome properties. We validate the approach on simulated polyploid
data created using a generative model with parameters for genome size, heterozygosity,
repetitiveness, ploidy, and sequencing coverage, and find GenomeScope 2.0 retains
accuracy across a broad range of realistic and extreme parameter values. We also
validate GenomeScope 2.0 by analyzing genuine sequence data from 11 diverse
polyploid genomes with known genome characteristics.

]]></help>
    <citations>
        <!-- Publications to cite when using this tool, referenced by DOI. -->
        <!-- NOTE(review): presumably the first entry is the original GenomeScope paper and the second the GenomeScope 2.0 paper; confirm against the DOI records. -->
        <citation type="doi">10.1093/bioinformatics/btx153</citation>
        <citation type="doi">10.1038/s41467-020-14998-3</citation>
    </citations>
</tool>
author	iuc
date	Fri, 30 Apr 2021 20:21:25 +0000
parents
children	3169a38c2656