Mercurial > repos > iuc > trycycler_subsample

<tool id='trycycler_subsample' name='Trycycler subsample' version='@TOOL_VERSION@' profile='20.01'>
    <description>make maximally-independent read subsets of an appropriate depth for your genome</description>
    <!-- Shared definitions are pulled in from macros.xml (not shown here); the
         macros expanded below are presumably defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expand the shared EDAM annotation and dependency requirement macros. -->
    <expand macro='edam_ontology' />
    <expand macro='requirements' />
    <!-- Command used to report the version of the underlying trycycler executable. -->
    <version_command>trycycler --version</version_command>
    <!-- Cheetah template for the `trycycler subsample` call. Job failure is
         detected from the process exit code. -->
    <command detect_errors='exit_code'><![CDATA[
        ## Directory that receives the subsampled read sets; the outputs section collects files from it.
        mkdir -p output_subsamples &&
        trycycler subsample
            --reads '$reads'
            --count ${count}
            ## The genome size is only passed when the user chose to supply one.
            #if $option.genome == 'true'
                --genome_size ${option.genome_size}
            #end if
            --min_read_depth ${min_read_depth}
            ## Fall back to 2 threads when GALAXY_SLOTS is not set.
            --threads \${GALAXY_SLOTS:-2}
            --out_dir output_subsamples
    ]]></command>
    <inputs>
        <!-- Long reads passed to the reads option of `trycycler subsample`.
             The parameter name (reads) is derived from the argument. -->
        <param argument='--reads' type='data' format='fastq,fastq.gz' label='Long-read dataset' help='Long-read dataset to subsample.' />
        <!-- Number of read subsets to produce. -->
        <param argument='--count' type='integer' min='2' max='20' value='12' label='Number of subsampled read sets to output' help='The number of maximally-independent read subsets to make. The default is 12, a good number for most cases.' />
        <!-- Either supply a genome size estimate or omit it; the command only
             adds the genome size option when "true" is selected. -->
        <conditional name='option'>
            <param name='genome' type='select' label='Provide an estimated genome size' help='If you do not provide this, Trycycler subsample will run miniasm to quickly assemble your read set to get a size. This value is used to calculate read depths and does not need to be exact, e.g. 10% error is fine.' >
                <option value='true'>Provide estimated genome size</option>
                <option value='false'>Run miniasm for estimating genome size</option>
            </param>
            <when value='false' />
            <when value='true'>
                <param argument='--genome_size' type='integer' min='10000' max='15000000' value='30000' label='Estimated total genome size' help='An estimate of the isolate total genome size.' />
            </when>
        </conditional>
        <!-- Always passed to Trycycler: the parameter is mandatory and defaults to 25. -->
        <param argument='--min_read_depth' type='integer' min='15' max='120' value='25' label='Minimum subset read depth' help='This is the minimum allowed read depth and also controls the subsampled depths. The default is 25.' />
    </inputs>
    <outputs>
        <!-- List collection built from the files found in output_subsamples;
             the element identifier and extension are taken from each file name. -->
        <collection name='subsamples_fastq' type='list'>
            <discover_datasets directory='output_subsamples'
                               pattern='__designation_and_ext__'
                               format='fastq' />
        </collection>
    </outputs>
    <!-- All tests use reads.fastq.gz with a supplied genome size of 10000 and
         vary min_read_depth and count. Each expects the output collection to
         contain exactly `count` elements matching the stored fixture files.
         NOTE(review): no test covers the genome='false' (miniasm) branch. -->
    <tests>
        <!-- Test 1: min_read_depth 20, 4 subsets. -->
        <test>
            <param name='reads' value='reads.fastq.gz' />
            <conditional name='option'>
                <param name='genome' value='true' />
                <param name='genome_size' value='10000' />
            </conditional>
            <param name='min_read_depth' value='20' />
            <param name='count' value='4' />
            <output_collection name='subsamples_fastq' type='list' count='4'>
                <element name='sample_01' file='sample_01_01.fastq' ftype='fastq' />
                <element name='sample_02' file='sample_02_01.fastq' ftype='fastq' />
                <element name='sample_03' file='sample_03_01.fastq' ftype='fastq' />
                <element name='sample_04' file='sample_04_01.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- Test 2: min_read_depth 22, 6 subsets. -->
        <test>
            <param name='reads' value='reads.fastq.gz' />
            <conditional name='option'>
                <param name='genome' value='true' />
                <param name='genome_size' value='10000' />
            </conditional>
            <param name='min_read_depth' value='22' />
            <param name='count' value='6' />
            <output_collection name='subsamples_fastq' type='list' count='6'>
                <element name='sample_01' file='sample_01_02.fastq' ftype='fastq' />
                <element name='sample_02' file='sample_02_02.fastq' ftype='fastq' />
                <element name='sample_03' file='sample_03_02.fastq' ftype='fastq' />
                <element name='sample_04' file='sample_04_02.fastq' ftype='fastq' />
                <element name='sample_05' file='sample_05_02.fastq' ftype='fastq' />
                <element name='sample_06' file='sample_06_02.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- Test 3: min_read_depth 25 (the parameter default), 5 subsets. -->
        <test>
            <param name='reads' value='reads.fastq.gz' />
            <conditional name='option'>
                <param name='genome' value='true' />
                <param name='genome_size' value='10000' />
            </conditional>
            <param name='min_read_depth' value='25' />
            <param name='count' value='5' />
            <output_collection name='subsamples_fastq' type='list' count='5'>
                <element name='sample_01' file='sample_01_03.fastq' ftype='fastq' />
                <element name='sample_02' file='sample_02_03.fastq' ftype='fastq' />
                <element name='sample_03' file='sample_03_03.fastq' ftype='fastq' />
                <element name='sample_04' file='sample_04_03.fastq' ftype='fastq' />
                <element name='sample_05' file='sample_05_03.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- Test 4: min_read_depth 30, 12 subsets (the default count). -->
        <test>
            <param name='reads' value='reads.fastq.gz' />
            <conditional name='option'>
                <param name='genome' value='true' />
                <param name='genome_size' value='10000' />
            </conditional>
            <param name='min_read_depth' value='30' />
            <param name='count' value='12' />
            <output_collection name='subsamples_fastq' type='list' count='12'>
                <element name='sample_01' file='sample_01_04.fastq' ftype='fastq' />
                <element name='sample_02' file='sample_02_04.fastq' ftype='fastq' />
                <element name='sample_03' file='sample_03_04.fastq' ftype='fastq' />
                <element name='sample_04' file='sample_04_04.fastq' ftype='fastq' />
                <element name='sample_05' file='sample_05_04.fastq' ftype='fastq' />
                <element name='sample_06' file='sample_06_04.fastq' ftype='fastq' />
                <element name='sample_07' file='sample_07_04.fastq' ftype='fastq' />
                <element name='sample_08' file='sample_08_04.fastq' ftype='fastq' />
                <element name='sample_09' file='sample_09_04.fastq' ftype='fastq' />
                <element name='sample_10' file='sample_10_04.fastq' ftype='fastq' />
                <element name='sample_11' file='sample_11_04.fastq' ftype='fastq' />
                <element name='sample_12' file='sample_12_04.fastq' ftype='fastq' />
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Purpose**

The *Trycycler subsample* tool tries to make maximally-independent read subsets of an appropriate depth for your genome. The goal is to use each subsample to generate various assemblies with different assemblers. It will also be handy to know the approximate size of your genome. If you're assembling something novel (i.e. you don't know the genome size), you could do a quick initial assembly at this point to get an estimate.

----

.. class:: infomark

**Input**

This tool requires a long-read dataset. Ideally your reads should have a coverage of at least 100x.


----

.. class:: infomark

**Output**

The result is a number of subsampled read sets, whose number is determined by the *count* parameter.

----

.. class:: infomark

@PIPELINE@
    ]]>    </help>
    <expand macro='citations' />
</tool>
author	iuc
date	Tue, 02 Apr 2024 08:31:35 +0000
parents	383bb5ad32e2
children