<tool id="artic_guppyplex" name="ARTIC guppyplex" version="@TOOL_VERSION@+galaxy0" profile="20.09">
    <description>Filter Nanopore reads by read length and (optionally) quality</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[
        mkdir inputs &&

        ## Note about compression handling in the following:
        ## guppyplex uses mimetypes.guess_type to guess compression, so
        ## it's important to get the suffixes of the inputs right.
        ## Even if it detects compressed input, it will write uncompressed
        ## output, so we need to handle output compression separately.

        ## symlink input files to appropriate names in the inputs/ directory
        bash prepare_inputs.sh &&
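
        ## determine whether the (first) input dataset is a gzipped fastq type
        ## so that the output can be re-compressed to match it further below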
        #if str($input.structure) == 'one_to_one':
            #set $compressed = $input.reads.is_of_type("fastq.gz", "fastqsanger.gz")
        #else:
            #set $compressed = next(iter($input.reads)).is_of_type("fastq.gz", "fastqsanger.gz")
        #end if

        artic guppyplex 
            --min-length $min_length 
            --max-length $max_length
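        ## a minimum quality of 0 disables the quality check entirely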
        #if $min_quality == 0:
            --skip-quality-check
        #else:
            --quality $min_quality
        #end if
            --directory inputs/
            --output guppyplex_out.fastq
        #if $compressed:
            && gzip guppyplex_out.fastq
        #end if
    ]]>
    </command>
    <configfiles>
        <configfile filename="prepare_inputs.sh"><![CDATA[
            #if str($input.structure) == 'one_to_one':
ln -s '$input.reads' inputs/1.${input.reads.ext}
            #else:
                #for $i, $elem in enumerate($input.reads):
ln -s '$elem' inputs/${i}.${elem.ext} &&
                #end for
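## the lone ':' below is a bash no-op that terminates the trailing '&&' of the loop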
:
            #end if
        ]]>
        </configfile>
    </configfiles>
    <inputs>
        <conditional name="input">
            <param name="structure" type="select"
            label="Structure of your input data"
            help="">
                <option value="one_to_one">One input dataset per sample</option>
                <option value="one_to_many">Multiple input datasets per sample</option>
            </param>
            <when value="one_to_one">
                <param name="reads" type="data" format="@FASTQ_FORMATS@"
                label="Sequencing dataset(s) - one per sample" />
            </when>
            <when value="one_to_many">
                <param name="reads" multiple="true" type="data" format="@FASTQ_FORMATS@"
                label="Partial sequencing datasets for your sample"
                help="Multiple datasets selected here will get combined into a single output for a single assumed sample. Select a nested list to have its inner lists interpreted as data from one sample each and to obtain one output per inner list." />
            </when>
        </conditional>
        <param name="max_length" type="integer" value="700" min="1" label="Maximum read length" help="Remove reads greater than this number of base pairs" />
        <param name="min_length" type="integer" value="400" min="1" label="Minimum read length" help="Remove reads less than this number of base pairs" />
        <param name="min_quality"
        optional="true" type="integer" min="0" value="7"
        label="Minimum read quality"
        help="Remove reads with average quality lower than this number. Set to 0 to skip the quality check." />
    </inputs>
    <outputs>
        <data name="output" format_source="reads" from_work_dir="guppyplex_out.fastq*" />
    </outputs>
    <tests>
        <test>
            <conditional name="input">
                <param name="structure" value="one_to_one" />
                <param name="reads" value="test.fastq.gz" />
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_size value="72300" delta="1000" />
                </assert_contents>
            </output>
        </test>
        <test>
            <conditional name="input">
                <param name="structure" value="one_to_many" />
                <param name="reads" value="test.fastq.gz,test.fastq.gz" />
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_size value="72300" delta="200" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
        The ARTIC_ guppyplex tool filters reads by length and (optionally) quality.
        This filter is typically used as a pre-processing step for Nanopore
        amplicon sequencing reads, where a size-based filter can remove
        possibly chimeric reads.
        
        The default parameters of the tool (minimum length of 400 and maximum of 700)
        are based on the ARTIC amplicon scheme. If used with a different amplicon
        scheme, they should be adjusted so that the minimum length equals the length
        of the shortest amplicon and the maximum length equals the length of the
        longest amplicon plus 200.
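        For example, with a hypothetical scheme producing amplicons of 1000 to
        1200 bp, the minimum length should be set to 1000 and the maximum length
        to 1400 (1200 + 200).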

        At the same time, the tool can be used to gather partial fastq
        datasets into a single dataset per sample.

        .. _ARTIC: https://artic.readthedocs.io/en/latest/
    ]]></help>
    <expand macro="citations" />
</tool>