<!-- Mercurial > repos > iuc > krocus -->

<tool id="krocus" name="krocus" version="@VERSION@">
    <!-- One-line summary of the tool. -->
    <description>Multi-locus sequence typing (MLST) from uncorrected long reads</description>
    <!-- Shared definitions are pulled in from macros.xml; presumably this file
         provides the macros expanded below ("requirements", "version_command")
         - TODO confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry named "krocus". -->
    <xrefs>
        <xref type="bio.tools">krocus</xref>
    </xrefs>
    <!-- Macro expansions; their content is defined outside this file. -->
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!-- Cheetah command template. A non-zero exit code marks the job as failed.
         Each numeric option of the "opt_args" section is passed exactly once. -->
    <command detect_errors="exit_code"><![CDATA[
        ## Results go to fixed file names in the working directory; the
        ## outputs section collects them through from_work_dir.
        krocus
        --output_file detected_mlst.tsv
        ## Only ask krocus to write the matching reads when the user enabled it.
        #if $opt_args.filtered_reads
            --filtered_reads_file filtered_reads.fq
        #end if
        --max_gap $opt_args.max_gap
        --margin $opt_args.margin
        --min_block_size $opt_args.min_block_size
        --min_fasta_hits $opt_args.min_fasta_hits
        --min_kmers_for_onex_pass $opt_args.min_kmers_for_onex_pass
        --max_kmers $opt_args.max_kmers
        --print_interval $opt_args.print_interval
        --kmer $opt_args.kmer
        ## Boolean flags expand to the flag itself when checked, or to an
        ## empty string when unchecked.
        $opt_args.divisible_by_3
        $opt_args.verbose
        ## Positional arguments: database path from the data table, then reads.
        '$krocus_mlst_databases.fields.path' '$fastq'
    ]]>
    </command>
    <!-- User-facing parameters: the reads, the MLST database, and a section of
         tuning options that are all forwarded to the krocus command line. -->
    <inputs>
        <param name="fastq" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single read"/>
        <!-- Database choices come from the "krocus_mlst_databases" data table;
             the validator rejects the form when that table has no entries. -->
        <param label="Select a database" name="krocus_mlst_databases" type="select">
            <options from_data_table="krocus_mlst_databases">
                <validator message="No database is available" type="no_options" />
            </options>
        </param>
        <section name="opt_args" title="Optional Arguments">
            <!-- Controls both the filtered-reads command-line option and the
                 optional "filtered_reads" output. -->
            <param name="filtered_reads" type="boolean" truevalue="true" falsevalue="" checked="false" label="Save filtered reads ?"/>
            <!-- Label kept on a single line: a line break inside an attribute
                 value would leave a run of indentation spaces in the label. -->
            <param argument="--max_gap" type="integer" min="1" value="4" label="Maximum gap for blocks to be contiguous, measured in multiples of the k-mer size" help="Default:4" />
            <param argument="--margin" type="integer" min="1" value="10" label="Flanking region around a block to use for mapping" help="Default:10" />
            <param argument="--min_block_size" type="integer" min="1" value="150" label="Minimum block size in bases" help="Default:150" />
            <param argument="--min_fasta_hits" type="integer" min="1" value="10" label="Minimum No. of kmers matching a read" help="Default:10" />
            <param argument="--min_kmers_for_onex_pass" type="integer" min="2" value="5" label="Minimum No. of kmers matching a read in 1st pass" help="Default:5" />
            <param argument="--max_kmers" type="integer" min="1" value="10" label="Don't count kmers occurring more than this many times" help="Default:10" />
            <!-- NOTE(review): for the next two parameters the previous help text
                 quoted defaults (500 and 11) that differ from the form value
                 (10). The form values are kept so behaviour is unchanged; the
                 help now states both numbers. Confirm the intended defaults. -->
            <param argument="--print_interval" type="integer" min="1" value="10" label="Print ST every this number of reads" help="(Krocus default: 500; this wrapper defaults to 10)" />
            <param argument="--kmer" type="integer" min="2" value="10" label="k-mer size" help="(Krocus default: 11; this wrapper defaults to 10)" />
            <param argument="--divisible_by_3" type="boolean" truevalue="--divisible_by_3" falsevalue="" checked="false" label="Genes which are not divisible by 3 are excluded" help="(Default:No)" />
            <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="Turn on debugging" help="(Default:No)" />
        </section>
    </inputs>

    <!-- Both datasets are collected from fixed file names in the job working
         directory. -->
    <outputs>
        <!-- Typing result table; always produced. -->
        <data name="output_file"
              format="tabular"
              from_work_dir="detected_mlst.tsv"
              label="${tool.name} on ${on_string}: Output"/>
        <!-- Reads written by krocus; only created when the "filtered_reads"
             option of the opt_args section is enabled. -->
        <data name="filtered_reads"
              format="fastq"
              from_work_dir="filtered_reads.fq"
              label="${tool.name} on ${on_string}: Filtered reads">
            <filter>opt_args['filtered_reads'] is True</filter>
        </data>
    </outputs>

    <!-- Functional tests. Outputs are compared by size only (sim_size).
         NOTE(review): the input file name ends in .gz while ftype is
         "fastqsanger"; confirm whether "fastqsanger.gz" is intended. -->
    <tests>
        <!-- Default options: only the typing table is expected. -->
        <test expect_num_outputs="1">
            <param name="fastq" value="pacbio.fastq.gz" ftype="fastqsanger" />
            <param name="krocus_mlst_databases" value="mlst_db"/>
            <output name="output_file" ftype="tabular" file="mlst_output_pacbio.tsv" compare="sim_size" />
        </test>
        <!-- Filtered reads enabled: a second output is expected. The option is
             addressed through its enclosing section so the parameter path is
             explicit. -->
        <test expect_num_outputs="2">
            <param name="fastq" value="pacbio.fastq.gz" ftype="fastqsanger" />
            <param name="krocus_mlst_databases" value="mlst_db"/>
            <section name="opt_args">
                <param name="filtered_reads" value="True" />
            </section>
            <output name="filtered_reads" ftype="fastq" file="filtered_reads.fq" compare="sim_size" />
        </test>
    </tests>

    <help><![CDATA[
    Multi-locus sequence typing (MLST) from uncorrected long reads

    Krocus can predict a sequence type directly from uncorrected long reads. It
    was designed to consume read data as it is produced, providing results
    in minutes. It is the only tool which can do this from uncorrected long
    reads. We tested Krocus on over 600 samples sequenced using long read
    sequencing technologies from PacBio and Oxford Nanopore. It provides
    sequence types on average within 90 seconds, with a sensitivity of 94% and
    specificity of 97%, directly from uncorrected raw sequence reads.

    Documentation can be found at `<https://github.com/andrewjpage/krocus>`_.
    ]]>
    </help>
    <expand macro="citations"/>
</tool>