<!--
view extract_kraken_reads.xml @ 0:d491c23394f9 draft default tip

"planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/krakentools"
author jvolkening
date Thu, 30 Sep 2021 17:54:31 +0000
parents
children
line wrap: on
line source
-->

<tool id="krakentools_extract_kraken_reads"
      name="Extract Kraken Reads By ID"
      version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="17.09">
    <description>Extract reads that were classified by the Kraken family at specified taxonomic IDs</description>

    <!-- macros.xml is the only macro import, so the "requirements", "stdio" and
         "citations" macros expanded in this file come from it; the @TOOL_VERSION@
         and @VERSION_SUFFIX@ tokens are presumably defined there too (confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />

    <!-- Prints the wrapped tool version; -n suppresses the trailing newline. -->
    <version_command>echo -n @TOOL_VERSION@</version_command>

    <!--
        Cheetah template that builds the extract_kraken_reads.py command line.

        1. Pick the read file(s) according to the selected library layout.
        2. Pass the classification results, reads, output path, tax IDs, read cap
           and the boolean flags (each flag renders as its option or as nothing).
        3. Add the mate input/output for any non-single layout.
        4. Add the report when parent/child expansion is requested and a report
           dataset was actually supplied.
    -->
    <command detect_errors="exit_code"><![CDATA[

## Resolve the read dataset(s) for the chosen layout. input_2 is only set for
## the two paired layouts and is only referenced under the same condition below.
#if $library.type == 'paired':
    #set input_1 = $library.input_1
    #set input_2 = $library.input_2
#else if $library.type == 'paired_collection':
    #set input_1 = $library.input_1.forward
    #set input_2 = $library.input_1.reverse
#else
    #set input_1 = $library.input_1
#end if

## do not quote $taxid
## (it is a space-separated list that must expand to several arguments; the
## parameter's regex validator restricts it to digits and spaces)
extract_kraken_reads.py

    -k '$results'
    -s '$input_1'
    -o '$output_1'
    --taxid $taxid
    --max '$max'
    $include_parents
    $include_children
    $exclude
    $fastq_output
#if str( $library.type ) != "single":
    -s2 '$input_2'
    -o2 '$output_2'
#end if
## The report is optional in the form. Only pass it when it is both needed and
## present, so an unset dataset is never rendered as the literal path "None".
## The path is quoted like every other dataset path above.
#if ($include_parents or $include_children) and $report:
    --report '$report'
#end if

    ]]></command>
    <inputs>

        <!-- Reads: one file, two separate mate files, or a single paired collection.
             Each branch names its (first) dataset input_1; the command template
             resolves the actual file(s) per branch. -->
        <conditional name="library">
            <param name="type" type="select" label="Single or paired reads?">
                <option value="single">Single</option>
                <option value="paired">Paired</option>
                <option value="paired_collection">Paired Collection</option>
            </param>

            <when value="single">
                <param name="input_1" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A file" help="FASTQ or FASTA input reads" />
            </when>

            <when value="paired">
                <param name="input_1" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A forward file" help="FASTQ or FASTA input reads" />
                <param name="input_2" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A reverse file" help="FASTQ or FASTA input reads" />
            </when>

            <when value="paired_collection">
                <param name="input_1" format="fastq,fastqsanger,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="FASTQ or FASTA read pair collection" />
            </when>

        </conditional>

        <!-- Kraken outputs: the per-read classification table is always needed; the
             report is optional in the form and is only passed to the tool when
             parents or children are included. -->
        <param name="results" argument="-k" format="tabular" type="data" label="Results" help="Results (classification) file from Kraken/KrakenUniq/Kraken2" />
        <param name="report" argument="--report" format="tabular" type="data" label="Report" optional="True" help="Report file from Kraken/KrakenUniq/Kraken2 (required only when including parents or children)" />

        <!-- Tax IDs are inserted unquoted into the command line, so the validator
             below (digits and spaces only, starting with a digit) is what keeps
             the value safe. -->
        <param name="taxid" argument="--taxid" type="text" value="" label="Taxonomic ID(s) to match" help="Space-delimited list of taxonomic IDs for which to extract matching reads">
            <validator type="regex" message="Enter a space-separated list of numeric tax IDs">^\d+[\d ]*$</validator>
        </param>
        <param name="max" argument="--max" type="integer" value="100000000" min="1" label="Maximum reads to save" help="Maximum number of reads to save for each ID" />

        <!-- Flags: each renders as its command-line option when checked and as an
             empty string otherwise. All are off by default. -->
        <param name="exclude" argument="--exclude" type="boolean" checked="false" truevalue="--exclude" falsevalue="" label="Invert output" help="Instead of finding reads that match given taxonomic IDs, find all reads that DO NOT match given IDs" />
        <param name="fastq_output" argument="--fastq-output" type="boolean" checked="false" truevalue="--fastq-output" falsevalue="" label="Output as FASTQ" help="Write output as FASTQ instead of the default FASTA" />
        <param name="include_parents" argument="--include-parents" type="boolean" checked="false" truevalue="--include-parents" falsevalue="" label="Include parents" help="Include reads classified at parent levels of the specified tax IDs" />
        <param name="include_children" argument="--include-children" type="boolean" checked="false" truevalue="--include-children" falsevalue="" label="Include children" help="Include reads classified more specifically than the specified tax IDs" />

    </inputs>

    <!--
        Extracted reads. Both outputs default to FASTA and are retyped to
        fastqsanger when "Output as FASTQ" is enabled.

        The change_format condition is compared against the parameter as it is
        rendered for the command template, which for a boolean is its truevalue
        (here the fastq-output flag string), not the literal "True". Matching on
        "True" left FASTQ output typed as fasta.
    -->
    <outputs>
        <data name="output_1" format="fasta" metadata_source="input_1" label="${tool.name} on ${on_string}: forward reads">
            <change_format>
                <when input="fastq_output" value="--fastq-output" format="fastqsanger" />
            </change_format>
        </data>
        <!-- The mate output only exists for the two paired layouts. -->
        <data name="output_2" format="fasta" metadata_source="input_2" label="${tool.name} on ${on_string}: reverse reads" >
            <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter>
            <change_format>
                <when input="fastq_output" value="--fastq-output" format="fastqsanger" />
            </change_format>
        </data>
    </outputs>

    <tests>
        <!-- 1. Kraken2 results, single-end reads -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <output name="output_1" file="out1.k2.11176.fa"/>
        </test>

        <!-- 2. Paired reads supplied as two separate datasets -->
        <test>
            <param name="library|type" value="paired"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="input_2" value="R2.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <output name="output_1" file="out1.k2.11176.fa"/>
            <output name="output_2" file="out2.k2.11176.fa"/>
        </test>

        <!-- 3. Paired reads supplied as one paired collection -->
        <test>
            <param name="library|type" value="paired_collection"/>
            <param name="input_1">
                <collection type="paired">
                    <element name="forward" value="R1.fq.gz" ftype="fastqsanger"/>
                    <element name="reverse" value="R2.fq.gz" ftype="fastqsanger"/>
                </collection>
            </param>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <output name="output_1" file="out1.k2.11176.fa"/>
            <output name="output_2" file="out2.k2.11176.fa"/>
        </test>

        <!-- 4. Kraken1 results with a report, including child taxa -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken1.results" ftype="tabular"/>
            <param name="report" value="kraken1.report" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="include_children" value="True"/>
            <output name="output_1" file="out1.k1.11176.children.fa"/>
        </test>

        <!-- 5. Inverted selection (exclude) combined with child taxa -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken1.results" ftype="tabular"/>
            <param name="report" value="kraken1.report" ftype="tabular"/>
            <param name="taxid" value="10386"/>
            <param name="include_children" value="True"/>
            <param name="exclude" value="True"/>
            <output name="output_1" file="out1.k1.e10386.children.fa"/>
        </test>

        <!-- 6. Read cap: keep at most two reads -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="max" value="2"/>
            <output name="output_1" file="out1.k2.11176.max2.fa"/>
        </test>

        <!-- 7. Kraken2 results with a report, including parent taxa -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="report" value="kraken2.report" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="include_parents" value="True"/>
            <output name="output_1" file="out1.k2.11176.parents.fa"/>
        </test>

        <!-- 8. Several tax IDs at once (space-separated), excluded together with their parents -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="report" value="kraken2.report" ftype="tabular"/>
            <param name="taxid" value="10386 11176"/>
            <param name="exclude" value="True"/>
            <param name="include_parents" value="True"/>
            <output name="output_1" file="out1.k2.exclude_both.fa"/>
        </test>

        <!-- 9. Invalid tax ID list: "f5" is not numeric, so the run is expected to fail -->
        <test expect_failure="True">
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="10386 f5"/>
        </test>

        <!-- 10. FASTQ output instead of the default FASTA -->
        <test>
            <param name="library|type" value="single"/>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="fastq_output" value="True"/>
            <output name="output_1" file="out1.k2.11176.fq"/>
        </test>

    </tests>

    <help><![CDATA[

.. class:: infomark

**What it does**

-------------------

After running Kraken, Kraken2, or KrakenUniq, users may use the
``extract_kraken_reads.py`` program to extract the FASTA or FASTQ reads
classified as a specific taxonomy ID. For example, this program can be used to
extract all bacterial reads or only reads assigned to Escherichia coli. Users
must provide (at minimum) the original sequence file(s), at least one taxonomy
ID, and the Kraken output file.

-------------------

**Command-line arguments**

-------------------

The following command-line usage corresponds with the Galaxy wrapper
parameters::

    usage: extract_kraken_reads.py [-h] -k KRAKEN_FILE -s SEQ_FILE1
                                [-s2 SEQ_FILE2] -t TAXID [TAXID ...] -o
                                OUTPUT_FILE [-o2 OUTPUT_FILE2] [--append]
                                [--noappend] [--max MAX_READS] [-r REPORT_FILE]
                                [--include-parents] [--include-children]
                                [--exclude] [--fastq-output]

    optional arguments:
    -h, --help            show this help message and exit
    -k KRAKEN_FILE        Kraken output file to parse
    -s SEQ_FILE1, -s1 SEQ_FILE1, -1 SEQ_FILE1, -U SEQ_FILE1
                            FASTA/FASTQ File containing the raw sequence letters.
    -s2 SEQ_FILE2, -2 SEQ_FILE2
                            2nd FASTA/FASTQ File containing the raw sequence
                            letters (paired).
    -t TAXID [TAXID ...], --taxid TAXID [TAXID ...]
                            Taxonomy ID[s] of reads to extract (space-delimited)
    -o OUTPUT_FILE, --output OUTPUT_FILE
                            Output FASTA/Q file containing the reads and sample
                            IDs
    -o2 OUTPUT_FILE2, --output2 OUTPUT_FILE2
                            Output FASTA/Q file containing the second pair of reads
                            [required for paired input]
    --max MAX_READS       Maximum number of reads to save [default: 100,000,000]
    -r REPORT_FILE, --report REPORT_FILE
                            Kraken report file. [required only if --include-
                            parents/children is specified]
    --include-parents     Include reads classified at parent levels of the
                            specified taxids
    --include-children    Include reads classified more specifically than the
                            specified taxids
    --exclude             Instead of finding reads matching specified taxids,
                            finds all reads NOT matching specified taxids
    --fastq-output        Print output FASTQ reads [requires input FASTQ,
                            default: output is FASTA]

--------------------

**More Information**

--------------------

Author:  Jennifer Lu

See the `online documentation`_

.. _`online documentation`: https://ccb.jhu.edu/software/krakentools/index.shtml?t=extractreads

--------------------

**Galaxy Wrapper Development**

--------------------

Author: Jeremy Volkening

    ]]></help>

    <expand macro="citations" />

</tool>