diff extract_kraken_reads.xml @ 0:d491c23394f9 draft default tip

"planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/krakentools"
author jvolkening
date Thu, 30 Sep 2021 17:54:31 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_kraken_reads.xml	Thu Sep 30 17:54:31 2021 +0000
@@ -0,0 +1,282 @@
+<tool id="krakentools_extract_kraken_reads" name="Extract Kraken Reads By ID" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="17.09">
+    <description>Extract reads that were classified by the Kraken family at specified taxonomic IDs</description>
+    <!-- Pull in the shared definitions from macros.xml (a separate file, not part of this one) -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Expand the "requirements" and "stdio" macros; presumably both are defined in macros.xml (TODO confirm) -->
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <!-- Prints the tool version; the TOOL_VERSION token is defined outside this file -->
+    <version_command>echo -n @TOOL_VERSION@</version_command>
+
+    <command detect_errors="exit_code"><![CDATA[
+
+## Pick the read file(s) according to the selected library layout.
+## input_2 is only set for the two paired layouts.
+#if $library.type == 'paired':
+    #set input_1 = $library.input_1
+    #set input_2 = $library.input_2
+#else if $library.type == 'paired_collection':
+    #set input_1 = $library.input_1.forward
+    #set input_2 = $library.input_1.reverse
+#else
+    #set input_1 = $library.input_1
+#end if
+
+## do not quote $taxid: it is a space-delimited list that has to reach the
+## script as separate arguments (the regex validator on the parameter only
+## lets digits and spaces through)
+extract_kraken_reads.py
+
+    -k '$results'
+    -s '$input_1'
+    -o '$output_1'
+    --taxid $taxid
+    --max '$max'
+    $include_parents
+    $include_children
+    $exclude
+    $fastq_output
+#if str( $library.type ) != "single":
+    -s2 '$input_2'
+    -o2 '$output_2'
+#end if
+## the report file is only passed when parent/child expansion was requested;
+## it is quoted like every other dataset path in this command
+#if $include_parents or $include_children:
+    --report '$report'
+#end if
+
+    ]]></command>
+    <inputs>
+
+        <!-- Reads: the selected layout decides which of input_1 / input_2 exist -->
+        <conditional name="library">
+            <param name="type" type="select" label="Single or paired reads?">
+                <option value="single">Single</option>
+                <option value="paired">Paired</option>
+                <option value="paired_collection">Paired Collection</option>
+            </param>
+
+            <when value="single">
+                <param name="input_1" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A file" help="FASTQ or FASTA input reads" />
+            </when>
+
+            <when value="paired">
+                <param name="input_1" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A forward file" help="FASTQ or FASTA input reads" />
+                <param name="input_2" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A reverse file" help="FASTQ or FASTA input reads" />
+            </when>
+
+            <!-- Here input_1 is a paired collection; the command reads its forward and reverse elements -->
+            <when value="paired_collection">
+                <param name="input_1" format="fastq,fastqsanger,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="FASTQ or FASTA read pair collection" />
+            </when>
+
+        </conditional>
+
+        <!-- Kraken classification output, plus the report that the command passes only when parents/children are included -->
+        <param name="results" argument="-k" format="tabular" type="data" label="Results" help="Results (classification) file from Kraken/KrakenUniq/Kraken2" />
+        <param name="report" argument="--report" format="tabular" type="data" label="Report" optional="True" help="Report file from Kraken/KrakenUniq/Kraken2 (required when 'Include parents' or 'Include children' is enabled)" />
+
+        <!-- The validator restricts the value to digits and spaces because the command inserts it unquoted -->
+        <param name="taxid" argument="--taxid" type="text" value="" label="Taxonomic ID(s) to match" help="Space-delimited list of taxonomic IDs for which to extract matching reads">
+            <validator type="regex" message="Enter a space-separated list of numeric tax IDs">^\d+[\d ]*$</validator>
+        </param>
+        <param name="max" argument="--max" type="integer" value="100000000" min="1" label="Maximum reads to save" help="Maximum number of reads to save for each ID" />
+
+        <!-- Flags: each boolean renders as its command-line switch when set and as an empty string otherwise -->
+        <param name="exclude" argument="--exclude" type="boolean" value="False" truevalue="--exclude" falsevalue="" label="Invert output" help="Instead of finding reads that match given taxonomic IDs, find all reads that DO NOT match given IDs" />
+        <param name="fastq_output" argument="--fastq-output" type="boolean" value="False" truevalue="--fastq-output" falsevalue="" label="Output as FASTQ" help="Write output as FASTQ instead of the default FASTA" />
+        <param name="include_parents" argument="--include-parents" type="boolean" value="False" truevalue="--include-parents" falsevalue="" label="Include parents" help="Include reads classified at parent levels of the specified tax IDs" />
+        <param name="include_children" argument="--include-children" type="boolean" value="False" truevalue="--include-children" falsevalue="" label="Include children" help="Include reads classified more specifically than the specified tax IDs" />
+
+    </inputs>
+
+    <outputs>
+        <!-- Forward (or single-end) reads: FASTA by default, FASTQ when "Output as FASTQ" is set.
+             The change_format test is matched against the rendered value of the boolean,
+             i.e. its truevalue string, so that string is what "value" has to hold. -->
+        <data name="output_1" format="fasta" metadata_source="input_1" label="${tool.name} on ${on_string}: forward reads">
+            <change_format>
+                <when input="fastq_output" value="--fastq-output" format="fastqsanger" />
+            </change_format>
+        </data>
+        <!-- Reverse reads: only produced for the two paired layouts; same format switch as above -->
+        <data name="output_2" format="fasta" metadata_source="input_2" label="${tool.name} on ${on_string}: reverse reads" >
+            <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter>
+            <change_format>
+                <when input="fastq_output" value="--fastq-output" format="fastqsanger" />
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- test Kraken2 input, single input -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <output name="output_1" file="out1.k2.11176.fa"/>
+        </test>
+        <!-- test paired input -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="input_2" value="R2.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="paired"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <output name="output_1" file="out1.k2.11176.fa"/>
+            <output name="output_2" file="out2.k2.11176.fa"/>
+        </test>
+        <!-- test paired collection input -->
+        <test>
+            <param name="input_1">
+                <collection type="paired">
+                    <element name="forward" value="R1.fq.gz" ftype="fastqsanger"/>
+                    <element name="reverse" value="R2.fq.gz" ftype="fastqsanger"/>
+                </collection>
+            </param>
+            <param name="library|type" value="paired_collection"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <output name="output_1" file="out1.k2.11176.fa"/>
+            <output name="output_2" file="out2.k2.11176.fa"/>
+        </test>
+        <!-- test Kraken1 input, include children -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken1.results" ftype="tabular"/>
+            <param name="report" value="kraken1.report" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <param name="include_children" value="True"/>
+            <output name="output_1" file="out1.k1.11176.children.fa"/>
+        </test>
+        <!-- test exclude (combined with include children) -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken1.results" ftype="tabular"/>
+            <param name="report" value="kraken1.report" ftype="tabular"/>
+            <param name="taxid" value="10386"/>
+            <param name="include_children" value="True"/>
+            <param name="exclude" value="True"/>
+            <output name="output_1" file="out1.k1.e10386.children.fa"/>
+        </test>
+        <!-- test max -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <param name="max" value="2"/>
+            <output name="output_1" file="out1.k2.11176.max2.fa"/>
+        </test>
+        <!-- test include parents -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <param name="include_parents" value="True"/>
+            <param name="report" value="kraken2.report" ftype="tabular"/>
+            <output name="output_1" file="out1.k2.11176.parents.fa"/>
+        </test>
+        <!-- test multiple tax IDs (with exclude and include parents) -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="10386 11176"/>
+            <param name="exclude" value="True"/>
+            <param name="include_parents" value="True"/>
+            <param name="report" value="kraken2.report" ftype="tabular"/>
+            <output name="output_1" file="out1.k2.exclude_both.fa"/>
+        </test>
+        <!-- test that a tax ID list containing a non-numeric entry is expected to fail -->
+        <test expect_failure="True">
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="10386 f5"/>
+        </test>
+        <!-- test FASTQ output -->
+        <test>
+            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
+            <param name="library|type" value="single"/>
+            <param name="results" value="kraken2.results" ftype="tabular"/>
+            <param name="taxid" value="11176"/>
+            <param name="fastq_output" value="True"/>
+            <output name="output_1" file="out1.k2.11176.fq"/>
+        </test>
+
+    </tests>
+
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+-------------------
+
+After running Kraken, Kraken2, or KrakenUniq, users may use the
+`extract_kraken_reads.py` program to extract the FASTA or FASTQ reads
+classified as a specific taxonomy ID. For example, this program can be used to
+extract all bacterial reads or only reads assigned to Escherichia coli. Users
+must provide (at minimum) the original sequence file(s), at least one taxonomy
+ID, and the Kraken output file.
+
+-------------------
+
+**Command-line arguments**
+
+-------------------
+
+The following command-line usage corresponds with the Galaxy wrapper
+parameters::
+
+    usage: extract_kraken_reads.py [-h] -k KRAKEN_FILE -s SEQ_FILE1
+                                [-s2 SEQ_FILE2] -t TAXID [TAXID ...] -o
+                                OUTPUT_FILE [-o2 OUTPUT_FILE2] [--append]
+                                [--noappend] [--max MAX_READS] [-r REPORT_FILE]
+                                [--include-parents] [--include-children]
+                                [--exclude] [--fastq-output]
+
+    optional arguments:
+    -h, --help            show this help message and exit
+    -k KRAKEN_FILE        Kraken output file to parse
+    -s SEQ_FILE1, -s1 SEQ_FILE1, -1 SEQ_FILE1, -U SEQ_FILE1
+                            FASTA/FASTQ File containing the raw sequence letters.
+    -s2 SEQ_FILE2, -2 SEQ_FILE2
+                            2nd FASTA/FASTQ File containing the raw sequence
+                            letters (paired).
+    -t TAXID [TAXID ...], --taxid TAXID [TAXID ...]
+                            Taxonomy ID[s] of reads to extract (space-delimited)
+    -o OUTPUT_FILE, --output OUTPUT_FILE
+                            Output FASTA/Q file containing the reads and sample
+                            IDs
+    -o2 OUTPUT_FILE2, --output2 OUTPUT_FILE2
+                            Output FASTA/Q file containing the second pair of reads
+                            [required for paired input]
+    --max MAX_READS       Maximum number of reads to save [default: 100,000,000]
+    -r REPORT_FILE, --report REPORT_FILE
+                            Kraken report file. [required only if --include-
+                            parents/children is specified]
+    --include-parents     Include reads classified at parent levels of the
+                            specified taxids
+    --include-children    Include reads classified more specifically than the
+                            specified taxids
+    --exclude             Instead of finding reads matching specified taxids,
+                            finds all reads NOT matching specified taxids
+    --fastq-output        Print output FASTQ reads [requires input FASTQ,
+                            default: output is FASTA]
+
+--------------------
+
+**More Information**
+
+--------------------
+
+Author:  Jennifer Lu
+
+See the `online documentation`_
+
+.. _`online documentation`: https://ccb.jhu.edu/software/krakentools/index.shtml?t=extractreads
+
+--------------------
+
+**Galaxy Wrapper Development**
+
+--------------------
+
+Author: Jeremy Volkening
+
+    ]]></help>
+
+    <expand macro="citations" />
+
+</tool>