view kodoja_search.xml @ 0:18fe33eb7775 draft

Uploaded kodoja_search.py v0.0.3 wrapper. https://github.com/abaizan/kodoja_galaxy/commit/55004d41a9c0750b2543f394594ee58cc4426609
author peterjc
date Wed, 14 Mar 2018 12:46:57 -0400
parents
children 4554fcd4ef6d
line wrap: on
line source

<tool id="kodoja_search" name="Kodoja database search" version="0.0.0">
    <!-- Short tagline shown next to the tool name. -->
    <description>identify viruses from plant RNA sequencing data</description>
    <!-- NOTE(review): the enclosing tool element declares version 0.0.0 while
         the kodoja package below is pinned to 0.0.3; confirm whether the two
         are meant to match. -->
    <requirements>
        <requirement version="0.0.3" type="package">kodoja</requirement>
    </requirements>
    <!-- Asks the wrapped script itself for its version number. -->
    <version_command>kodoja_search.py --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Run Kodoja against the selected Kraken and Kaiju databases. Results are
## written into the job working directory and the species table is then moved
## onto the Galaxy output dataset.
kodoja_search.py

--kraken_db '${kraken_db.fields.path}'
--kaiju_db '${kaiju_db.fields.path}'

## All read inputs are defined inside the "single_paired" conditional, so they
## are always addressed through it. For paired data the forward read's
## datatype alone decides the --data_format value used for both files.
#if $single_paired.single_paired_selector == 'yes'
    #if $single_paired.forward_input.is_of_type('fastq')
        --data_format fastq
    #else
        --data_format fasta
    #end if
    --read1 '${single_paired.forward_input}'
    --read2 '${single_paired.reverse_input}'
#else
    #if $single_paired.input_sequences.is_of_type('fastq')
        --data_format fastq
    #else
        --data_format fasta
    #end if
    --read1 '${single_paired.input_sequences}'
#end if

## TODO:
## -m min_trim
## -a trim_adapt
## -q kraken_quick
## -p kraken_preload
## -c kaiju_score
## -l kaiju_minlen
## -i kaiju_mismatch

## We'll capture predictably named output files from here:
-o .
&&
mv ./virus_table.txt '$combined_table'
]]></command>
    <inputs>
        <!-- Reference databases are listed from the kraken_databases and
             kaiju_databases data tables; the no_options validators report a
             message when a table offers nothing to select. -->
        <param name="kraken_db" type="select" label="Select a Kraken database">
            <options from_data_table="kraken_databases">
                <validator type="no_options" message="No Kraken database is available" />
            </options>
        </param>
        <param name="kaiju_db" type="select" label="Select a Kaiju database">
            <options from_data_table="kaiju_databases">
                <validator type="no_options" message="No Kaiju database is available" />
            </options>
        </param>
        <!-- Chooses between one read file (passed as read1 only) and a
             forward/reverse pair (passed as read1 and read2). -->
        <conditional name="single_paired">
            <param name="single_paired_selector" type="select" label="Single or paired reads" help="Paired reads are passed to Kodoja as --read1 and --read2; single reads as --read1 only">
                <!-- TODO?
                <option value="collection">Collection</option>
                -->
                <option value="yes">Paired</option>
                <option value="no" selected="true">Single</option>
            </param>
            <when value="yes">
                <param name="forward_input" type="data" format="fasta,fastq" label="Forward strand" help="FASTA or FASTQ dataset"/>
                <param name="reverse_input" type="data" format="fasta,fastq" label="Reverse strand" help="FASTA or FASTQ dataset"/>
            </when>
            <when value="no">
                <param name="input_sequences" type="data" format="fasta,fastq" label="Input sequences" help="FASTA or FASTQ dataset"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Receives the virus_table.txt file that the command block moves
             out of the working directory. -->
        <data format="tabular" name="combined_table" label="Kodoja species report for ${on_string}" />
    </outputs>
    <tests>
        <!-- Single-end FASTQ input. -->
        <test>
            <param value="kraken3viruses" name="kraken_db" />
            <param value="kaiju3viruses" name="kaiju_db" />
            <param value="no" name="single_paired_selector" />
            <param ftype="fastq" value="testData_1.fastq" name="input_sequences" />
            <output ftype="tabular" file="virus_table_SE_fastq.tabular" name="combined_table" />
        </test>
        <!-- Paired-end FASTQ input. -->
        <test>
            <param value="kraken3viruses" name="kraken_db" />
            <param value="kaiju3viruses" name="kaiju_db" />
            <param value="yes" name="single_paired_selector" />
            <param ftype="fastq" value="testData_1.fastq" name="forward_input" />
            <param ftype="fastq" value="testData_2.fastq" name="reverse_input" />
            <output ftype="tabular" file="virus_table_PE_fastq.tabular" name="combined_table" />
        </test>
        <!-- Paired-end FASTA input. -->
        <test>
            <param value="kraken3viruses" name="kraken_db" />
            <param value="kaiju3viruses" name="kaiju_db" />
            <param value="yes" name="single_paired_selector" />
            <param ftype="fasta" value="testData_1.fasta" name="forward_input" />
            <param ftype="fasta" value="testData_2.fasta" name="reverse_input" />
            <output ftype="tabular" file="virus_table_PE_fasta.tabular" name="combined_table" />
        </test>
    </tests>
    <help><![CDATA[
Kodoja is a tool intended to identify viral sequences in a
FASTQ/FASTA sequencing run by matching them against both
Kraken and Kaiju databases.

The main output is a seven column tab-separated table as follows
(tabular format in Galaxy):

1. Species name
2. Species NCBI taxonomy identifier (TaxID)
3. Number of reads assigned by *either* Kraken or Kaiju to this species
4. Number of reads assigned by *both* Kraken and Kaiju to this species
5. Genus name
6. Number of reads assigned by *either* Kraken or Kaiju to this genus
7. Number of reads assigned by *both* Kraken and Kaiju to this genus

For example,

================================== ============= ================= ============================= ========== =============== ===========================
Species                            Species TaxID Species sequences Species sequences (stringent) Genus      Genus sequences Genus sequences (stringent)
================================== ============= ================= ============================= ========== =============== ===========================
Cassava brown streak virus                137758                45                            45 Ipomovirus
Ugandan cassava brown streak virus        946046                28                            28 Ipomovirus
Tobacco etch virus                         12227                21                            19 Potyvirus
================================== ============= ================= ============================= ========== =============== ===========================

This is the command line tool's help::

    usage: kodoja_search.py [-h] [--version] -o OUTPUT_DIR -d1 KRAKEN_DB -d2
    KAIJU_DB -r1 READ1 [-r2 READ2] [-f DATA_FORMAT]
    [-t THREADS] [-s] [-m TRIM_MINLEN] [-a TRIM_ADAPT]
    [-q KRAKEN_QUICK] [-p] [-c KAIJU_SCORE]
    [-l KAIJU_MINLEN] [-i KAIJU_MISMATCH]

    Kodoja

    optional arguments:
      -h, --help            show this help message and exit
      --version             show program's version number and exit
      -o OUTPUT_DIR, --output_dir OUTPUT_DIR
                            Output directory path, required
      -d1 KRAKEN_DB, --kraken_db KRAKEN_DB
                            Kraken database path, required
      -d2 KAIJU_DB, --kaiju_db KAIJU_DB
                            Kaiju database path, required
      -r1 READ1, --read1 READ1
                            Read 1 file path, required
      -r2 READ2, --read2 READ2
                            Read 2 file path
      -f DATA_FORMAT, --data_format DATA_FORMAT
                            Sequence data format
      -t THREADS, --threads THREADS
                            Number of threads
      -s, --host_subset     Subset host sequences before Kaiju
      -m TRIM_MINLEN, --trim_minlen TRIM_MINLEN
                            Trimmomatic minimum length
      -a TRIM_ADAPT, --trim_adapt TRIM_ADAPT
                            Illumina adapter sequence file
      -q KRAKEN_QUICK, --kraken_quick KRAKEN_QUICK
                            Number of minium hits by Kraken
      -p, --kraken_preload  Kraken preload database
      -c KAIJU_SCORE, --kaiju_score KAIJU_SCORE
                            Kaju alignment score
      -l KAIJU_MINLEN, --kaiju_minlen KAIJU_MINLEN
                            Kaju minimum length
      -i KAIJU_MISMATCH, --kaiju_mismatch KAIJU_MISMATCH
                            Kaju allowed mismatches

    ]]></help>
    <citations>
        <!-- Single BibTeX entry pointing at the Kodoja GitHub repository. -->
        <citation type="bibtex">
@misc{githubkodoja,
  author = {Baizan Edge, Amanda},
  year = {2018},
  title = {Kodoja},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/abaizan/kodoja},
}</citation>
    </citations>
</tool>