Mercurial > repos > abaizan > kodoja
view kodoja_search.xml @ 3:d4111d1de76f draft default tip
v0.0.8, expose kodoja_VRL.tsv output
author | peterjc |
---|---|
date | Fri, 14 Sep 2018 09:55:56 -0400 |
parents | ee917702dbd8 |
children |
line wrap: on
line source
<tool id="kodoja_search" name="Kodoja database search" version="0.0.8">
    <!--
      Galaxy wrapper around kodoja_search.py.
      Inputs: one Kraken database and one Kaiju database (picked from the
      kraken_databases / kaiju_databases data tables) plus single or paired
      FASTA/FASTQ reads.
      Outputs: virus_table.txt as the species report (always) and
      kodoja_VRL.txt as the per-read table (only when requested).
    -->
    <description>identify viruses from plant RNA sequencing data</description>
    <requirements>
        <requirement type="package" version="0.0.8">kodoja</requirement>
    </requirements>
    <version_command>kodoja_search.py --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## This if statement is for backward compatibility as early versions of the Kraken
## wrapper assumed the UI facing field name was also part of the directory path
if [ -d '${kraken_db.fields.path}/${kraken_db.fields.name}' ]; then
    export KRAKEN_DEFAULT_DB='${kraken_db.fields.path}/${kraken_db.fields.name}';
else
    export KRAKEN_DEFAULT_DB='${kraken_db.fields.path}';
fi &&
kodoja_search.py
## Thread count comes from the job's GALAXY_SLOTS, falling back to 4 when unset.
-t="\${GALAXY_SLOTS:-4}"
--kraken_db "\$KRAKEN_DEFAULT_DB"
--kaiju_db '${kaiju_db.fields.path}'
#if $single_paired.single_paired_selector == 'yes':
    ## Paired reads: the format flag is derived from the forward dataset only.
    ## The dataset must be referenced through its enclosing conditional.
    #if $single_paired.forward_input.is_of_type('fastq'):
        --data_format fastq
    #else:
        --data_format fasta
    #end if
    --read1 '${single_paired.forward_input}'
    --read2 '${single_paired.reverse_input}'
#else:
    ## Single reads.
    #if $single_paired.input_sequences.is_of_type('fastq'):
        --data_format fastq
    #else:
        --data_format fasta
    #end if
    --read1 '${single_paired.input_sequences}'
#end if
## TODO:
## -m min_trim
## -a trim_adapt
## -q kraken_quick
## -p kraken_preload
## -c kaiju_score
## -l kaiju_minlen
## -i kaiju_mismatch
## We'll capture predictably named output files from here:
-o .
&& mv ./virus_table.txt '$combined_table'
#if $capture_reads_table:
    && mv ./kodoja_VRL.txt '$reads_table'
#end if
    ]]></command>
    <inputs>
        <!-- Database pickers; the no_options validator reports an error when the data table is empty. -->
        <param label="Select a Kraken database" name="kraken_db" type="select">
            <options from_data_table="kraken_databases">
                <validator message="No Kraken database is available" type="no_options" />
            </options>
        </param>
        <param label="Select a Kaiju database" name="kaiju_db" type="select">
            <options from_data_table="kaiju_databases">
                <validator message="No Kaiju database is available" type="no_options" />
            </options>
        </param>
        <!-- Selects which read parameters are shown; the command template branches on this value. -->
        <conditional name="single_paired">
            <param name="single_paired_selector" type="select" label="Single or paired reads">
                <!-- TODO? <option value="collection">Collection</option> -->
                <option value="yes">Paired</option>
                <option selected="true" value="no">Single</option>
            </param>
            <when value="yes">
                <param format="fasta,fastq" name="forward_input" type="data" label="Forward strand" help="FASTA or FASTQ dataset"/>
                <param format="fasta,fastq" name="reverse_input" type="data" label="Reverse strand" help="FASTA or FASTQ dataset"/>
            </when>
            <when value="no">
                <param format="fasta,fastq" label="Input sequences" name="input_sequences" type="data" help="FASTA or FASTQ datasets"/>
            </when>
        </conditional>
        <!-- Off by default; gates both the extra mv in the command and the reads_table output filter. -->
        <param name="capture_reads_table" type="boolean" checked="false" label="Capture read assignment table" help="This table can be used to filter out reads matched to (individual) viruses"/>
    </inputs>
    <outputs>
        <data name="combined_table" format="tabular" label="Kodoja species report for ${on_string}" />
        <data name="reads_table" format="tabular" label="Kodoja read assignment for ${on_string}">
            <filter>capture_reads_table</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single-end FASTQ -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="no" />
            <param name="input_sequences" value="testData_1.fastq" ftype="fastq" />
            <output name="combined_table" file="virus_table_SE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end FASTQ -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
            <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
            <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end FASTQ, also capturing the per-read table -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
            <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
            <param name="capture_reads_table" value="true" />
            <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
            <output name="reads_table" file="read_table_PE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end FASTA -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fasta" ftype="fasta" />
            <param name="reverse_input" value="testData_2.fasta" ftype="fasta" />
            <output name="combined_table" file="virus_table_PE_fasta.tabular" ftype="tabular" />
        </test>
    </tests>
    <help><![CDATA[
Kodoja is a tool intended to identify viral sequences in a FASTQ/FASTA
sequencing run by matching them against both Kraken and Kaiju databases.

The main output is a tab-separated table as follows (tabular format in
Galaxy) with the following columns:

1. Species name
2. Species NCBI taxonomy identifier (TaxID)
3. Number of reads assigned by *either* Kraken or Kaiju to this species
4. Number of reads assigned by *both* Kraken and Kaiju to this species
5. Genus name
6. Number of reads assigned by *either* Kraken or Kaiju to this genus
7. Number of reads assigned by *both* Kraken and Kaiju to this genus

The counts in columns 6 and 7 are for reads assigned to that genus, but not
to any species within it.

For example,

================================== ============= ================= ============================= ========== =============== ===========================
Species                            Species TaxID Species sequences Species sequences (stringent) Genus      Genus sequences Genus sequences (stringent)
---------------------------------- ------------- ----------------- ----------------------------- ---------- --------------- ---------------------------
Cassava brown streak virus         137758        45                45                            Ipomovirus 0               0
Ugandan cassava brown streak virus 946046        28                28                            Ipomovirus 0               0
Tobacco etch virus                 12227         21                19                            Potyvirus  0               0
================================== ============= ================= ============================= ========== =============== ===========================

The second most important output, which you can optionally capture for use
within Galaxy, is a per-read table summarising matches found with Kraken
and/or Kaiju.

The Kodoja Retrieve tool is not currently available within Galaxy, but you
can instead use this file directly within Galaxy to filter out just the
virus reads, or even reads matched to a specific taxid. See for example
``seq_filter_by_id`` which is available via the Galaxy Tool Shed:

http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id

https://github.com/peterjc/pico_galaxy/tree/master/tools/seq_filter_by_id

The Kodoja Search command line tool offers additional options not
currently exposed in Galaxy, including::

  -t                    Number of threads (set by Galaxy via GALAXY_SLOTS)
  -s, --host_subset     Subset host sequences before Kaiju
  -m TRIM_MINLEN, --trim_minlen TRIM_MINLEN
                        Trimmomatic minimum length
  -a TRIM_ADAPT, --trim_adapt TRIM_ADAPT
                        Illumina adapter sequence file
  -q KRAKEN_QUICK, --kraken_quick KRAKEN_QUICK
                        Number of minimum hits by Kraken
  -p, --kraken_preload  Kraken preload database
  -c KAIJU_SCORE, --kaiju_score KAIJU_SCORE
                        Kaiju alignment score
  -l KAIJU_MINLEN, --kaiju_minlen KAIJU_MINLEN
                        Kaiju minimum length
  -i KAIJU_MISMATCH, --kaiju_mismatch KAIJU_MISMATCH
                        Kaiju allowed mismatches

For more information, please see the Kodoja manual
https://github.com/abaizan/kodoja/wiki/Kodoja-Manual
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubkodoja,
  author = {Baizan Edge, Amanda},
  year = {2018},
  title = {Kodoja},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/abaizan/kodoja},
}</citation>
    </citations>
</tool>