3
|
1 <tool id="kodoja_search" name="Kodoja database search" version="0.0.8">
|
0
|
<!-- One-line summary of the tool's purpose. -->
2 <description>identify viruses from plant RNA sequencing data</description>
|
|
<!-- The kodoja package (version 0.0.8) is the only declared requirement. -->
3 <requirements>
|
3
|
4 <requirement type="package" version="0.0.8">kodoja</requirement>
|
0
|
5 </requirements>
|
|
<!-- Asks the kodoja_search.py script itself for its version string. -->
6 <version_command>kodoja_search.py --version</version_command>
|
|
7 <command detect_errors="exit_code"><![CDATA[
|
3
|
8 ## This if statement is for backward compatibility as early versions of the Kraken
|
|
9 ## wrapper assumed the UI facing field name was also part of the directory path
|
|
10 if [ -d '${kraken_db.fields.path}/${kraken_db.fields.name}' ]; then export KRAKEN_DEFAULT_DB='${kraken_db.fields.path}/${kraken_db.fields.name}'; else export KRAKEN_DEFAULT_DB='${kraken_db.fields.path}'; fi &&
|
0
|
11 kodoja_search.py
|
2
|
## Thread count is taken from GALAXY_SLOTS, falling back to 4 when it is unset.
12 -t="\${GALAXY_SLOTS:-4}"
|
3
|
13 --kraken_db "\$KRAKEN_DEFAULT_DB"
|
0
|
14 --kaiju_db '${kaiju_db.fields.path}'
|
|
15
|
|
## Paired mode: --data_format is chosen from the forward dataset's datatype only.
## Nested parameters are always referenced through their conditional (single_paired).
16 #if $single_paired.single_paired_selector == 'yes'
|
|
17 #if $single_paired.forward_input.is_of_type( 'fastq' ):
|
|
18 --data_format fastq
|
|
19 #else:
|
|
20 --data_format fasta
|
|
21 #end if
|
|
22 --read1 '${single_paired.forward_input}'
|
|
23 --read2 '${single_paired.reverse_input}'
|
|
24 #else:
|
|
25 #if $single_paired.input_sequences.is_of_type('fastq')
|
|
26 --data_format fastq
|
|
27 #else:
|
|
28 --data_format fasta
|
|
29 #end if
|
|
30 --read1 '${single_paired.input_sequences}'
|
|
31 #end if
|
|
32
|
|
33 ## TODO:
|
|
34 ## -m trim_minlen
|
|
35 ## -a trim_adapt
|
|
36 ## -q kraken_quick
|
|
37 ## -p kraken_preload
|
|
38 ## -c kaiju_score
|
|
39 ## -l kaiju_minlen
|
|
40 ## -i kaiju_mismatch
|
|
41
|
|
42 ## We'll capture predictably named output files from here:
|
|
43 -o .
|
|
44 &&
|
|
45 mv ./virus_table.txt '$combined_table'
|
3
|
## The per-read table is only moved into place when the user asked to capture it.
46 #if $capture_reads_table:
|
|
47 &&
|
|
48 mv ./kodoja_VRL.txt '$reads_table'
|
|
49 #end if
|
0
|
50 ]]></command>
|
|
51 <inputs>
|
|
<!-- Both reference databases are selected from data tables; each select carries a
     no_options validator with an explanatory message. -->
52 <param label="Select a Kraken database" name="kraken_db" type="select">
|
|
53 <options from_data_table="kraken_databases">
|
|
54 <validator message="No Kraken database is available" type="no_options" />
|
|
55 </options>
|
|
56 </param>
|
|
57 <param label="Select a Kaiju database" name="kaiju_db" type="select">
|
|
58 <options from_data_table="kaiju_databases">
|
|
59 <validator message="No Kaiju database is available" type="no_options" />
|
|
60 </options>
|
|
61 </param>
|
|
<!-- Read layout switch: "yes" asks for forward and reverse datasets, "no" (the
     default) for a single dataset. -->
62 <conditional name="single_paired">
|
1
|
63 <param name="single_paired_selector" type="select" label="Single or paired reads">
|
0
|
64 <!-- TODO?
|
|
65 <option value="collection">Collection</option>
|
|
66 -->
|
|
67 <option value="yes">Paired</option>
|
|
68 <option selected="True" value="no">Single</option>
|
|
69 </param>
|
|
70 <when value="yes">
|
|
71 <param format="fasta,fastq" name="forward_input" type="data" label="Forward strand" help="FASTA or FASTQ dataset"/>
|
|
72 <param format="fasta,fastq" name="reverse_input" type="data" label="Reverse strand" help="FASTA or FASTQ dataset"/>
|
|
73 </when>
|
|
74 <when value="no">
|
|
75 <param format="fasta,fastq" label="Input sequences" name="input_sequences" type="data" help="FASTA or FASTQ datasets"/>
|
|
76 </when>
|
|
77 </conditional>
|
3
|
<!-- Unchecked by default. The command template and the reads_table output filter
     both test this flag. -->
78 <param name="capture_reads_table" type="boolean" checked="false" label="Capture read assignment table" help="This table can be used to filter out reads matched to (individual) viruses"/>
|
0
|
79 </inputs>
|
|
80 <outputs>
|
|
<!-- Species-level report; the command moves ./virus_table.txt onto this dataset. -->
81 <data name="combined_table" format="tabular" label="Kodoja species report for ${on_string}" />
|
3
|
<!-- Per-read assignment table (./kodoja_VRL.txt); filtered on capture_reads_table. -->
82 <data name="reads_table" format="tabular" label="Kodoja read assignment for ${on_string}">
|
|
83 <filter>capture_reads_table</filter>
|
|
84 </data>
|
0
|
85 </outputs>
|
|
86 <tests>
|
|
<!-- Single-end FASTQ input; checks the species report only. -->
87 <test>
|
|
88 <param name="kraken_db" value="kraken3viruses" />
|
|
89 <param name="kaiju_db" value="kaiju3viruses" />
|
|
90 <param name="single_paired_selector" value="no" />
|
|
91 <param name="input_sequences" value="testData_1.fastq" ftype="fastq" />
|
|
92 <output name="combined_table" file="virus_table_SE_fastq.tabular" ftype="tabular" />
|
|
93 </test>
|
|
<!-- Paired FASTQ input; checks the species report only. -->
94 <test>
|
|
95 <param name="kraken_db" value="kraken3viruses" />
|
|
96 <param name="kaiju_db" value="kaiju3viruses" />
|
|
97 <param name="single_paired_selector" value="yes" />
|
|
98 <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
|
|
99 <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
|
|
100 <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
|
|
101 </test>
|
|
<!-- Paired FASTQ input with capture_reads_table enabled; checks both outputs. -->
102 <test>
|
|
103 <param name="kraken_db" value="kraken3viruses" />
|
|
104 <param name="kaiju_db" value="kaiju3viruses" />
|
|
105 <param name="single_paired_selector" value="yes" />
|
3
|
106 <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
|
|
107 <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
|
|
108 <param name="capture_reads_table" value="true" />
|
|
109 <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
|
|
110 <output name="reads_table" file="read_table_PE_fastq.tabular" ftype="tabular" />
|
|
111 </test>
|
|
<!-- Paired FASTA input; checks the species report only. -->
112 <test>
|
|
113 <param name="kraken_db" value="kraken3viruses" />
|
|
114 <param name="kaiju_db" value="kaiju3viruses" />
|
|
115 <param name="single_paired_selector" value="yes" />
|
0
|
116 <param name="forward_input" value="testData_1.fasta" ftype="fasta" />
|
|
117 <param name="reverse_input" value="testData_2.fasta" ftype="fasta" />
|
|
118 <output name="combined_table" file="virus_table_PE_fasta.tabular" ftype="tabular" />
|
|
119 </test>
|
|
120 </tests>
|
|
121 <help><![CDATA[
|
|
122 Kodoja is a tool intended to identify viral sequences in a
|
|
123 FASTQ/FASTA sequencing run by matching them against both
|
|
124 Kraken and Kaiju databases.
|
|
125
|
1
|
126 The main output is a tab-separated table (tabular format in Galaxy)
|
|
127 with the following columns:
|
0
|
128
|
|
129 1. Species name
|
|
130 2. Species NCBI taxonomy identifier (TaxID)
|
|
131 3. Number of reads assigned by *either* Kraken or Kaiju to this species
|
|
132 4. Number of reads assigned by *both* Kraken and Kaiju to this species
|
|
133 5. Genus name
|
|
134 6. Number of reads assigned by *either* Kraken or Kaiju to this genus
|
|
135 7. Number of reads assigned by *both* Kraken and Kaiju to this genus
|
|
136
|
2
|
137 The counts in columns 6 and 7 are for reads assigned to that genus, but not
|
|
138 to any species within it.
|
|
139
|
0
|
140 For example,
|
|
141
|
|
142 ================================== ============= ================= ============================= ========== =============== ===========================
|
|
143 Species                            Species TaxID Species sequences Species sequences (stringent) Genus      Genus sequences Genus sequences (stringent)
|
|
144 ---------------------------------- ------------- ----------------- ----------------------------- ---------- --------------- ---------------------------
|
2
|
145 Cassava brown streak virus         137758        45                45                            Ipomovirus 0               0
|
|
146 Ugandan cassava brown streak virus 946046        28                28                            Ipomovirus 0               0
|
|
147 Tobacco etch virus                 12227         21                19                            Potyvirus  0               0
|
0
|
148 ================================== ============= ================= ============================= ========== =============== ===========================
|
|
149
|
3
|
150 The second most important output, which you can optionally capture
|
|
151 for use within Galaxy, is a per-read table summarising matches found
|
|
152 with Kraken and/or Kaiju. The Kodoja Retrieve tool is not currently
|
|
153 available within Galaxy, but you can instead use this file directly
|
|
154 within Galaxy to filter out just the virus reads, or even reads
|
|
155 matched to a specific taxid. See for example ``seq_filter_by_id``
|
|
156 which is available via the Galaxy Tool Shed:
|
|
157
|
|
158 http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id
|
|
159 https://github.com/peterjc/pico_galaxy/tree/master/tools/seq_filter_by_id
|
|
160
|
|
161 The Kodoja Search command line tool offers additional options not
|
|
162 currently exposed in Galaxy, including::
|
0
|
163
|
|
164 -t THREADS Number of threads
|
|
165 -s, --host_subset Subset host sequences before Kaiju
|
|
166 -m TRIM_MINLEN, --trim_minlen TRIM_MINLEN
|
|
167 Trimmomatic minimum length
|
|
168 -a TRIM_ADAPT, --trim_adapt TRIM_ADAPT
|
|
169 Illumina adapter sequence file
|
|
170 -q KRAKEN_QUICK, --kraken_quick KRAKEN_QUICK
|
|
171 Number of minimum hits by Kraken
|
|
172 -p, --kraken_preload Kraken preload database
|
|
173 -c KAIJU_SCORE, --kaiju_score KAIJU_SCORE
|
|
174 Kaiju alignment score
|
|
175 -l KAIJU_MINLEN, --kaiju_minlen KAIJU_MINLEN
|
|
176 Kaiju minimum length
|
|
177 -i KAIJU_MISMATCH, --kaiju_mismatch KAIJU_MISMATCH
|
|
178 Kaiju allowed mismatches
|
|
179
|
2
|
180 For more information, please see the Kodoja manual
|
|
181 https://github.com/abaizan/kodoja/wiki/Kodoja-Manual
|
0
|
182 ]]></help>
|
|
183 <citations>
|
|
<!-- Single BibTeX entry pointing at the Kodoja GitHub repository. -->
184 <citation type="bibtex">
|
|
185 @misc{githubkodoja,
|
|
186 author = {Baizan Edge, Amanda},
|
|
187 year = {2018},
|
|
188 title = {Kodoja},
|
|
189 publisher = {GitHub},
|
|
190 journal = {GitHub repository},
|
|
191 url = {https://github.com/abaizan/kodoja},
|
|
192 }</citation>
|
|
193 </citations>
|
|
194 </tool>
|