<!--
    Galaxy wrapper for kodoja_search.py.

    Runs Kodoja against a Kraken database and a Kaiju database (both chosen
    from Galaxy data tables) on single or paired FASTA/FASTQ reads, and
    returns the combined virus table written by the tool.
-->
<tool id="kodoja_search" name="Kodoja database search" version="0.0.0">
    <description>identify viruses from plant RNA sequencing data</description>
    <requirements>
        <requirement type="package" version="0.0.3">kodoja</requirement>
    </requirements>
    <version_command>kodoja_search.py --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
kodoja_search.py

--kraken_db '${kraken_db.fields.path}'
--kaiju_db '${kaiju_db.fields.path}'

## The input datasets live inside the "single_paired" conditional, so they
## must always be referenced through it.
#if $single_paired.single_paired_selector == 'yes'
    ## Paired reads: the data format is taken from the forward dataset.
    #if $single_paired.forward_input.is_of_type('fastq')
        --data_format fastq
    #else
        --data_format fasta
    #end if
    --read1 '${single_paired.forward_input}'
    --read2 '${single_paired.reverse_input}'
#else
    ## Single reads: only read1 is passed.
    #if $single_paired.input_sequences.is_of_type('fastq')
        --data_format fastq
    #else
        --data_format fasta
    #end if
    --read1 '${single_paired.input_sequences}'
#end if

## TODO:
## -m min_trim
## -a trim_adapt
## -q kraken_quick
## -p kraken_preload
## -c kaiju_score
## -l kaiju_minlen
## -i kaiju_mismatch

## We'll capture predictably named output files from here:
-o .
&&
mv ./virus_table.txt '$combined_table'
    ]]></command>
    <inputs>
        <!-- Reference databases, populated from the Galaxy data tables. -->
        <param label="Select a Kraken database" name="kraken_db" type="select">
            <options from_data_table="kraken_databases">
                <validator message="No Kraken database is available" type="no_options" />
            </options>
        </param>
        <param label="Select a Kaiju database" name="kaiju_db" type="select">
            <options from_data_table="kaiju_databases">
                <validator message="No Kaiju database is available" type="no_options" />
            </options>
        </param>
        <!-- Single or paired reads; the selected branch decides which datasets are requested. -->
        <conditional name="single_paired">
            <param name="single_paired_selector" type="select" label="Single or paired reads" help="Paired reads are passed as --read1 and --read2; single reads use --read1 only">
                <!-- TODO?
                <option value="collection">Collection</option>
                -->
                <option value="yes">Paired</option>
                <option selected="True" value="no">Single</option>
            </param>
            <when value="yes">
                <param format="fasta,fastq" name="forward_input" type="data" label="Forward strand" help="FASTA or FASTQ dataset"/>
                <param format="fasta,fastq" name="reverse_input" type="data" label="Reverse strand" help="FASTA or FASTQ dataset"/>
            </when>
            <when value="no">
                <param format="fasta,fastq" label="Input sequences" name="input_sequences" type="data" help="FASTA or FASTQ datasets"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Filled by moving ./virus_table.txt at the end of the command. -->
        <data name="combined_table" format="tabular" label="Kodoja species report for ${on_string}" />
    </outputs>
    <tests>
        <!-- Single-end FASTQ -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="no" />
            <param name="input_sequences" value="testData_1.fastq" ftype="fastq" />
            <output name="combined_table" file="virus_table_SE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end FASTQ -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
            <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
            <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end FASTA -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fasta" ftype="fasta" />
            <param name="reverse_input" value="testData_2.fasta" ftype="fasta" />
            <output name="combined_table" file="virus_table_PE_fasta.tabular" ftype="tabular" />
        </test>
    </tests>
    <help><![CDATA[
Kodoja is a tool intended to identify viral sequences in a
FASTQ/FASTA sequencing run by matching them against both
Kraken and Kaiju databases.

The main output is a seven column tab-separated table as follows
(tabular format in Galaxy):

1. Species name
2. Species NCBI taxonomy identifier (TaxID)
3. Number of reads assigned by *either* Kraken or Kaiju to this species
4. Number of reads assigned by *both* Kraken and Kaiju to this species
5. Genus name
6. Number of reads assigned by *either* Kraken or Kaiju to this genus
7. Number of reads assigned by *both* Kraken and Kaiju to this genus

For example,

================================== ============= ================= ============================= ========== =============== ===========================
Species                            Species TaxID Species sequences Species sequences (stringent) Genus      Genus sequences Genus sequences (stringent)
================================== ============= ================= ============================= ========== =============== ===========================
Cassava brown streak virus         137758        45                45                            Ipomovirus
Ugandan cassava brown streak virus 946046        28                28                            Ipomovirus
Tobacco etch virus                 12227         21                19                            Potyvirus
================================== ============= ================= ============================= ========== =============== ===========================

This is the command line tool's help::

    usage: kodoja_search.py [-h] [--version] -o OUTPUT_DIR -d1 KRAKEN_DB -d2
                            KAIJU_DB -r1 READ1 [-r2 READ2] [-f DATA_FORMAT]
                            [-t THREADS] [-s] [-m TRIM_MINLEN] [-a TRIM_ADAPT]
                            [-q KRAKEN_QUICK] [-p] [-c KAIJU_SCORE]
                            [-l KAIJU_MINLEN] [-i KAIJU_MISMATCH]

    Kodoja

    optional arguments:
      -h, --help            show this help message and exit
      --version             show program's version number and exit
      -o OUTPUT_DIR, --output_dir OUTPUT_DIR
                            Output directory path, required
      -d1 KRAKEN_DB, --kraken_db KRAKEN_DB
                            Kraken database path, required
      -d2 KAIJU_DB, --kaiju_db KAIJU_DB
                            Kaiju database path, required
      -r1 READ1, --read1 READ1
                            Read 1 file path, required
      -r2 READ2, --read2 READ2
                            Read 2 file path
      -f DATA_FORMAT, --data_format DATA_FORMAT
                            Sequence data format
      -t THREADS, --threads THREADS
                            Number of threads
      -s, --host_subset     Subset host sequences before Kaiju
      -m TRIM_MINLEN, --trim_minlen TRIM_MINLEN
                            Trimmomatic minimum length
      -a TRIM_ADAPT, --trim_adapt TRIM_ADAPT
                            Illumina adapter sequence file
      -q KRAKEN_QUICK, --kraken_quick KRAKEN_QUICK
                            Number of minium hits by Kraken
      -p, --kraken_preload  Kraken preload database
      -c KAIJU_SCORE, --kaiju_score KAIJU_SCORE
                            Kaju alignment score
      -l KAIJU_MINLEN, --kaiju_minlen KAIJU_MINLEN
                            Kaju minimum length
      -i KAIJU_MISMATCH, --kaiju_mismatch KAIJU_MISMATCH
                            Kaju allowed mismatches

    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubkodoja,
author = {Baizan Edge, Amanda},
year = {2018},
title = {Kodoja},
publisher = {GitHub},
journal = {GitHub repository},
url = {https://github.com/abaizan/kodoja},
}</citation>
    </citations>
</tool>