<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.12">
<description>Search translated nucleotide database with protein query sequence(s)</description>
<!-- Job splitting (when enabled): the query input is split into parts using
     split_mode "to_size" with split_size 1000, the subject input is shared
     between the parts, and output1 is merged afterwards. -->
<parallelism method="multi"
             split_inputs="query"
             split_mode="to_size"
             split_size="1000"
             shared_inputs="subject"
             merge_outputs="output1"/>
<!-- Command used to report the version of the underlying tblastn binary. -->
<version_command>tblastn -version</version_command>
<!-- Command line for tblastn, run through the hide_stderr.py helper script.
     The text is a Cheetah template: conditional blocks pick the database or
     subject FASTA file, the output format, and the optional advanced settings. -->
<command interpreter="python">hide_stderr.py
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
tblastn
-query "$query"
#if $db_opts.db_opts_selector == "db":
  -db "${db_opts.database.fields.path}"
#else:
  -subject "$db_opts.subject"
#end if
-evalue $evalue_cutoff
-out "$output1"
##Set the extended list here so if/when we add things, saved workflows are not affected
#if str($out_format)=="ext":
  -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
#else:
## Deliberately NOT quoted: some option values carry a second argument
## (for example "0 -html") which must reach tblastn as separate words.
  -outfmt $out_format
#end if
## Use the slot count exported to the job environment as GALAXY_SLOTS when it
## is set, and fall back to the previous fixed value of 8 threads otherwise.
-num_threads "\${GALAXY_SLOTS:-8}"
#if $adv_opts.adv_opts_selector=="advanced":
  -db_gencode $adv_opts.db_gencode
  $adv_opts.filter_query
  -matrix $adv_opts.matrix
## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
## Note -max_target_seqs overrides -num_descriptions and -num_alignments
#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
  -max_target_seqs $adv_opts.max_hits
#end if
#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
  -word_size $adv_opts.word_size
#end if
##Ungapped disabled for now - see comments below
##$adv_opts.ungapped
  $adv_opts.parse_deflines
## End of advanced options:
#end if
</command>
<!-- User-facing parameters. The names here are the ones referenced by the
     command template, the output label/format rules and the tests. -->
<inputs>
    <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
    <!-- Search target: either a pre-built BLAST database listed in blastdb.loc,
         or a FASTA file from the history. Each branch declares the unused
         parameter as a hidden empty value. -->
    <conditional name="db_opts">
        <param name="db_opts_selector" type="select" label="Subject database/sequences">
            <option value="db" selected="True">BLAST Database</option>
            <option value="file">FASTA file (pairwise e-values)</option>
        </param>
        <when value="db">
            <param name="database" type="select" label="Nucleotide BLAST database">
                <options from_file="blastdb.loc">
                    <column name="value" index="0"/>
                    <column name="name" index="1"/>
                    <column name="path" index="2"/>
                </options>
            </param>
            <param name="subject" type="hidden" value="" />
        </when>
        <when value="file">
            <param name="database" type="hidden" value="" />
            <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
        </when>
    </conditional>
    <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
    <!-- The option values are passed to -outfmt as-is, except "ext" which the
         command template expands into the extended column list. -->
    <param name="out_format" type="select" label="Output format">
        <option value="6" selected="True">Tabular (standard 12 columns)</option>
        <option value="ext">Tabular (extended 24 columns)</option>
        <option value="5">BLAST XML</option>
        <option value="0">Pairwise text</option>
        <option value="0 -html">Pairwise HTML</option>
        <option value="2">Query-anchored text</option>
        <option value="2 -html">Query-anchored HTML</option>
        <option value="4">Flat query-anchored text</option>
        <option value="4 -html">Flat query-anchored HTML</option>
        <!--
        <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
        -->
    </param>
    <conditional name="adv_opts">
        <param name="adv_opts_selector" type="select" label="Advanced Options">
            <option value="basic" selected="True">Hide Advanced Options</option>
            <option value="advanced">Show Advanced Options</option>
        </param>
        <when value="basic" />
        <when value="advanced">
            <param name="db_gencode" type="select" label="Database/subject genetic code">
                <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->
                <!-- Default option: the attribute is "selected", as on the other selects in this file. -->
                <option value="1" selected="True">1. Standard</option>
                <option value="2">2. Vertebrate Mitochondrial</option>
                <option value="3">3. Yeast Mitochondrial</option>
                <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
                <option value="5">5. Invertebrate Mitochondrial</option>
                <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
                <option value="9">9. Echinoderm Mitochondrial</option>
                <option value="10">10. Euplotid Nuclear</option>
                <option value="11">11. Bacteria and Archaea</option>
                <option value="12">12. Alternative Yeast Nuclear</option>
                <option value="13">13. Ascidian Mitochondrial</option>
                <option value="14">14. Flatworm Mitochondrial</option>
                <option value="15">15. Blepharisma Macronuclear</option>
                <option value="16">16. Chlorophycean Mitochondrial Code</option>
                <option value="21">21. Trematode Mitochondrial Code</option>
                <option value="22">22. Scenedesmus obliquus mitochondrial Code</option>
                <option value="23">23. Thraustochytrium Mitochondrial Code</option>
                <option value="24">24. Pterobranchia mitochondrial code</option>
            </param>
            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
            <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
            <param name="matrix" type="select" label="Scoring matrix">
                <option value="BLOSUM90">BLOSUM90</option>
                <option value="BLOSUM80">BLOSUM80</option>
                <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
                <option value="BLOSUM50">BLOSUM50</option>
                <option value="BLOSUM45">BLOSUM45</option>
                <option value="PAM250">PAM250</option>
                <option value="PAM70">PAM70</option>
                <option value="PAM30">PAM30</option>
            </param>
            <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
            <!-- Zero means "not set": the command template only passes the option when the value is above zero. -->
            <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
                <validator type="in_range" min="0" />
            </param>
            <!-- I'd like word_size to be optional, with minimum 2 for tblastn -->
            <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
                <validator type="in_range" min="0" />
            </param>
            <!--
            Can't use '-ungapped' on its own, error back is:
            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
            Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
            -->
            <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
        </when>
    </conditional>
</inputs>
<!-- Single result dataset. It is tabular unless the chosen out_format value
     matches one of the change_format rules below (text, HTML or BLAST XML). -->
<outputs>
    <data name="output1" format="tabular" label="tblastn on ${db_opts.db_opts_selector}">
        <change_format>
            <!-- Plain text reports -->
            <when input="out_format" value="0" format="txt"/>
            <when input="out_format" value="2" format="txt"/>
            <when input="out_format" value="4" format="txt"/>
            <!-- HTML reports -->
            <when input="out_format" value="0 -html" format="html"/>
            <when input="out_format" value="2 -html" format="html"/>
            <when input="out_format" value="4 -html" format="html"/>
            <!-- BLAST XML -->
            <when input="out_format" value="5" format="blastxml"/>
        </change_format>
    </data>
</outputs>
<!-- Declares the tblastn binary as a requirement of this tool. -->
<requirements>
    <requirement type="binary">tblastn</requirement>
</requirements>
<!-- Functional tests. Every test searches four_human_proteins.fasta against
     rhodopsin_nucs.fasta (FASTA subject, not a database) with advanced options
     shown, SEG filtering off, BLOSUM80 and an e-value cutoff of 1e-10; they
     differ only in output format and in the parse_deflines setting. -->
<tests>
    <!-- BLAST XML output -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta"/>
        <param name="db_opts_selector" value="file"/>
        <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="database" value=""/>
        <param name="evalue_cutoff" value="1e-10"/>
        <param name="out_format" value="5"/>
        <param name="adv_opts_selector" value="advanced"/>
        <param name="filter_query" value="false"/>
        <param name="matrix" value="BLOSUM80"/>
        <param name="max_hits" value="0"/>
        <param name="word_size" value="0"/>
        <param name="parse_deflines" value="false"/>
        <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml"/>
    </test>
    <!-- Extended tabular output -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta"/>
        <param name="db_opts_selector" value="file"/>
        <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="database" value=""/>
        <param name="evalue_cutoff" value="1e-10"/>
        <param name="out_format" value="ext"/>
        <param name="adv_opts_selector" value="advanced"/>
        <param name="filter_query" value="false"/>
        <param name="matrix" value="BLOSUM80"/>
        <param name="max_hits" value="0"/>
        <param name="word_size" value="0"/>
        <param name="parse_deflines" value="false"/>
        <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular"/>
    </test>
    <!-- Standard tabular output -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta"/>
        <param name="db_opts_selector" value="file"/>
        <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="database" value=""/>
        <param name="evalue_cutoff" value="1e-10"/>
        <param name="out_format" value="6"/>
        <param name="adv_opts_selector" value="advanced"/>
        <param name="filter_query" value="false"/>
        <param name="matrix" value="BLOSUM80"/>
        <param name="max_hits" value="0"/>
        <param name="word_size" value="0"/>
        <param name="parse_deflines" value="false"/>
        <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular"/>
    </test>
    <test>
        <!-- Same as above, but parse deflines - on BLAST 2.2.25+ makes no difference -->
        <param name="query" value="four_human_proteins.fasta" ftype="fasta"/>
        <param name="db_opts_selector" value="file"/>
        <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="database" value=""/>
        <param name="evalue_cutoff" value="1e-10"/>
        <param name="out_format" value="6"/>
        <param name="adv_opts_selector" value="advanced"/>
        <param name="filter_query" value="false"/>
        <param name="matrix" value="BLOSUM80"/>
        <param name="max_hits" value="0"/>
        <param name="word_size" value="0"/>
        <param name="parse_deflines" value="true"/>
        <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular"/>
    </test>
    <!-- Pairwise HTML output -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta"/>
        <param name="db_opts_selector" value="file"/>
        <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="database" value=""/>
        <param name="evalue_cutoff" value="1e-10"/>
        <param name="out_format" value="0 -html"/>
        <param name="adv_opts_selector" value="advanced"/>
        <param name="filter_query" value="false"/>
        <param name="matrix" value="BLOSUM80"/>
        <param name="max_hits" value="0"/>
        <param name="word_size" value="0"/>
        <param name="parse_deflines" value="false"/>
        <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html"/>
    </test>
</tests>
<!-- Help text is reStructuredText. Keep the blank lines between paragraphs and
     keep every table cell aligned under the '=' border rows, otherwise the
     simple tables will not parse. -->
<help>

.. class:: warningmark

**Note**. Database searches may take a substantial amount of time.
For large input datasets it is advisable to allow overnight processing.

-----

**What it does**

Search a *translated nucleotide database* using a *protein query*,
using the NCBI BLAST+ tblastn command line tool.

-----

**Output format**

Because Galaxy focuses on processing tabular data, the default output of this
tool is tabular. The standard BLAST+ tabular output contains 12 columns:

====== ========= ============================================
Column NCBI name Description
------ --------- --------------------------------------------
     1 qseqid    Query Seq-id (ID of your sequence)
     2 sseqid    Subject Seq-id (ID of the database hit)
     3 pident    Percentage of identical matches
     4 length    Alignment length
     5 mismatch  Number of mismatches
     6 gapopen   Number of gap openings
     7 qstart    Start of alignment in query
     8 qend      End of alignment in query
     9 sstart    Start of alignment in subject (database hit)
    10 send      End of alignment in subject (database hit)
    11 evalue    Expectation value (E-value)
    12 bitscore  Bit score
====== ========= ============================================

The BLAST+ tools can optionally output additional columns of information,
but this takes longer to calculate. Most (but not all) of these columns are
included by selecting the extended tabular output. The extra columns are
included *after* the standard 12 columns. This is so that you can write
workflow filtering steps that accept either the 12 or 24 column tabular
BLAST output.

====== ============= ===========================================
Column NCBI name     Description
------ ------------- -------------------------------------------
    13 sallseqid     All subject Seq-id(s), separated by a ';'
    14 score         Raw score
    15 nident        Number of identical matches
    16 positive      Number of positive-scoring matches
    17 gaps          Total number of gaps
    18 ppos          Percentage of positive-scoring matches
    19 qframe        Query frame
    20 sframe        Subject frame
    21 qseq          Aligned part of query sequence
    22 sseq          Aligned part of subject sequence
    23 qlen          Query sequence length
    24 slen          Subject sequence length
====== ============= ===========================================

The third option is BLAST XML output, which is designed to be parsed by
another program, and is understood by some Galaxy tools.

You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).

-------

**References**

Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.

</help>
</tool>