annotate tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.11">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>Search nucleotide database with nucleotide query sequence(s)</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <version_command>blastn -version</version_command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <command interpreter="python">hide_stderr.py
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 ## The command is a Cheetah template which allows some Python based syntax.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 ## Lines starting with hash hash are comments. Galaxy will turn newlines into spaces.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 blastn
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 -query "$query"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 #if $db_opts.db_opts_selector == "db":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 -db "${db_opts.database.fields.path}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 #else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 -subject "$db_opts.subject"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 -task $blast_type
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 -evalue $evalue_cutoff
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 -out $output1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 ##Set the extended list here so if/when we add things, saved workflows are not affected
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 #if str($out_format)=="ext":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 #else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 -outfmt $out_format
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 -num_threads 8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 #if $adv_opts.adv_opts_selector=="advanced":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 $adv_opts.filter_query
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 $adv_opts.strand
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 ## Note -max_target_seqs overrides -num_descriptions and -num_alignments
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 -max_target_seqs $adv_opts.max_hits
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 -word_size $adv_opts.word_size
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 $adv_opts.ungapped
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 $adv_opts.parse_deflines
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 ## End of advanced options:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <conditional name="db_opts">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <param name="db_opts_selector" type="select" label="Subject database/sequences">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <option value="db" selected="True">BLAST Database</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <option value="file">FASTA file</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <when value="db">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 <param name="database" type="select" label="Nucleotide BLAST database">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 <options from_file="blastdb.loc">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <column name="value" index="0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 <column name="name" index="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 <column name="path" index="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 </options>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 <param name="subject" type="hidden" value="" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 <when value="file">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <param name="database" type="hidden" value="" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 <param name="blast_type" type="select" display="radio" label="Type of BLAST">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 <option value="megablast">megablast</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 <option value="blastn">blastn</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <option value="blastn-short">blastn-short</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 <option value="dc-megablast">dc-megablast</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 <!-- Using BLAST 2.2.24+ this gives an error:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 BLAST engine error: Program type 'vecscreen' not supported
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 <option value="vecscreen">vecscreen</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 <param name="out_format" type="select" label="Output format">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 <option value="6" selected="True">Tabular (standard 12 columns)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 <option value="ext">Tabular (extended 24 columns)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 <option value="5">BLAST XML</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 <option value="0">Pairwise text</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 <option value="0 -html">Pairwise HTML</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 <option value="2">Query-anchored text</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 <option value="2 -html">Query-anchored HTML</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 <option value="4">Flat query-anchored text</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <option value="4 -html">Flat query-anchored HTML</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <!--
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <option value="11">BLAST archive format (ASN.1)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 <conditional name="adv_opts">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <param name="adv_opts_selector" type="select" label="Advanced Options">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 <option value="basic" selected="True">Hide Advanced Options</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 <option value="advanced">Show Advanced Options</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 <when value="basic" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 <when value="advanced">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 <param name="strand" type="select" label="Query strand(s) to search against database/subject">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 <option value="-strand both">Both</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 <option value="-strand plus">Plus (forward)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 <option value="-strand minus">Minus (reverse complement)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 <validator type="in_range" min="0" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 <validator type="in_range" min="0" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 <change_format>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 <when input="out_format" value="0" format="txt"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 <when input="out_format" value="0 -html" format="html"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 <when input="out_format" value="2" format="txt"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 <when input="out_format" value="2 -html" format="html"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 <when input="out_format" value="4" format="txt"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 <when input="out_format" value="4 -html" format="html"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 <when input="out_format" value="5" format="blastxml"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 </change_format>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 </data>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 <requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 <requirement type="binary">blastn</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 </requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 **Note**. Database searches may take a substantial amount of time.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 For large input datasets it is advisable to allow overnight processing.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 Search a *nucleotide database* using a *nucleotide query*,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 using the NCBI BLAST+ blastn command line tool.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 Algorithms include blastn, blastn-short, megablast, and discontiguous megablast (dc-megablast).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 **Output format**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 Because Galaxy focuses on processing tabular data, the default output of this
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 tool is tabular. The standard BLAST+ tabular output contains 12 columns:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 ====== ========= ============================================
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 Column NCBI name Description
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 ------ --------- --------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 1 qseqid Query Seq-id (ID of your sequence)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 2 sseqid Subject Seq-id (ID of the database hit)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 3 pident Percentage of identical matches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 4 length Alignment length
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 5 mismatch Number of mismatches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 6 gapopen Number of gap openings
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 7 qstart Start of alignment in query
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 8 qend End of alignment in query
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 9 sstart Start of alignment in subject (database hit)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 10 send End of alignment in subject (database hit)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 11 evalue Expectation value (E-value)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 12 bitscore Bit score
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 ====== ========= ============================================
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 The BLAST+ tools can optionally output additional columns of information,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 but this takes longer to calculate. Most (but not all) of these columns are
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 included by selecting the extended tabular output. The extra columns are
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 included *after* the standard 12 columns. This is so that you can write
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 workflow filtering steps that accept either the 12 or 24 column tabular
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 BLAST output.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 ====== ============= ===========================================
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 Column NCBI name Description
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 ------ ------------- -------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 13 sallseqid All subject Seq-id(s), separated by a ';'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 14 score Raw score
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 15 nident Number of identical matches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 16 positive Number of positive-scoring matches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 17 gaps Total number of gaps
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 18 ppos Percentage of positive-scoring matches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 19 qframe Query frame
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 20 sframe Subject frame
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 21 qseq Aligned part of query sequence
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 22 sseq Aligned part of subject sequence
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 23 qlen Query sequence length
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 24 slen Subject sequence length
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 ====== ============= ===========================================
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 The third option is BLAST XML output, which is designed to be parsed by
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 another program, and is understood by some Galaxy tools.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 The two query-anchored outputs show a multiple sequence alignment between the query and all the matches,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 -------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 **References**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. Journal of Computational Biology 7(1-2): 203-214.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 </tool>