annotate ncbi_blastp_wrapper.xml @ 0:d375502056f1 draft

Uploaded
author devteam
date Fri, 17 Aug 2012 09:11:43 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d375502056f1 Uploaded
devteam
parents:
diff changeset
1 <tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.11">
d375502056f1 Uploaded
devteam
parents:
diff changeset
2 <description>Search protein database with protein query sequence(s)</description>
d375502056f1 Uploaded
devteam
parents:
diff changeset
3 <!-- If job splitting is enabled, break up the query file into four parts -->
d375502056f1 Uploaded
devteam
parents:
diff changeset
4 <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism>
d375502056f1 Uploaded
devteam
parents:
diff changeset
5 <version_command>blastp -version</version_command>
d375502056f1 Uploaded
devteam
parents:
diff changeset
6 <command interpreter="python">hide_stderr.py
d375502056f1 Uploaded
devteam
parents:
diff changeset
7 ## The command is a Cheetah template which allows some Python based syntax.
d375502056f1 Uploaded
devteam
parents:
diff changeset
8 ## Lines starting with hash hash are comments. Galaxy will turn newlines into spaces
d375502056f1 Uploaded
devteam
parents:
diff changeset
9 blastp
d375502056f1 Uploaded
devteam
parents:
diff changeset
10 -query "$query"
d375502056f1 Uploaded
devteam
parents:
diff changeset
11 #if $db_opts.db_opts_selector == "db":
d375502056f1 Uploaded
devteam
parents:
diff changeset
12 -db "${db_opts.database.fields.path}"
d375502056f1 Uploaded
devteam
parents:
diff changeset
13 #else:
d375502056f1 Uploaded
devteam
parents:
diff changeset
14 -subject "$db_opts.subject"
d375502056f1 Uploaded
devteam
parents:
diff changeset
15 #end if
d375502056f1 Uploaded
devteam
parents:
diff changeset
16 -task $blast_type
d375502056f1 Uploaded
devteam
parents:
diff changeset
17 -evalue $evalue_cutoff
d375502056f1 Uploaded
devteam
parents:
diff changeset
18 -out $output1
d375502056f1 Uploaded
devteam
parents:
diff changeset
19 ##Set the extended list here so if/when we add things, saved workflows are not affected
d375502056f1 Uploaded
devteam
parents:
diff changeset
20 #if str($out_format)=="ext":
d375502056f1 Uploaded
devteam
parents:
diff changeset
21 -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
d375502056f1 Uploaded
devteam
parents:
diff changeset
22 #else:
d375502056f1 Uploaded
devteam
parents:
diff changeset
23 -outfmt $out_format
d375502056f1 Uploaded
devteam
parents:
diff changeset
24 #end if
d375502056f1 Uploaded
devteam
parents:
diff changeset
25 -num_threads 8
d375502056f1 Uploaded
devteam
parents:
diff changeset
26 #if $adv_opts.adv_opts_selector=="advanced":
d375502056f1 Uploaded
devteam
parents:
diff changeset
27 $adv_opts.filter_query
d375502056f1 Uploaded
devteam
parents:
diff changeset
28 -matrix $adv_opts.matrix
d375502056f1 Uploaded
devteam
parents:
diff changeset
29 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object, not a string
d375502056f1 Uploaded
devteam
parents:
diff changeset
30 ## Note -max_target_seqs overrides -num_descriptions and -num_alignments
d375502056f1 Uploaded
devteam
parents:
diff changeset
31 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
d375502056f1 Uploaded
devteam
parents:
diff changeset
32 -max_target_seqs $adv_opts.max_hits
d375502056f1 Uploaded
devteam
parents:
diff changeset
33 #end if
d375502056f1 Uploaded
devteam
parents:
diff changeset
34 #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
d375502056f1 Uploaded
devteam
parents:
diff changeset
35 -word_size $adv_opts.word_size
d375502056f1 Uploaded
devteam
parents:
diff changeset
36 #end if
d375502056f1 Uploaded
devteam
parents:
diff changeset
37 ##Ungapped disabled for now - see comments below
d375502056f1 Uploaded
devteam
parents:
diff changeset
38 ##$adv_opts.ungapped
d375502056f1 Uploaded
devteam
parents:
diff changeset
39 $adv_opts.parse_deflines
d375502056f1 Uploaded
devteam
parents:
diff changeset
40 ## End of advanced options:
d375502056f1 Uploaded
devteam
parents:
diff changeset
41 #end if
d375502056f1 Uploaded
devteam
parents:
diff changeset
42 </command>
d375502056f1 Uploaded
devteam
parents:
diff changeset
43 <inputs>
d375502056f1 Uploaded
devteam
parents:
diff changeset
44 <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
45 <conditional name="db_opts">
d375502056f1 Uploaded
devteam
parents:
diff changeset
46 <param name="db_opts_selector" type="select" label="Subject database/sequences">
d375502056f1 Uploaded
devteam
parents:
diff changeset
47 <option value="db" selected="True">BLAST Database</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
48 <option value="file">FASTA file</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
49 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
50 <when value="db">
d375502056f1 Uploaded
devteam
parents:
diff changeset
51 <param name="database" type="select" label="Protein BLAST database">
d375502056f1 Uploaded
devteam
parents:
diff changeset
52 <options from_file="blastdb_p.loc">
d375502056f1 Uploaded
devteam
parents:
diff changeset
53 <column name="value" index="0"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
54 <column name="name" index="1"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
55 <column name="path" index="2"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
56 </options>
d375502056f1 Uploaded
devteam
parents:
diff changeset
57 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
58 <param name="subject" type="hidden" value="" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
59 </when>
d375502056f1 Uploaded
devteam
parents:
diff changeset
60 <when value="file">
d375502056f1 Uploaded
devteam
parents:
diff changeset
61 <param name="database" type="hidden" value="" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
62 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
63 </when>
d375502056f1 Uploaded
devteam
parents:
diff changeset
64 </conditional>
d375502056f1 Uploaded
devteam
parents:
diff changeset
65 <param name="blast_type" type="select" display="radio" label="Type of BLAST">
d375502056f1 Uploaded
devteam
parents:
diff changeset
66 <option value="blastp">blastp</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
67 <option value="blastp-short">blastp-short</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
68 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
69 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
70 <param name="out_format" type="select" label="Output format">
d375502056f1 Uploaded
devteam
parents:
diff changeset
71 <option value="6" selected="True">Tabular (standard 12 columns)</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
72 <option value="ext">Tabular (extended 24 columns)</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
73 <option value="5">BLAST XML</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
74 <option value="0">Pairwise text</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
75 <option value="0 -html">Pairwise HTML</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
76 <option value="2">Query-anchored text</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
77 <option value="2 -html">Query-anchored HTML</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
78 <option value="4">Flat query-anchored text</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
79 <option value="4 -html">Flat query-anchored HTML</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
80 <!--
d375502056f1 Uploaded
devteam
parents:
diff changeset
81 <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
82 -->
d375502056f1 Uploaded
devteam
parents:
diff changeset
83 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
84 <conditional name="adv_opts">
d375502056f1 Uploaded
devteam
parents:
diff changeset
85 <param name="adv_opts_selector" type="select" label="Advanced Options">
d375502056f1 Uploaded
devteam
parents:
diff changeset
86 <option value="basic" selected="True">Hide Advanced Options</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
87 <option value="advanced">Show Advanced Options</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
88 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
89 <when value="basic" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
90 <when value="advanced">
d375502056f1 Uploaded
devteam
parents:
diff changeset
91 <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
d375502056f1 Uploaded
devteam
parents:
diff changeset
92 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
93 <param name="matrix" type="select" label="Scoring matrix">
d375502056f1 Uploaded
devteam
parents:
diff changeset
94 <option value="BLOSUM90">BLOSUM90</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
95 <option value="BLOSUM80">BLOSUM80</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
96 <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
97 <option value="BLOSUM50">BLOSUM50</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
98 <option value="BLOSUM45">BLOSUM45</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
99 <option value="PAM250">PAM250</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
100 <option value="PAM70">PAM70</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
101 <option value="PAM30">PAM30</option>
d375502056f1 Uploaded
devteam
parents:
diff changeset
102 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
103 <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
d375502056f1 Uploaded
devteam
parents:
diff changeset
104 <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
d375502056f1 Uploaded
devteam
parents:
diff changeset
105 <validator type="in_range" min="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
106 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
107 <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
d375502056f1 Uploaded
devteam
parents:
diff changeset
108 <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
d375502056f1 Uploaded
devteam
parents:
diff changeset
109 <validator type="in_range" min="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
110 </param>
d375502056f1 Uploaded
devteam
parents:
diff changeset
111 <!--
d375502056f1 Uploaded
devteam
parents:
diff changeset
112 Can't use '-ungapped' on its own; the error returned is:
d375502056f1 Uploaded
devteam
parents:
diff changeset
113 Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
d375502056f1 Uploaded
devteam
parents:
diff changeset
114 Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
d375502056f1 Uploaded
devteam
parents:
diff changeset
115 <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
116 -->
d375502056f1 Uploaded
devteam
parents:
diff changeset
117 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
118 </when>
d375502056f1 Uploaded
devteam
parents:
diff changeset
119 </conditional>
d375502056f1 Uploaded
devteam
parents:
diff changeset
120 </inputs>
d375502056f1 Uploaded
devteam
parents:
diff changeset
121 <outputs>
d375502056f1 Uploaded
devteam
parents:
diff changeset
122 <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
d375502056f1 Uploaded
devteam
parents:
diff changeset
123 <change_format>
d375502056f1 Uploaded
devteam
parents:
diff changeset
124 <when input="out_format" value="0" format="txt"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
125 <when input="out_format" value="0 -html" format="html"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
126 <when input="out_format" value="2" format="txt"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
127 <when input="out_format" value="2 -html" format="html"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
128 <when input="out_format" value="4" format="txt"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
129 <when input="out_format" value="4 -html" format="html"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
130 <when input="out_format" value="5" format="blastxml"/>
d375502056f1 Uploaded
devteam
parents:
diff changeset
131 </change_format>
d375502056f1 Uploaded
devteam
parents:
diff changeset
132 </data>
d375502056f1 Uploaded
devteam
parents:
diff changeset
133 </outputs>
d375502056f1 Uploaded
devteam
parents:
diff changeset
134 <requirements>
d375502056f1 Uploaded
devteam
parents:
diff changeset
135 <requirement type="package" version="2.2.26+">blast+</requirement>
d375502056f1 Uploaded
devteam
parents:
diff changeset
136 </requirements>
d375502056f1 Uploaded
devteam
parents:
diff changeset
137 <tests>
d375502056f1 Uploaded
devteam
parents:
diff changeset
138 <test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
139 <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
140 <param name="db_opts_selector" value="file" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
141 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
142 <param name="database" value="" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
143 <param name="evalue_cutoff" value="1e-8" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
144 <param name="blast_type" value="blastp" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
145 <param name="out_format" value="5" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
146 <param name="adv_opts_selector" value="advanced" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
147 <param name="filter_query" value="False" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
148 <param name="matrix" value="BLOSUM62" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
149 <param name="max_hits" value="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
150 <param name="word_size" value="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
151 <param name="parse_deflines" value="True" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
152 <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
153 </test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
154 <test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
155 <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
156 <param name="db_opts_selector" value="file" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
157 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
158 <param name="database" value="" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
159 <param name="evalue_cutoff" value="1e-8" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
160 <param name="blast_type" value="blastp" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
161 <param name="out_format" value="6" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
162 <param name="adv_opts_selector" value="advanced" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
163 <param name="filter_query" value="False" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
164 <param name="matrix" value="BLOSUM62" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
165 <param name="max_hits" value="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
166 <param name="word_size" value="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
167 <param name="parse_deflines" value="True" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
168 <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
169 </test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
170 <test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
171 <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
172 <param name="db_opts_selector" value="file" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
173 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
174 <param name="database" value="" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
175 <param name="evalue_cutoff" value="1e-8" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
176 <param name="blast_type" value="blastp" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
177 <param name="out_format" value="ext" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
178 <param name="adv_opts_selector" value="advanced" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
179 <param name="filter_query" value="False" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
180 <param name="matrix" value="BLOSUM62" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
181 <param name="max_hits" value="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
182 <param name="word_size" value="0" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
183 <param name="parse_deflines" value="True" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
184 <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
185 </test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
186 <test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
187 <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
188 <param name="db_opts_selector" value="file" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
189 <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
190 <param name="database" value="" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
191 <param name="evalue_cutoff" value="1e-8" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
192 <param name="blast_type" value="blastp" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
193 <param name="out_format" value="6" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
194 <param name="adv_opts_selector" value="basic" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
195 <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" />
d375502056f1 Uploaded
devteam
parents:
diff changeset
196 </test>
d375502056f1 Uploaded
devteam
parents:
diff changeset
197 </tests>
d375502056f1 Uploaded
devteam
parents:
diff changeset
198 <help>
d375502056f1 Uploaded
devteam
parents:
diff changeset
199
d375502056f1 Uploaded
devteam
parents:
diff changeset
200 .. class:: warningmark
d375502056f1 Uploaded
devteam
parents:
diff changeset
201
d375502056f1 Uploaded
devteam
parents:
diff changeset
202 **Note**. Database searches may take a substantial amount of time.
d375502056f1 Uploaded
devteam
parents:
diff changeset
203 For large input datasets it is advisable to allow overnight processing.
d375502056f1 Uploaded
devteam
parents:
diff changeset
204
d375502056f1 Uploaded
devteam
parents:
diff changeset
205 -----
d375502056f1 Uploaded
devteam
parents:
diff changeset
206
d375502056f1 Uploaded
devteam
parents:
diff changeset
207 **What it does**
d375502056f1 Uploaded
devteam
parents:
diff changeset
208
d375502056f1 Uploaded
devteam
parents:
diff changeset
209 Search a *protein database* using a *protein query*,
d375502056f1 Uploaded
devteam
parents:
diff changeset
210 using the NCBI BLAST+ blastp command line tool.
d375502056f1 Uploaded
devteam
parents:
diff changeset
211
d375502056f1 Uploaded
devteam
parents:
diff changeset
212 -----
d375502056f1 Uploaded
devteam
parents:
diff changeset
213
d375502056f1 Uploaded
devteam
parents:
diff changeset
214 **Output format**
d375502056f1 Uploaded
devteam
parents:
diff changeset
215
d375502056f1 Uploaded
devteam
parents:
diff changeset
216 Because Galaxy focuses on processing tabular data, the default output of this
d375502056f1 Uploaded
devteam
parents:
diff changeset
217 tool is tabular. The standard BLAST+ tabular output contains 12 columns:
d375502056f1 Uploaded
devteam
parents:
diff changeset
218
d375502056f1 Uploaded
devteam
parents:
diff changeset
219 ====== ========= ============================================
d375502056f1 Uploaded
devteam
parents:
diff changeset
220 Column NCBI name Description
d375502056f1 Uploaded
devteam
parents:
diff changeset
221 ------ --------- --------------------------------------------
d375502056f1 Uploaded
devteam
parents:
diff changeset
222 1 qseqid Query Seq-id (ID of your sequence)
d375502056f1 Uploaded
devteam
parents:
diff changeset
223 2 sseqid Subject Seq-id (ID of the database hit)
d375502056f1 Uploaded
devteam
parents:
diff changeset
224 3 pident Percentage of identical matches
d375502056f1 Uploaded
devteam
parents:
diff changeset
225 4 length Alignment length
d375502056f1 Uploaded
devteam
parents:
diff changeset
226 5 mismatch Number of mismatches
d375502056f1 Uploaded
devteam
parents:
diff changeset
227 6 gapopen Number of gap openings
d375502056f1 Uploaded
devteam
parents:
diff changeset
228 7 qstart Start of alignment in query
d375502056f1 Uploaded
devteam
parents:
diff changeset
229 8 qend End of alignment in query
d375502056f1 Uploaded
devteam
parents:
diff changeset
230 9 sstart Start of alignment in subject (database hit)
d375502056f1 Uploaded
devteam
parents:
diff changeset
231 10 send End of alignment in subject (database hit)
d375502056f1 Uploaded
devteam
parents:
diff changeset
232 11 evalue Expectation value (E-value)
d375502056f1 Uploaded
devteam
parents:
diff changeset
233 12 bitscore Bit score
d375502056f1 Uploaded
devteam
parents:
diff changeset
234 ====== ========= ============================================
d375502056f1 Uploaded
devteam
parents:
diff changeset
235
d375502056f1 Uploaded
devteam
parents:
diff changeset
236 The BLAST+ tools can optionally output additional columns of information,
d375502056f1 Uploaded
devteam
parents:
diff changeset
237 but this takes longer to calculate. Most (but not all) of these columns are
d375502056f1 Uploaded
devteam
parents:
diff changeset
238 included by selecting the extended tabular output. The extra columns are
d375502056f1 Uploaded
devteam
parents:
diff changeset
239 included *after* the standard 12 columns. This is so that you can write
d375502056f1 Uploaded
devteam
parents:
diff changeset
240 workflow filtering steps that accept either the 12 or 24 column tabular
d375502056f1 Uploaded
devteam
parents:
diff changeset
241 BLAST output.
d375502056f1 Uploaded
devteam
parents:
diff changeset
242
d375502056f1 Uploaded
devteam
parents:
diff changeset
243 ====== ============= ===========================================
d375502056f1 Uploaded
devteam
parents:
diff changeset
244 Column NCBI name Description
d375502056f1 Uploaded
devteam
parents:
diff changeset
245 ------ ------------- -------------------------------------------
d375502056f1 Uploaded
devteam
parents:
diff changeset
246 13 sallseqid All subject Seq-id(s), separated by a ';'
d375502056f1 Uploaded
devteam
parents:
diff changeset
247 14 score Raw score
d375502056f1 Uploaded
devteam
parents:
diff changeset
248 15 nident Number of identical matches
d375502056f1 Uploaded
devteam
parents:
diff changeset
249 16 positive Number of positive-scoring matches
d375502056f1 Uploaded
devteam
parents:
diff changeset
250 17 gaps Total number of gaps
d375502056f1 Uploaded
devteam
parents:
diff changeset
251 18 ppos Percentage of positive-scoring matches
d375502056f1 Uploaded
devteam
parents:
diff changeset
252 19 qframe Query frame
d375502056f1 Uploaded
devteam
parents:
diff changeset
253 20 sframe Subject frame
d375502056f1 Uploaded
devteam
parents:
diff changeset
254 21 qseq Aligned part of query sequence
d375502056f1 Uploaded
devteam
parents:
diff changeset
255 22 sseq Aligned part of subject sequence
d375502056f1 Uploaded
devteam
parents:
diff changeset
256 23 qlen Query sequence length
d375502056f1 Uploaded
devteam
parents:
diff changeset
257 24 slen Subject sequence length
d375502056f1 Uploaded
devteam
parents:
diff changeset
258 ====== ============= ===========================================
d375502056f1 Uploaded
devteam
parents:
diff changeset
259
d375502056f1 Uploaded
devteam
parents:
diff changeset
260 The third option is BLAST XML output, which is designed to be parsed by
d375502056f1 Uploaded
devteam
parents:
diff changeset
261 another program, and is understood by some Galaxy tools.
d375502056f1 Uploaded
devteam
parents:
diff changeset
262
d375502056f1 Uploaded
devteam
parents:
diff changeset
263 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
d375502056f1 Uploaded
devteam
parents:
diff changeset
264 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
d375502056f1 Uploaded
devteam
parents:
diff changeset
265 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
d375502056f1 Uploaded
devteam
parents:
diff changeset
266 The two query-anchored outputs show a multiple sequence alignment between the query and all the matches,
d375502056f1 Uploaded
devteam
parents:
diff changeset
267 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
d375502056f1 Uploaded
devteam
parents:
diff changeset
268
d375502056f1 Uploaded
devteam
parents:
diff changeset
269 -------
d375502056f1 Uploaded
devteam
parents:
diff changeset
270
d375502056f1 Uploaded
devteam
parents:
diff changeset
271 **References**
d375502056f1 Uploaded
devteam
parents:
diff changeset
272
d375502056f1 Uploaded
devteam
parents:
diff changeset
273 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
d375502056f1 Uploaded
devteam
parents:
diff changeset
274
d375502056f1 Uploaded
devteam
parents:
diff changeset
275 Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
d375502056f1 Uploaded
devteam
parents:
diff changeset
276
d375502056f1 Uploaded
devteam
parents:
diff changeset
277 </help>
d375502056f1 Uploaded
devteam
parents:
diff changeset
278 </tool>