Mercurial > repos > galaxyp > blast_plus_remote_blastp
comparison blast_plus_remote_blastp.xml @ 5:22a767177ac9 draft
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/ncbi_blast_plus commit 8cb8939dadaad8e804e35128cfb7b2560eb4d9b4
author | galaxyp |
---|---|
date | Fri, 20 Jan 2017 16:00:56 -0500 |
parents | |
children | e9975de58321 |
comparison
equal
deleted
inserted
replaced
4:a51980bc0ffe | 5:22a767177ac9 |
---|---|
1 <tool id="blast_plus_remote_blastp" name="NCBI BLAST+ remote blastp" version="2.6.0"> | |
2 <description>Search protein database with protein query sequence(s)</description> | |
3 <!-- If job splitting is enabled, break up the query file into parts --> | |
4 <!-- | |
5 <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="500" merge_outputs="output1" /> | |
6 --> | |
7 <requirements> | |
8 <requirement type="binary">blastp</requirement> | |
9 <requirement type="package" version="2.6.0">blast</requirement> | |
10 </requirements> | |
11 <stdio> | |
12 <exit_code range="1" level="fatal" description="Bad input dataset or BLAST options" /> | |
13 <exit_code range="2" level="fatal" description="Error in BLAST database" /> | |
14 <exit_code range="3" level="fatal" description="Error in BLAST engine" /> | |
15 <exit_code range="4" level="fatal" description="Out of Memory" /> | |
16 <exit_code range="5:" level="fatal" description="Unknown Error" /> | |
17 </stdio> | |
18 <version_command>blastp -version</version_command> | |
19 <command> | |
20 ## Cheetah template (Python-like syntax) that assembles the blastp command line; lines starting hash hash are comments and Galaxy turns newlines into spaces. | |
21 ## The subject source is chosen by db_opts. Remote searches can be narrowed by taxon through -entrez_query (seeded with all[filter] so an exclude-only list still forms a valid boolean query); non-remote searches take their thread count from GALAXY_SLOTS, falling back to 8. | |
22 blastp | |
23 -query "$query" | |
24 #if $db_opts.db_opts_selector == "db": | |
25 -db "${db_opts.database.fields.path}" | |
26 #elif $db_opts.db_opts_selector == "histdb": | |
27 -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" | |
28 #elif $db_opts.db_opts_selector == "remote": | |
29 -db $db_opts.database | |
30 -remote | |
31 #set $txids = [] | |
32 #set $ntxids = [] | |
33 #for $org in $db_opts.taxid_repeat: | |
34 #if $org.exclude: | |
35 #set $ntxids = $ntxids + ["txid" + str($org.taxid)] | |
36 #else: | |
37 #set $txids = $txids + ["txid" + str($org.taxid)] | |
38 #end if | |
39 #end for | |
40 #if (len($txids) + len($ntxids)) > 0: | |
41 #set $entrez_query = 'all[filter]' | |
42 #if len($txids) > 0: | |
43 #set $entrez_query = '(' + ' OR '.join($txids) + ')' | |
44 #end if | |
45 #if len($ntxids) > 0: | |
46 #set $entrez_query = $entrez_query + ' NOT (' + ' OR '.join($ntxids) + ')' | |
47 #end if | |
48 -entrez_query '$entrez_query' | |
49 #end if | |
50 #else: | |
51 -subject "$db_opts.subject" | |
52 #end if | |
53 -task $blast_type | |
54 -evalue $evalue_cutoff | |
55 -out blast_output | |
56 ##Set the extended list here so if/when we add things, saved workflows are not affected | |
57 #if str($fmt_opt.out_format)=="text": | |
58 -outfmt "$fmt_opt.outfmt" $fmt_opt.html $fmt_opt.show_gis | |
59 #if $fmt_opt.num_descriptions.__str__.strip() != '': | |
60 -num_descriptions $fmt_opt.num_descriptions | |
61 #end if | |
62 #if $fmt_opt.num_alignments.__str__.strip() != '': | |
63 -num_alignments $fmt_opt.num_alignments | |
64 #end if | |
65 #elif str($fmt_opt.out_format)=="cols": | |
66 #set cols = (str($fmt_opt.std_cols)+","+str($fmt_opt.ext_cols)+","+str($fmt_opt.ids_cols)+","+str($fmt_opt.misc_cols)+","+str($fmt_opt.tax_cols)).replace("None", "").replace(",,", ",").replace(",", " ").strip() | |
67 -outfmt "6 $cols" | |
68 #if $fmt_opt.max_target_seqs.__str__.strip() != '': | |
69 -max_target_seqs $fmt_opt.max_target_seqs | |
70 #end if | |
71 #else: | |
72 -outfmt "$fmt_opt.outfmt" | |
73 #if $fmt_opt.max_target_seqs.__str__.strip() != '': | |
74 -max_target_seqs $fmt_opt.max_target_seqs | |
75 #end if | |
76 #end if | |
77 #if $db_opts.db_opts_selector != "remote": | |
78 -num_threads "\${GALAXY_SLOTS:-8}" | |
79 #end if | |
80 #if $adv_opts.adv_opts_selector=="advanced": | |
81 $adv_opts.filter_query | |
82 -matrix $adv_opts.scoring.matrix | |
83 $adv_opts.scoring.gap_costs | |
84 | |
85 #if $adv_opts.word_size.__str__.strip() != '': | |
86 -word_size $adv_opts.word_size | |
87 #end if | |
88 | |
89 #if $adv_opts.window_size.__str__.strip() != '': | |
90 -window_size $adv_opts.window_size | |
91 #end if | |
92 | |
93 #if $adv_opts.threshold.__str__.strip() != '': | |
94 -threshold $adv_opts.threshold | |
95 #end if | |
96 | |
97 #if $adv_opts.comp_based_stats.__str__.strip() != '': | |
98 -comp_based_stats $adv_opts.comp_based_stats | |
99 #end if | |
100 | |
101 ##Ungapped disabled for now - see comments below | |
102 ##$adv_opts.ungapped | |
103 $adv_opts.use_sw_tback | |
104 $adv_opts.parse_deflines | |
105 ## End of advanced options: | |
106 #end if | |
107 </command> | |
108 <inputs> | |
109 <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> | |
110 <conditional name="db_opts"> | |
111 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
112 <option value="db" selected="True">Local BLAST Database</option> | |
113 <option value="histdb">BLAST database from your history</option> | |
114 <option value="file">Local FASTA file</option> | |
115 <option value="remote">NCBI Remote Database</option> | |
116 </param> | |
117 <when value="db"> | |
118 <param name="database" type="select" label="Protein BLAST database"> | |
119 <options from_file="blastdb_p.loc"> | |
120 <column name="value" index="0"/> | |
121 <column name="name" index="1"/> | |
122 <column name="path" index="2"/> | |
123 </options> | |
124 </param> | |
125 <param name="subject" type="hidden" value="" /> | |
126 </when> | |
127 <when value="histdb"> | |
128 <param name="database" type="hidden" value="" /> | |
129 <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" /> | |
130 <param name="subject" type="hidden" value="" /> | |
131 </when> | |
132 <when value="file"> | |
133 <param name="database" type="hidden" value="" /> | |
134 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> | |
135 </when> | |
136 <when value="remote"> | |
137 <param name="database" type="select" label="Protein BLAST database"> | |
138 <option value="nr" selected="True">Non-redundant protein sequences (nr)</option> | |
139 <option value="refseq_protein">Reference proteins (refseq_protein)</option> | |
140 <option value="swissprot">UniProtKB/Swiss-Prot(swissprot)</option> | |
141 <option value="pat">Patented protein sequences(pat)</option> | |
142 <option value="pdb">Protein Data Bank proteins(pdb)</option> | |
143 <option value="env_nr">Metagenomic proteins(env_nr)</option> | |
144 </param> | |
145 <repeat name="taxid_repeat" title="Search Organism Restriction" min="0"> | |
146 <param name="taxid" type="integer" value="" label="NCBI Taxon ID" help="For example: Human is 9606 (see: http://www.ncbi.nlm.nih.gov/taxonomy ) "> | |
147 <validator type="in_range" min="0" /> | |
148 </param> | |
149 <param name="exclude" type="boolean" checked="false" label="Exclude this NCBI Taxon ID" help=""/> | |
150 </repeat> | |
151 </when> | |
152 </conditional> | |
153 <param name="blast_type" type="select" display="radio" label="Type of BLAST"> | |
154 <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option> | |
155 <option value="blastp-fast">blastp-fast - Use longer words for seeding, faster but less accurate</option> | |
156 <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option> | |
157 </param> | |
158 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> | |
159 <conditional name="fmt_opt"> | |
160 <param name="out_format" type="select" label="Output format"> | |
161 <option value="tabular" selected="True">Tabular</option> | |
162 <option value="cols">Tabular (select which columns)</option> | |
163 <option value="blastxml">BLAST XML</option> | |
164 <option value="text">Text Report</option> | |
165 </param> | |
166 <when value="tabular"> | |
167 <param name="outfmt" type="select" label="Tabular columns"> | |
168 <option value="6" selected="True">Tabular (standard 12 columns)</option> | |
169 <option value="7">Tabular (standard 12 columns) with comments</option> | |
170 <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen">Tabular (extended 24 columns)</option> | |
171 <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles">Tabular (extended 25 columns)</option> | |
172 </param> | |
173 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
174 <validator type="in_range" min="0" /> | |
175 </param> | |
176 </when> | |
177 <when value="cols"> | |
178 <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> | |
179 <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> | |
180 <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> | |
181 <option selected="true" value="pident">pident = Percentage of identical matches</option> | |
182 <option selected="true" value="length">length = Alignment length</option> | |
183 <option selected="true" value="mismatch">mismatch = Number of mismatches</option> | |
184 <option selected="true" value="gapopen">gapopen = Number of gap openings</option> | |
185 <option selected="true" value="qstart">qstart = Start of alignment in query</option> | |
186 <option selected="true" value="qend">qend = End of alignment in query</option> | |
187 <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option> | |
188 <option selected="true" value="send">send = End of alignment in subject (database hit)</option> | |
189 <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> | |
190 <option selected="true" value="bitscore">bitscore = Bit score</option> | |
191 </param> | |
192 <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> | |
193 <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> | |
194 <option value="score">score = Raw score</option> | |
195 <option value="nident">nident = Number of identical matches</option> | |
196 <option value="positive">positive = Number of positive-scoring matches</option> | |
197 <option value="gaps">gaps = Total number of gaps</option> | |
198 <option value="ppos">ppos = Percentage of positive-scoring matches</option> | |
199 <option value="qframe">qframe = Query frame</option> | |
200 <option value="sframe">sframe = Subject frame</option> | |
201 <option value="qseq">qseq = Aligned part of query sequence</option> | |
202 <option value="sseq">sseq = Aligned part of subject sequence</option> | |
203 <option value="qlen">qlen = Query sequence length</option> | |
204 <option value="slen">slen = Subject sequence length</option> | |
205 <option value="salltitles">salltitles = All subject title(s), separated by a '<>'</option> | |
206 </param> | |
207 <!-- Optional identifier columns for the column-selectable tabular output. NOTE(review): sallseqid is also offered under "Extended columns"; ticking both would presumably repeat the column. --> <param name="ids_cols" type="select" multiple="true" display="checkboxes" label="Other identifier columns"> | |
208 <option value="qgi">qgi = Query GI</option> | |
209 <option value="qacc">qacc = Query accession</option> | |
210 <option value="qaccver">qaccver = Query accession.version</option> | |
211 <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> | |
212 <option value="sgi">sgi = Subject GI</option> | |
213 <option value="sallgi">sallgi = All subject GIs</option> | |
214 <option value="sacc">sacc = Subject accession</option> | |
215 <option value="saccver">saccver = Subject accession.version</option> | |
216 <option value="sallacc">sallacc = All subject accessions</option> | |
217 <option value="stitle">stitle = Subject Title</option> | |
218 </param> | |
219 <param name="misc_cols" type="select" multiple="true" display="checkboxes" label="Miscellaneous columns"> | |
220 <option value="sstrand">sstrand = Subject Strand</option> | |
221 <!-- Is it really worth including 'frames' given have 'qframe' and 'sframe'? --> | |
222 <option value="frames">frames = Query and subject frames separated by a '/'</option> | |
223 <option value="btop">btop = Blast traceback operations (BTOP)</option> | |
224 <option value="qcovs">qcovs = Query Coverage Per Subject</option> | |
225 <option value="qcovhsp">qcovhsp = Query Coverage Per HSP</option> | |
226 </param> | |
227 <param name="tax_cols" type="select" multiple="true" display="checkboxes" label="Taxonomy columns"> | |
228 <option value="staxids">staxids = unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option> | |
229 <!-- TODO, how to handle the taxonomy data file dependency? If missing these give N/A --> | |
230 <option value="sscinames">sscinames = unique Subject Scientific Name(s), separated by a ';'</option> | |
231 <option value="scomnames">scomnames = unique Subject Common Name(s), separated by a ';'</option> | |
232 <option value="sblastnames">sblastnames = unique Subject Blast Name(s), separated by a ';' (in alphabetical order)</option> | |
233 <option value="sskingdoms">sskingdoms = unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order)</option> | |
234 </param> | |
235 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
236 <validator type="in_range" min="0" /> | |
237 </param> | |
238 </when> | |
239 <when value="blastxml"> | |
240 <param name="outfmt" type="hidden" value="5"/> | |
241 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
242 <validator type="in_range" min="0" /> | |
243 </param> | |
244 </when> | |
245 <!-- Human-readable report branch: outfmt 0 to 4 plus optional -html / -show_gis flags and optional limits on descriptions and alignments. --> <when value="text"> | |
246 <param name="outfmt" type="select" label="Text format"> | |
247 <option value="0">Pairwise text</option> | |
248 <option value="1">Query-anchored text showing identities</option> | |
249 <option value="2">Query-anchored text</option> | |
250 <option value="3">Flat query-anchored text showing identities</option> | |
251 <option value="4">Flat query-anchored text</option> | |
252 </param> | |
253 <param name="html" type="boolean" label="Html" truevalue="-html" falsevalue="" checked="true" /> | |
254 <param name="show_gis" type="boolean" label="NCBI-gis" truevalue="-show_gis" falsevalue="" checked="false" | |
255 help="Show the NCBI gis in the Subject def lines"/> | |
256 <param name="num_descriptions" type="integer" value="500" optional="true" label="Maximum Descriptions to show" help="Show one-line descriptions for this number of database sequences."> | |
257 <validator type="in_range" min="0" /> | |
258 </param> | |
259 <param name="num_alignments" type="integer" value="250" optional="true" label="Maximum alignments to show" help="Show alignments for this number of database sequences."> | |
260 <validator type="in_range" min="0" /> | |
261 </param> | |
262 </when> | |
263 </conditional> | |
264 <conditional name="adv_opts"> | |
265 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
266 <option value="basic" selected="True">Hide Advanced Options</option> | |
267 <option value="advanced">Show Advanced Options</option> | |
268 </param> | |
269 <when value="basic" /> | |
270 <when value="advanced"> | |
271 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" /> | |
272 <!-- The gap-cost choices offered depend on the selected scoring matrix; each option value is the literal -gapopen / -gapextend argument text that the command template places on the blastp command line (empty value = let blastp use its defaults). --> <conditional name="scoring"> | |
273 <param name="matrix" type="select" label="Scoring matrix"> | |
274 <option value="BLOSUM90">BLOSUM90</option> | |
275 <option value="BLOSUM80">BLOSUM80</option> | |
276 <option value="BLOSUM62" selected="True">BLOSUM62 (default)</option> | |
277 <option value="BLOSUM50">BLOSUM50</option> | |
278 <option value="BLOSUM45">BLOSUM45</option> | |
279 <option value="PAM250">PAM250</option> | |
280 <option value="PAM70">PAM70</option> | |
281 <option value="PAM30">PAM30</option> | |
282 </param> | |
283 <when value="BLOSUM90"> | |
284 <param name="gap_costs" type="select" label="Gap Costs"> | |
285 <option value="">Use Defaults</option> | |
286 <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option> | |
287 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
288 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
289 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
290 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
291 <option value="-gapopen 10 -gapextend 1" selected="True">Existence: 10 Extension: 1 (default)</option> | |
292 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
293 </param> | |
294 | |
295 </when> | |
296 <when value="BLOSUM80"> | |
297 <param name="gap_costs" type="select" label="Gap Costs"> | |
298 <option value="">Use Defaults</option> | |
299 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
300 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
301 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
302 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
303 <option value="-gapopen 10 -gapextend 1" selected="True">Existence: 10 Extension: 1 (default)</option> | |
304 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
305 </param> | |
306 </when> | |
307 <when value="BLOSUM62"> | |
308 <param name="gap_costs" type="select" label="Gap Costs"> | |
309 <option value="">Use Defaults</option> | |
310 <option value="-gapopen 11 -gapextend 2">Existence: 11 Extension: 2</option> | |
311 <option value="-gapopen 10 -gapextend 2">Existence: 10 Extension: 2</option> | |
312 <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option> | |
313 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
314 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
315 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
316 <option value="-gapopen 13 -gapextend 1">Existence: 13 Extension: 1</option> | |
317 <option value="-gapopen 12 -gapextend 1">Existence: 12 Extension: 1</option> | |
318 <option value="-gapopen 11 -gapextend 1" selected="True">Existence: 11 Extension: 1 (default)</option> | |
319 <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option> | |
320 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
321 </param> | |
322 | |
323 </when> | |
324 <when value="BLOSUM50"> | |
325 <param name="gap_costs" type="select" label="Gap Costs"> | |
326 <option value="">Use Defaults</option> | |
327 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
328 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
329 <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option> | |
330 <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option> | |
331 <option value="-gapopen 9 -gapextend 3">Existence: 9 Extension: 3</option> | |
332 <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option> | |
333 <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option> | |
334 <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option> | |
335 <option value="-gapopen 13 -gapextend 2" selected="True">Existence: 13 Extension: 2 (default)</option> | |
336 <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option> | |
337 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
338 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
339 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
340 <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option> | |
341 <option value="-gapopen 15 -gapextend 1">Existence: 15 Extension: 1</option> | |
342 </param> | |
343 | |
344 </when> | |
345 <when value="BLOSUM45"> | |
346 <param name="gap_costs" type="select" label="Gap Costs"> | |
347 <option value="">Use Defaults</option> | |
348 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
349 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
350 <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option> | |
351 <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option> | |
352 <option value="-gapopen 15 -gapextend 2" selected="True">Existence: 15 Extension: 2 (default)</option> | |
353 <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option> | |
354 <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option> | |
355 <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option> | |
356 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
357 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
358 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
359 <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option> | |
360 </param> | |
361 </when> | |
362 <when value="PAM250"> | |
363 <param name="gap_costs" type="select" label="Gap Costs"> | |
364 <option value="">Use Defaults</option> | |
365 <option value="-gapopen 15 -gapextend 3">Existence: 15 Extension: 3</option> | |
366 <option value="-gapopen 14 -gapextend 3">Existence: 14 Extension: 3</option> | |
367 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
368 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
369 <option value="-gapopen 17 -gapextend 2">Existence: 17 Extension: 2</option> | |
370 <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option> | |
371 <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option> | |
372 <option value="-gapopen 14 -gapextend 2" selected="True">Existence: 14 Extension: 2 (default)</option> | |
373 <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option> | |
374 <option value="-gapopen 21 -gapextend 1">Existence: 21 Extension: 1</option> | |
375 <option value="-gapopen 20 -gapextend 1">Existence: 20 Extension: 1</option> | |
376 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
377 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
378 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
379 </param> | |
380 | |
381 </when> | |
382 <when value="PAM70"> | |
383 <param name="gap_costs" type="select" label="Gap Costs"> | |
384 <option value="">Use Defaults</option> | |
385 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
386 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
387 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
388 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
389 <option value="-gapopen 10 -gapextend 1" selected="True">Existence: 10 Extension: 1 (default)</option> | |
390 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
391 </param> | |
392 | |
393 </when> | |
394 <when value="PAM30"> | |
395 <param name="gap_costs" type="select" label="Gap Costs"> | |
396 <option value="">Use Defaults</option> | |
397 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
398 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
399 <option value="-gapopen 5 -gapextend 2">Existence: 5 Extension: 2</option> | |
400 <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option> | |
401 <option value="-gapopen 9 -gapextend 1" selected="True">Existence: 9 Extension: 1 (default)</option> | |
402 <option value="-gapopen 8 -gapextend 1">Existence: 8 Extension: 1</option> | |
403 </param> | |
404 </when> | |
405 </conditional> | |
406 | |
407 <param name="word_size" type="integer" value="" optional="true" label="Word size for wordfinder algorithm" help="Recommended: blastp 3, blastp-short 2"> | |
408 <validator type="in_range" min="2" /> | |
409 </param> | |
410 <param name="window_size" type="integer" value="" optional="true" label="Multiple hits window size, use 0 to specify 1-hit algorithm." help="Recommended: blastp 40, blastp-short 15"> | |
411 <validator type="in_range" min="0" /> | |
412 </param> | |
413 <param name="threshold" type="integer" value="" optional="true" label="Threshold: Minimum score to add a word to the BLAST lookup table." help="Recommended: blastp 11, blastp-short 16"> | |
414 <validator type="in_range" min="1" /> | |
415 </param> | |
416 <param name="comp_based_stats" type="select" optional="true" label="Use composition-based statistics" | |
417 help="Recommended: blastp: 2 blastp-short: 0"> | |
418 <option value="">Leave Unspecified</option> | |
419 <option value="0">0 or F (No composition-based statistics)</option> | |
420 <option value="1">1 (Composition-based statistics as in NAR 29:2994-3005, 2001)</option> | |
421 <option value="2">2, T, or D (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties)</option> | |
422 <option value="3">3 (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally)</option> | |
423 </param> | |
424 | |
425 <param name="use_sw_tback" type="boolean" label="Compute locally optimal Smith-Waterman alignments?" truevalue="-use_sw_tback" falsevalue="" checked="false" /> | |
426 | |
427 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> | |
428 </when> | |
429 </conditional> | |
430 </inputs> | |
431 <outputs> <!-- Four alternative datasets, all collected from the work-dir file "blast_output"; each filter keys on the chosen output format (and, for text reports, on the Html checkbox), so the filters are mutually exclusive. --> | |
432 <data name="output_tabular" format="tabular" label="${blast_type} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
433 <filter>fmt_opt['out_format'] in ("tabular","cols")</filter> | |
434 </data> | |
435 <data name="output_xml" format="blastxml" label="${blast_type} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
436 <filter>fmt_opt['out_format'] == "blastxml"</filter> | |
437 </data> | |
438 <data name="output_txt" format="txt" label="${blast_type} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
439 <filter>fmt_opt['out_format'] == "text" and not fmt_opt['html']</filter> | |
440 </data> | |
441 <data name="output_html" format="html" label="${blast_type} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
442 <filter>fmt_opt['out_format'] == "text" and fmt_opt['html']</filter> | |
443 </data> | |
444 </outputs> | |
445 <tests> | |
446 <test> | |
447 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
448 <param name="db_opts_selector" value="file" /> | |
449 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
450 <param name="database" value="" /> | |
451 <param name="evalue_cutoff" value="1e-8" /> | |
452 <param name="blast_type" value="blastp" /> | |
453 <param name="out_format" value="blastxml" /> | |
454 <param name="outfmt" value="5" /> | |
455 <param name="adv_opts_selector" value="advanced" /> | |
456 <param name="filter_query" value="False" /> | |
457 <param name="matrix" value="BLOSUM62" /> | |
458 <param name="max_target_seqs" value="" /> | |
459 <param name="word_size" value="" /> | |
460 <param name="parse_deflines" value="False" /> | |
461 <output name="output_xml"> | |
462 <assert_contents> | |
463 <has_text text="sp|Q9BS26|ERP44_HUMAN"/> | |
464 </assert_contents> | |
465 </output> | |
466 </test> | |
467 <test> | |
468 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
469 <param name="db_opts_selector" value="file" /> | |
470 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
471 <param name="database" value="" /> | |
472 <param name="evalue_cutoff" value="1e-8" /> | |
473 <param name="blast_type" value="blastp" /> | |
474 <param name="out_format" value="tabular" /> | |
475 <param name="outfmt" value="6" /> | |
476 <param name="adv_opts_selector" value="advanced" /> | |
477 <param name="filter_query" value="False" /> | |
478 <param name="matrix" value="BLOSUM62" /> | |
479 <param name="max_target_seqs" value="" /> | |
480 <param name="word_size" value="" /> | |
481 <param name="parse_deflines" value="False" /> | |
482 <output name="output_tabular"> | |
483 <assert_contents> | |
484 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" /> | |
485 <has_text text="BAB21486.1"/> | |
486 </assert_contents> | |
487 </output> | |
488 </test> | |
489 <test> | |
490 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
491 <param name="db_opts_selector" value="file" /> | |
492 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
493 <param name="database" value="" /> | |
494 <param name="evalue_cutoff" value="1e-8" /> | |
495 <param name="blast_type" value="blastp" /> | |
496 <param name="out_format" value="tabular" /> | |
497 <param name="outfmt" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" /> | |
498 <param name="adv_opts_selector" value="advanced" /> | |
499 <param name="filter_query" value="False" /> | |
500 <param name="matrix" value="BLOSUM62" /> | |
501 <param name="max_target_seqs" value="" /> | |
502 <param name="word_size" value="" /> | |
503 <param name="parse_deflines" value="False" /> | |
504 <output name="output_tabular"> | |
505 <assert_contents> | |
506 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+\t\S+\t\d+\t\d+\t\d+\t\d+\t\S+\t\d+\t\d+\t\S+\t\S+\t\d+\t\d+" /> | |
507 </assert_contents> | |
508 </output> | |
509 </test> | |
510 <test> | |
511 <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
512 <param name="db_opts_selector" value="file" /> | |
513 <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> | |
514 <param name="database" value="" /> | |
515 <param name="evalue_cutoff" value="1e-8" /> | |
516 <param name="blast_type" value="blastp" /> | |
517 <param name="out_format" value="tabular" /> | |
518 <param name="outfmt" value="6" /> | |
519 <param name="adv_opts_selector" value="basic" /> | |
520 <output name="output_tabular"> | |
521 <assert_contents> | |
522 <has_text_matching expression="gi.283855846.gb.ADB45242.1.\tsp.P08100.OPSD_HUMAN\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" /> | |
523 <has_text text="BAB21486.1"/> | |
524 </assert_contents> | |
525 </output> | |
526 </test> | |
527 </tests> | |
528 <help> | |
529 | |
530 .. class:: warningmark | |
531 | |
532 **Note**. Database searches may take a substantial amount of time. | |
533 For large input datasets it is advisable to allow overnight processing. | |
534 | |
535 ----- | |
536 | |
537 **What it does** | |
538 | |
539 Search a *protein database* using a *protein query*, | |
540 using the NCBI BLAST+ blastp command line tool. | |
541 | |
542 The search can be performed using a local database, a BLAST database from your history, | |
543 sequences supplied in a FASTA file, or remotely at NCBI. | |
544 | |
545 The remote operation allows searches to be targeted at specific organisms. | |
546 | |
547 ----- | |
548 | |
549 **Output format** | |
550 | |
551 Because Galaxy focuses on processing tabular data, the default output of this | |
552 tool is tabular. The standard BLAST+ tabular output contains 12 columns: | |
553 | |
554 ====== ========= ============================================ | |
555 Column NCBI name Description | |
556 ------ --------- -------------------------------------------- | |
557 1 qseqid Query Seq-id (ID of your sequence) | |
558 2 sseqid Subject Seq-id (ID of the database hit) | |
559 3 pident Percentage of identical matches | |
560 4 length Alignment length | |
561 5 mismatch Number of mismatches | |
562 6 gapopen Number of gap openings | |
563 7 qstart Start of alignment in query | |
564 8 qend End of alignment in query | |
565 9 sstart Start of alignment in subject (database hit) | |
566 10 send End of alignment in subject (database hit) | |
567 11 evalue Expectation value (E-value) | |
568 12 bitscore Bit score | |
569 ====== ========= ============================================ | |
570 | |
571 The BLAST+ tools can optionally output additional columns of information, | |
572 but this takes longer to calculate. Most (but not all) of these columns are | |
573 included by selecting the extended tabular output. The extra columns are | |
574 included *after* the standard 12 columns. This is so that you can write | |
575 workflow filtering steps that accept the 12, 24 or 25 column tabular | |
576 BLAST output. | |
577 | |
578 ====== ============= =========================================== | |
579 Column NCBI name Description | |
580 ------ ------------- ------------------------------------------- | |
581 13 sallseqid All subject Seq-id(s), separated by a ';' | |
582 14 score Raw score | |
583 15 nident Number of identical matches | |
584 16 positive Number of positive-scoring matches | |
585 17 gaps Total number of gaps | |
586 18 ppos Percentage of positive-scoring matches | |
587 19 qframe Query frame | |
588 20 sframe Subject frame | |
589 21 qseq Aligned part of query sequence | |
590 22 sseq Aligned part of subject sequence | |
591 23 qlen Query sequence length | |
592 24 slen Subject sequence length | |
593 25 salltitles All subject title(s), separated by a '<>' | |
594 ====== ============= =========================================== | |
595 | |
596 The third option is BLAST XML output, which is designed to be parsed by | |
597 another program, and is understood by some Galaxy tools. | |
598 | |
599 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). | |
600 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. | |
601 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. | |
602 The two query anchored outputs show a multiple sequence alignment between the query and all the matches, | |
603 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). | |
604 | |
605 ------- | |
606 | |
607 **References** | |
608 | |
609 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. | |
610 | |
611 Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005. | |
612 | |
613 </help> | |
614 <citations> | |
615 <citation type="doi">10.1186/1471-2105-10-421</citation> | |
616 <citation type="doi">10.1186/s13742-015-0080-7</citation> | |
617 </citations> | |
618 </tool> |