Mercurial > repos > devteam > ncbi_blast_plus
comparison ncbi_tblastx_wrapper.xml @ 2:ab1a8640f817 draft
Uploaded v0.0.12 again, without extra path
author | peterjc |
---|---|
date | Thu, 23 Aug 2012 07:32:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:27d7e1deada4 | 2:ab1a8640f817 |
---|---|
1 <tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.12"> | |
2 <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> | |
3 <!-- If job splitting is enabled, break up the query file into parts --> | |
4 <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> | |
5 <version_command>tblastx -version</version_command> | |
6 <command interpreter="python">hide_stderr.py | |
7 ## The command is a Cheetah template which allows some Python based syntax. | |
8 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces | |
9 tblastx | |
10 -query "$query" | |
11 #if $db_opts.db_opts_selector == "db": | |
12 -db "${db_opts.database.fields.path}" | |
13 #else: | |
14 -subject "$db_opts.subject" | |
15 #end if | |
16 -query_gencode $query_gencode | |
17 -evalue $evalue_cutoff | |
18 -out $output1 | |
19 ##Set the extended list here so if/when we add things, saved workflows are not affected | |
20 #if str($out_format)=="ext": | |
21 -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" | |
22 #else: | |
23 -outfmt $out_format | |
24 #end if | |
25 -num_threads 8 | |
26 #if $adv_opts.adv_opts_selector=="advanced": | |
27 -db_gencode $adv_opts.db_gencode | |
28 $adv_opts.filter_query | |
29 $adv_opts.strand | |
30 -matrix $adv_opts.matrix | |
31 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string | |
32 ## Note -max_target_seqs overrides -num_descriptions and -num_alignments | |
33 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): | |
34 -max_target_seqs $adv_opts.max_hits | |
35 #end if | |
36 #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): | |
37 -word_size $adv_opts.word_size | |
38 #end if | |
39 $adv_opts.parse_deflines | |
40 ## End of advanced options: | |
41 #end if | |
42 </command> | |
43 <inputs> | |
44 <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> | |
45 <conditional name="db_opts"> | |
46 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
47 <option value="db" selected="True">BLAST Database</option> | |
48 <option value="file">FASTA file (pairwise e-values)</option> | |
49 </param> | |
50 <when value="db"> | |
51 <param name="database" type="select" label="Nucleotide BLAST database"> | |
52 <options from_file="blastdb.loc"> | |
53 <column name="value" index="0"/> | |
54 <column name="name" index="1"/> | |
55 <column name="path" index="2"/> | |
56 </options> | |
57 </param> | |
58 <param name="subject" type="hidden" value="" /> | |
59 </when> | |
60 <when value="file"> | |
61 <param name="database" type="hidden" value="" /> | |
62 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> | |
63 </when> | |
64 </conditional> | |
65 <param name="query_gencode" type="select" label="Query genetic code"> | |
66 <!-- Value is passed to tblastx as -query_gencode; code 1 is the default via 'selected'. See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details of each numbered code --> | |
67 <option value="1" selected="True">1. Standard</option> | |
68 <option value="2">2. Vertebrate Mitochondrial</option> | |
69 <option value="3">3. Yeast Mitochondrial</option> | |
70 <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
71 <option value="5">5. Invertebrate Mitochondrial</option> | |
72 <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | |
73 <option value="9">9. Echinoderm Mitochondrial</option> | |
74 <option value="10">10. Euplotid Nuclear</option> | |
75 <option value="11">11. Bacteria and Archaea</option> | |
76 <option value="12">12. Alternative Yeast Nuclear</option> | |
77 <option value="13">13. Ascidian Mitochondrial</option> | |
78 <option value="14">14. Flatworm Mitochondrial</option> | |
79 <option value="15">15. Blepharisma Macronuclear</option> | |
80 <option value="16">16. Chlorophycean Mitochondrial Code</option> | |
81 <option value="21">21. Trematode Mitochondrial Code</option> | |
82 <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> | |
83 <option value="23">23. Thraustochytrium Mitochondrial Code</option> | |
84 <option value="24">24. Pterobranchia mitochondrial code</option> | |
85 </param> | |
86 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> | |
87 <param name="out_format" type="select" label="Output format"> | |
88 <option value="6" selected="True">Tabular (standard 12 columns)</option> | |
89 <option value="ext">Tabular (extended 24 columns)</option> | |
90 <option value="5">BLAST XML</option> | |
91 <option value="0">Pairwise text</option> | |
92 <option value="0 -html">Pairwise HTML</option> | |
93 <option value="2">Query-anchored text</option> | |
94 <option value="2 -html">Query-anchored HTML</option> | |
95 <option value="4">Flat query-anchored text</option> | |
96 <option value="4 -html">Flat query-anchored HTML</option> | |
97 <!-- | |
98 <option value="-outfmt 11">BLAST archive format (ASN.1)</option> | |
99 --> | |
100 </param> | |
101 <conditional name="adv_opts"> | |
102 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
103 <option value="basic" selected="True">Hide Advanced Options</option> | |
104 <option value="advanced">Show Advanced Options</option> | |
105 </param> | |
106 <when value="basic" /> | |
107 <when value="advanced"> | |
108 <param name="db_gencode" type="select" label="Database/subject genetic code"> | |
109 <!-- Value is passed to tblastx as -db_gencode (advanced options only); code 1 is the default via 'selected'. See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details of each numbered code --> | |
110 <option value="1" selected="True">1. Standard</option> | |
111 <option value="2">2. Vertebrate Mitochondrial</option> | |
112 <option value="3">3. Yeast Mitochondrial</option> | |
113 <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
114 <option value="5">5. Invertebrate Mitochondrial</option> | |
115 <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | |
116 <option value="9">9. Echinoderm Mitochondrial</option> | |
117 <option value="10">10. Euplotid Nuclear</option> | |
118 <option value="11">11. Bacteria and Archaea</option> | |
119 <option value="12">12. Alternative Yeast Nuclear</option> | |
120 <option value="13">13. Ascidian Mitochondrial</option> | |
121 <option value="14">14. Flatworm Mitochondrial</option> | |
122 <option value="15">15. Blepharisma Macronuclear</option> | |
123 <option value="16">16. Chlorophycean Mitochondrial Code</option> | |
124 <option value="21">21. Trematode Mitochondrial Code</option> | |
125 <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> | |
126 <option value="23">23. Thraustochytrium Mitochondrial Code</option> | |
127 <option value="24">24. Pterobranchia mitochondrial code</option> | |
128 </param> | |
129 <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> | |
130 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" /> | |
131 <param name="strand" type="select" label="Query strand(s) to search against database/subject"> | |
132 <option value="-strand both">Both</option> | |
133 <option value="-strand plus">Plus (forward)</option> | |
134 <option value="-strand minus">Minus (reverse complement)</option> | |
135 </param> | |
136 <param name="matrix" type="select" label="Scoring matrix"> | |
137 <option value="BLOSUM90">BLOSUM90</option> | |
138 <option value="BLOSUM80">BLOSUM80</option> | |
139 <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> | |
140 <option value="BLOSUM50">BLOSUM50</option> | |
141 <option value="BLOSUM45">BLOSUM45</option> | |
142 <option value="PAM250">PAM250</option> | |
143 <option value="PAM70">PAM70</option> | |
144 <option value="PAM30">PAM30</option> | |
145 </param> | |
146 <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> | |
147 <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> | |
148 <validator type="in_range" min="0" /> | |
149 </param> | |
150 <!-- I'd like word_size to be optional, with minimum 2 for tblastx --> | |
151 <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2."> | |
152 <validator type="in_range" min="0" /> | |
153 </param> | |
154 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> | |
155 </when> | |
156 </conditional> | |
157 </inputs> | |
158 <outputs> | |
159 <data name="output1" format="tabular" label="tblastx on ${db_opts.db_opts_selector}"> | |
160 <change_format> | |
161 <when input="out_format" value="0" format="txt"/> | |
162 <when input="out_format" value="0 -html" format="html"/> | |
163 <when input="out_format" value="2" format="txt"/> | |
164 <when input="out_format" value="2 -html" format="html"/> | |
165 <when input="out_format" value="4" format="txt"/> | |
166 <when input="out_format" value="4 -html" format="html"/> | |
167 <when input="out_format" value="5" format="blastxml"/> | |
168 </change_format> | |
169 </data> | |
170 </outputs> | |
171 <requirements> | |
172 <requirement type="binary">tblastx</requirement> | |
173 </requirements> | |
174 <help> | |
175 | |
176 .. class:: warningmark | |
177 | |
178 **Note**. Database searches may take a substantial amount of time. | |
179 For large input datasets it is advisable to allow overnight processing. | |
180 | |
181 ----- | |
182 | |
183 **What it does** | |
184 | |
185 Search a *translated nucleotide database* using a *translated nucleotide query*, | |
186 using the NCBI BLAST+ tblastx command line tool. | |
187 | |
188 ----- | |
189 | |
190 **Output format** | |
191 | |
192 Because Galaxy focuses on processing tabular data, the default output of this | |
193 tool is tabular. The standard BLAST+ tabular output contains 12 columns: | |
194 | |
195 ====== ========= ============================================ | |
196 Column NCBI name Description | |
197 ------ --------- -------------------------------------------- | |
198 1 qseqid Query Seq-id (ID of your sequence) | |
199 2 sseqid Subject Seq-id (ID of the database hit) | |
200 3 pident Percentage of identical matches | |
201 4 length Alignment length | |
202 5 mismatch Number of mismatches | |
203 6 gapopen Number of gap openings | |
204 7 qstart Start of alignment in query | |
205 8 qend End of alignment in query | |
206 9 sstart Start of alignment in subject (database hit) | |
207 10 send End of alignment in subject (database hit) | |
208 11 evalue Expectation value (E-value) | |
209 12 bitscore Bit score | |
210 ====== ========= ============================================ | |
211 | |
212 The BLAST+ tools can optionally output additional columns of information, | |
213 but this takes longer to calculate. Most (but not all) of these columns are | |
214 included by selecting the extended tabular output. The extra columns are | |
215 included *after* the standard 12 columns. This is so that you can write | |
216 workflow filtering steps that accept either the 12 or 24 column tabular | |
217 BLAST output. | |
218 | |
219 ====== ============= =========================================== | |
220 Column NCBI name Description | |
221 ------ ------------- ------------------------------------------- | |
222 13 sallseqid All subject Seq-id(s), separated by a ';' | |
223 14 score Raw score | |
224 15 nident Number of identical matches | |
225 16 positive Number of positive-scoring matches | |
226 17 gaps Total number of gaps | |
227 18 ppos Percentage of positive-scoring matches | |
228 19 qframe Query frame | |
229 20 sframe Subject frame | |
230 21 qseq Aligned part of query sequence | |
231 22 sseq Aligned part of subject sequence | |
232 23 qlen Query sequence length | |
233 24 slen Subject sequence length | |
234 ====== ============= =========================================== | |
235 | |
236 The third option is BLAST XML output, which is designed to be parsed by | |
237 another program, and is understood by some Galaxy tools. | |
238 | |
239 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). | |
240 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. | |
241 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. | |
242 The two query anchored outputs show a multiple sequence alignment between the query and all the matches, | |
243 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). | |
244 | |
245 ------- | |
246 | |
247 **References** | |
248 | |
249 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. | |
250 | |
251 </help> | |
252 </tool> |