Mercurial > repos > galaxyp > blastxml_to_tabular_selectable
comparison blastxml_to_tabular_selectable.xml @ 0:2bd0cbccb3c6
Uploaded
author | galaxyp |
---|---|
date | Wed, 08 Oct 2014 19:38:28 -0400 |
parents | |
children | 5da5dcc5e13a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bd0cbccb3c6 |
---|---|
1 <tool id="blastxml_to_tabular_selectable" name="BLAST XML to selected tabular columns" version="0.0.9"> | |
2 <description>Convert BLAST XML output to tabular</description> | |
3 <command interpreter="python"> | |
4 blastxml_to_tabular_selectable.py -o $tabular_file | |
5 #if $output.out_format == 'cols' and $output.columns: | |
6 -c '$output.columns' | |
7 #else | |
8 -c '$output.out_format' | |
9 #end if | |
10 $qdef | |
11 $allqueries | |
12 #if $unmatched: | |
13 -u $unmatched_file | |
14 #end if | |
15 #if $maxhits.__str__ != '': | |
16 --maxhits $maxhits | |
17 #end if | |
18 #if $maxhsps.__str__ != '': | |
19 --maxhsps $maxhsps | |
20 #end if | |
21 #for i in $blastxml_file#${i} #end for# | |
22 ## $blastxml_file | |
23 </command> | |
24 <inputs> | |
25 <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/> | |
26 <param name="qdef" type="boolean" truevalue="-d" falsevalue="" checked="False" label="Use Iteration_query-def value for qseqid"/> | |
27 <param name="allqueries" type="boolean" truevalue="-a" falsevalue="" checked="False" label="Output all queries including those with no hits"/> | |
28 <param name="unmatched" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Output a list with queries having no hits"/> | |
29 <param name="maxhits" type="integer" value="1" optional="true" label="Maximum number of Hits to display for a query"> | |
30 <validator type="in_range" min="1" /> | |
31 </param> | |
32 <param name="maxhsps" type="integer" value="1" optional="true" label="Maximum number of HSPs to display for a Hit"> | |
33 <validator type="in_range" min="1" /> | |
34 </param> | |
35 | |
36 <conditional name="output"> | |
37 <param name="out_format" type="select" label="Output format"> | |
38 <option value="std" selected="True">Tabular (standard 12 columns)</option> | |
39 <option value="ext">Tabular (extended 24 columns)</option> | |
40 <option value="cols">Tabular (select columns to output)</option> | |
41 </param> | |
42 <when value="std"/> | |
43 <when value="ext"/> | |
44 <when value="cols"> | |
45 <param name="columns" type="select" multiple="true" display="checkboxes" label="Output columns"> | |
46 <option value="qseqid"> 1 qseqid Query Seq-id (ID of your sequence)</option> | |
47 <option value="sseqid"> 2 sseqid Subject Seq-id (ID of the database hit)</option> | |
48 <option value="pident"> 3 pident Percentage of identical matches</option> | |
49 <option value="length"> 4 length Alignment length</option> | |
50 <option value="mismatch"> 5 mismatch Number of mismatches</option> | |
51 <option value="gapopen"> 6 gapopen Number of gap openings</option> | |
52 <option value="qstart"> 7 qstart Start of alignment in query</option> | |
53 <option value="qend"> 8 qend End of alignment in query</option> | |
54 <option value="sstart"> 9 sstart Start of alignment in subject (database hit)</option> | |
55 <option value="send">10 send End of alignment in subject (database hit)</option> | |
56 <option value="evalue">11 evalue Expectation value (E-value)</option> | |
57 <option value="bitscore">12 bitscore Bit score</option> | |
58 <option value="sallseqid">13 sallseqid All subject Seq-id(s), separated by a ';'</option> | |
59 <option value="score">14 score Raw score</option> | |
60 <option value="nident">15 nident Number of identical matches</option> | |
61 <option value="positive">16 positive Number of positive-scoring matches</option> | |
62 <option value="gaps">17 gaps Total number of gaps</option> | |
63 <option value="ppos">18 ppos Percentage of positive-scoring matches</option> | |
64 <option value="qframe">19 qframe Query frame</option> | |
65 <option value="sframe">20 sframe Subject frame</option> | |
66 <option value="qseq">21 qseq Aligned part of query sequence</option> | |
67 <option value="sseq">22 sseq Aligned part of subject sequence</option> | |
68 <option value="qlen">23 qlen Query sequence length</option> | |
69 <option value="slen">24 slen Subject sequence length</option> | |
70 <option value="salltitles">25 salltitles = All subject title(s), separated by a '<>'</option> | |
71 </param> | |
72 </when> | |
73 </conditional> | |
74 </inputs> | |
75 <outputs> | |
76 <data name="tabular_file" format="tabular" label="BLAST results as tabular for ${on_string}" /> | |
77 <data name="unmatched_file" format="tabular" label="Query sequences with no hits for ${on_string}"> | |
78 <filter>unmatched == True</filter> | |
79 </data> | |
80 </outputs> | |
81 <requirements> | |
82 </requirements> | |
83 <tests> | |
84 <test> | |
85 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" /> | |
86 <param name="out_format" value="std" /> | |
87 <output name="tabular_file" file="blastp_rhodopsin_proteins_std.tabular" ftype="tabular" /> | |
88 </test> | |
89 <test> | |
90 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" /> | |
91 <param name="out_format" value="ext" /> | |
92 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext.tabular" ftype="tabular" /> | |
93 </test> | |
94 <test> | |
95 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" /> | |
96 <param name="out_format" value="cols" /> | |
97 <param name="columns" value="qseqid,sseqid,length,bitscore" /> | |
98 <output name="tabular_file" file="blastp_rhodopsin_proteins_selcol.tabular" ftype="tabular" /> | |
99 </test> | |
100 <test> | |
101 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" /> | |
102 <param name="out_format" value="ext" /> | |
103 <param name="maxhits" value="10" /> | |
104 <param name="maxhsps" value="10" /> | |
105 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allhits.tabular" ftype="tabular" /> | |
106 </test> | |
107 <test> | |
108 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" /> | |
109 <param name="out_format" value="ext" /> | |
110 <param name="maxhits" value="1" /> | |
111 <param name="maxhsps" value="1" /> | |
112 <param name="unmatched" value="True" /> | |
113 <param name="allqueries" value="True" /> | |
114 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allqueries.tabular" ftype="tabular" /> | |
115 <output name="unmatched_file" file="unmatched_queries.tabular" ftype="tabular" /> | |
116 </test> | |
117 </tests> | |
118 <help> | |
119 | |
120 **What it does** | |
121 | |
122 NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of | |
123 formats including tabular and a more detailed XML format. A complex workflow | |
124 may need both the XML and the tabular output - but running BLAST twice is | |
125 slow and wasteful. | |
126 | |
127 This tool takes the BLAST XML output and by default converts it into the | |
128 standard 12 column tabular equivalent: | |
129 | |
130 ====== ========= ============================================ | |
131 Column NCBI name Description | |
132 ------ --------- -------------------------------------------- | |
133 1 qseqid Query Seq-id (ID of your sequence) | |
134 2 sseqid Subject Seq-id (ID of the database hit) | |
135 3 pident Percentage of identical matches | |
136 4 length Alignment length | |
137 5 mismatch Number of mismatches | |
138 6 gapopen Number of gap openings | |
139 7 qstart Start of alignment in query | |
140 8 qend End of alignment in query | |
141 9 sstart Start of alignment in subject (database hit) | |
142 10 send End of alignment in subject (database hit) | |
143 11 evalue Expectation value (E-value) | |
144 12 bitscore Bit score | |
145 ====== ========= ============================================ | |
146 | |
147 The BLAST+ tools can optionally output additional columns of information, | |
148 but this takes longer to calculate. Most (but not all) of these columns are | |
149 included by selecting the extended tabular output. The extra columns are | |
150 included *after* the standard 12 columns. This is so that you can write | |
151 workflow filtering steps that accept either the 12 or 24 column tabular | |
152 BLAST output. | |
153 | |
154 ====== ============= =========================================== | |
155 Column NCBI name Description | |
156 ------ ------------- ------------------------------------------- | |
157 13 sallseqid All subject Seq-id(s), separated by a ';' | |
158 14 score Raw score | |
159 15 nident Number of identical matches | |
160 16 positive Number of positive-scoring matches | |
161 17 gaps Total number of gaps | |
162 18 ppos Percentage of positive-scoring matches | |
163 19 qframe Query frame | |
164 20 sframe Subject frame | |
165 21 qseq Aligned part of query sequence | |
166 22 sseq Aligned part of subject sequence | |
167 23 qlen Query sequence length | |
168 24 slen Subject sequence length | |
169 25 salltitles All subject title(s), separated by a '<>' | |
170 ====== ============= =========================================== | |
171 | |
172 Beware that the XML file (and thus the conversion) and the tabular output | |
173 direct from BLAST+ may differ in the presence of XXXX masking on regions of | |
174 low complexity (columns 21 and 22), and thus also calculated figures like | |
175 the percentage identity (column 3). | |
176 | |
177 </help> | |
178 </tool> |