annotate blastxml_to_tabular_selectable.xml @ 1:5da5dcc5e13a default tip

Update from 24 to 25 columns and update versions to 1.0.0
author Jim Johnson <jj@umn.edu>
date Wed, 08 Oct 2014 18:57:39 -0500
parents 2bd0cbccb3c6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
5da5dcc5e13a Update from 24 to 25 columns and update versions to 1.0.0
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
1 <tool id="blastxml_to_tabular_selectable" name="BLAST XML to selected tabular columns" version="1.0.0">
0
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
2 <description>Convert BLAST XML output to tabular</description>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
3 <command interpreter="python">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
4 blastxml_to_tabular_selectable.py -o $tabular_file
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
5 #if $output.out_format == 'cols' and $output.columns:
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
6 -c '$output.columns'
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
7 #else
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
8 -c '$output.out_format'
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
9 #end if
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
10 $qdef
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
11 $allqueries
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
12 #if $unmatched:
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
13 -u $unmatched_file
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
14 #end if
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
15 #if $maxhits.__str__ != '':
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
16 --maxhits $maxhits
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
17 #end if
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
18 #if $maxhsps.__str__ != '':
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
19 --maxhsps $maxhsps
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
20 #end if
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
21 #for i in $blastxml_file#${i} #end for#
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
22 ## $blastxml_file
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
23 </command>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
24 <inputs>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
25 <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
26 <param name="qdef" type="boolean" truevalue="-d" falsevalue="" checked="False" label="Use Iteration_query-def value for qseqid"/>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
27 <param name="allqueries" type="boolean" truevalue="-a" falsevalue="" checked="False" label="Output all queries including those with no hits"/>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
28 <param name="unmatched" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Output a list with queries having no hits"/>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
29 <param name="maxhits" type="integer" value="1" optional="true" label="Maximum number of Hits to display for a query">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
30 <validator type="in_range" min="1" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
31 </param>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
32 <param name="maxhsps" type="integer" value="1" optional="true" label="Maximum number of HSPs to display for a Hit">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
33 <validator type="in_range" min="1" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
34 </param>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
35
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
36 <conditional name="output">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
37 <param name="out_format" type="select" label="Output format">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
38 <option value="std" selected="True">Tabular (standard 12 columns)</option>
1
5da5dcc5e13a Update from 24 to 25 columns and update versions to 1.0.0
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
39 <option value="ext">Tabular (extended 25 columns)</option>
0
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
40 <option value="cols">Tabular (select columns to output)</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
41 </param>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
42 <when value="std"/>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
43 <when value="ext"/>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
44 <when value="cols">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
45 <param name="columns" type="select" multiple="true" display="checkboxes" label="Output columns">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
46 <option value="qseqid"> 1 qseqid Query Seq-id (ID of your sequence)</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
47 <option value="sseqid"> 2 sseqid Subject Seq-id (ID of the database hit)</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
48 <option value="pident"> 3 pident Percentage of identical matches</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
49 <option value="length"> 4 length Alignment length</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
50 <option value="mismatch"> 5 mismatch Number of mismatches</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
51 <option value="gapopen"> 6 gapopen Number of gap openings</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
52 <option value="qstart"> 7 qstart Start of alignment in query</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
53 <option value="qend"> 8 qend End of alignment in query</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
54 <option value="sstart"> 9 sstart Start of alignment in subject (database hit)</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
55 <option value="send">10 send End of alignment in subject (database hit)</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
56 <option value="evalue">11 evalue Expectation value (E-value)</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
57 <option value="bitscore">12 bitscore Bit score</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
58 <option value="sallseqid">13 sallseqid All subject Seq-id(s), separated by a ';'</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
59 <option value="score">14 score Raw score</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
60 <option value="nident">15 nident Number of identical matches</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
61 <option value="positive">16 positive Number of positive-scoring matches</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
62 <option value="gaps">17 gaps Total number of gaps</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
63 <option value="ppos">18 ppos Percentage of positive-scoring matches</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
64 <option value="qframe">19 qframe Query frame</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
65 <option value="sframe">20 sframe Subject frame</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
66 <option value="qseq">21 qseq Aligned part of query sequence</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
67 <option value="sseq">22 sseq Aligned part of subject sequence</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
68 <option value="qlen">23 qlen Query sequence length</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
69 <option value="slen">24 slen Subject sequence length</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
70 <option value="salltitles">25 salltitles All subject title(s), separated by a '&lt;&gt;'</option>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
71 </param>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
72 </when>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
73 </conditional>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
74 </inputs>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
75 <outputs>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
76 <data name="tabular_file" format="tabular" label="BLAST results as tabular for ${on_string}" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
77 <data name="unmatched_file" format="tabular" label="Query sequences with no hits for ${on_string}">
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
78 <filter>unmatched == True</filter>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
79 </data>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
80 </outputs>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
81 <requirements>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
82 </requirements>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
83 <tests>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
84 <test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
85 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
86 <param name="out_format" value="std" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
87 <output name="tabular_file" file="blastp_rhodopsin_proteins_std.tabular" ftype="tabular" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
88 </test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
89 <test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
90 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
91 <param name="out_format" value="ext" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
92 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext.tabular" ftype="tabular" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
93 </test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
94 <test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
95 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
96 <param name="out_format" value="cols" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
97 <param name="columns" value="qseqid,sseqid,length,bitscore" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
98 <output name="tabular_file" file="blastp_rhodopsin_proteins_selcol.tabular" ftype="tabular" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
99 </test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
100 <test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
101 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
102 <param name="out_format" value="ext" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
103 <param name="maxhits" value="10" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
104 <param name="maxhsps" value="10" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
105 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allhits.tabular" ftype="tabular" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
106 </test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
107 <test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
108 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
109 <param name="out_format" value="ext" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
110 <param name="maxhits" value="1" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
111 <param name="maxhsps" value="1" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
112 <param name="unmatched" value="True" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
113 <param name="allqueries" value="True" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
114 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allqueries.tabular" ftype="tabular" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
115 <output name="unmatched_file" file="unmatched_queries.tabular" ftype="tabular" />
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
116 </test>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
117 </tests>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
118 <help>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
119
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
120 **What it does**
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
121
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
122 NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
123 formats including tabular and a more detailed XML format. A complex workflow
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
124 may need both the XML and the tabular output - but running BLAST twice is
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
125 slow and wasteful.
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
126
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
127 This tool takes the BLAST XML output and by default converts it into the
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
128 standard 12 column tabular equivalent:
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
129
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
130 ====== ========= ============================================
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
131 Column NCBI name Description
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
132 ------ --------- --------------------------------------------
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
133 1 qseqid Query Seq-id (ID of your sequence)
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
134 2 sseqid Subject Seq-id (ID of the database hit)
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
135 3 pident Percentage of identical matches
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
136 4 length Alignment length
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
137 5 mismatch Number of mismatches
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
138 6 gapopen Number of gap openings
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
139 7 qstart Start of alignment in query
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
140 8 qend End of alignment in query
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
141 9 sstart Start of alignment in subject (database hit)
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
142 10 send End of alignment in subject (database hit)
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
143 11 evalue Expectation value (E-value)
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
144 12 bitscore Bit score
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
145 ====== ========= ============================================
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
146
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
147 The BLAST+ tools can optionally output additional columns of information,
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
148 but this takes longer to calculate. Most (but not all) of these columns are
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
149 included by selecting the extended tabular output. The extra columns are
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
150 included *after* the standard 12 columns. This is so that you can write
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
151 workflow filtering steps that accept either the 12 or 25 column tabular
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
152 BLAST output.
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
153
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
154 ====== ============= ===========================================
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
155 Column NCBI name Description
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
156 ------ ------------- -------------------------------------------
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
157 13 sallseqid All subject Seq-id(s), separated by a ';'
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
158 14 score Raw score
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
159 15 nident Number of identical matches
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
160 16 positive Number of positive-scoring matches
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
161 17 gaps Total number of gaps
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
162 18 ppos Percentage of positive-scoring matches
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
163 19 qframe Query frame
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
164 20 sframe Subject frame
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
165 21 qseq Aligned part of query sequence
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
166 22 sseq Aligned part of subject sequence
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
167 23 qlen Query sequence length
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
168 24 slen Subject sequence length
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
169 25 salltitles All subject title(s), separated by a '&lt;&gt;'
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
170 ====== ============= ===========================================
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
171
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
172 Beware that the XML file (and thus the conversion) and the tabular output
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
173 direct from BLAST+ may differ in the presence of XXXX masking on regions
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
174 of low complexity (columns 21 and 22), and thus also calculated figures like
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
175 the percentage identity (column 3).
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
176
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
177 </help>
2bd0cbccb3c6 Uploaded
galaxyp
parents:
diff changeset
178 </tool>