comparison blastxml_to_tabular_selectable.xml @ 0:2bd0cbccb3c6

Uploaded
author galaxyp
date Wed, 08 Oct 2014 19:38:28 -0400
parents
children 5da5dcc5e13a
comparison
equal deleted inserted replaced
-1:000000000000 0:2bd0cbccb3c6
1 <tool id="blastxml_to_tabular_selectable" name="BLAST XML to selected tabular columns" version="0.0.9">
2 <description>Convert BLAST XML output to tabular</description>
3 <command interpreter="python">
4 blastxml_to_tabular_selectable.py -o $tabular_file
5 #if $output.out_format == 'cols' and $output.columns:
6 -c '$output.columns'
7 #else
8 -c '$output.out_format'
9 #end if
10 $qdef
11 $allqueries
12 #if $unmatched:
13 -u $unmatched_file
14 #end if
15 #if $maxhits.__str__ != '':
16 --maxhits $maxhits
17 #end if
18 #if $maxhsps.__str__ != '':
19 --maxhsps $maxhsps
20 #end if
21 #for i in $blastxml_file#${i} #end for#
22 ## $blastxml_file
23 </command>
24 <inputs>
25 <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/>
26 <param name="qdef" type="boolean" truevalue="-d" falsevalue="" checked="False" label="Use Iteration_query-def value for qseqid"/>
27 <param name="allqueries" type="boolean" truevalue="-a" falsevalue="" checked="False" label="Output all queries including those with no hits"/>
28 <param name="unmatched" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Output a list with queries having no hits"/>
29 <param name="maxhits" type="integer" value="1" optional="true" label="Maximum number of Hits to display for a query">
30 <validator type="in_range" min="1" />
31 </param>
32 <param name="maxhsps" type="integer" value="1" optional="true" label="Maximum number of HSPs to display for a Hit">
33 <validator type="in_range" min="1" />
34 </param>
35
36 <conditional name="output">
37 <param name="out_format" type="select" label="Output format">
38 <option value="std" selected="True">Tabular (standard 12 columns)</option>
39 <option value="ext">Tabular (extended 24 columns)</option>
40 <option value="cols">Tabular (select columns to output)</option>
41 </param>
42 <when value="std"/>
43 <when value="ext"/>
44 <when value="cols">
45 <param name="columns" type="select" multiple="true" display="checkboxes" label="Output columns">
46 <option value="qseqid"> 1 qseqid Query Seq-id (ID of your sequence)</option>
47 <option value="sseqid"> 2 sseqid Subject Seq-id (ID of the database hit)</option>
48 <option value="pident"> 3 pident Percentage of identical matches</option>
49 <option value="length"> 4 length Alignment length</option>
50 <option value="mismatch"> 5 mismatch Number of mismatches</option>
51 <option value="gapopen"> 6 gapopen Number of gap openings</option>
52 <option value="qstart"> 7 qstart Start of alignment in query</option>
53 <option value="qend"> 8 qend End of alignment in query</option>
54 <option value="sstart"> 9 sstart Start of alignment in subject (database hit)</option>
55 <option value="send">10 send End of alignment in subject (database hit)</option>
56 <option value="evalue">11 evalue Expectation value (E-value)</option>
57 <option value="bitscore">12 bitscore Bit score</option>
58 <option value="sallseqid">13 sallseqid All subject Seq-id(s), separated by a ';'</option>
59 <option value="score">14 score Raw score</option>
60 <option value="nident">15 nident Number of identical matches</option>
61 <option value="positive">16 positive Number of positive-scoring matches</option>
62 <option value="gaps">17 gaps Total number of gaps</option>
63 <option value="ppos">18 ppos Percentage of positive-scoring matches</option>
64 <option value="qframe">19 qframe Query frame</option>
65 <option value="sframe">20 sframe Subject frame</option>
66 <option value="qseq">21 qseq Aligned part of query sequence</option>
67 <option value="sseq">22 sseq Aligned part of subject sequence</option>
68 <option value="qlen">23 qlen Query sequence length</option>
69 <option value="slen">24 slen Subject sequence length</option>
70 <option value="salltitles">25 salltitles All subject title(s), separated by a '&lt;&gt;'</option>
71 </param>
72 </when>
73 </conditional>
74 </inputs>
75 <outputs>
76 <data name="tabular_file" format="tabular" label="BLAST results as tabular for ${on_string}" />
77 <data name="unmatched_file" format="tabular" label="Query sequences with no hits for ${on_string}">
78 <filter>unmatched == True</filter>
79 </data>
80 </outputs>
81 <requirements>
82 </requirements>
83 <tests>
84 <test>
85 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
86 <param name="out_format" value="std" />
87 <output name="tabular_file" file="blastp_rhodopsin_proteins_std.tabular" ftype="tabular" />
88 </test>
89 <test>
90 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
91 <param name="out_format" value="ext" />
92 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext.tabular" ftype="tabular" />
93 </test>
94 <test>
95 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
96 <param name="out_format" value="cols" />
97 <param name="columns" value="qseqid,sseqid,length,bitscore" />
98 <output name="tabular_file" file="blastp_rhodopsin_proteins_selcol.tabular" ftype="tabular" />
99 </test>
100 <test>
101 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
102 <param name="out_format" value="ext" />
103 <param name="maxhits" value="10" />
104 <param name="maxhsps" value="10" />
105 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allhits.tabular" ftype="tabular" />
106 </test>
107 <test>
108 <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />
109 <param name="out_format" value="ext" />
110 <param name="maxhits" value="1" />
111 <param name="maxhsps" value="1" />
112 <param name="unmatched" value="True" />
113 <param name="allqueries" value="True" />
114 <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allqueries.tabular" ftype="tabular" />
115 <output name="unmatched_file" file="unmatched_queries.tabular" ftype="tabular" />
116 </test>
117 </tests>
118 <help>
119
120 **What it does**
121
122 NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
123 formats including tabular and a more detailed XML format. A complex workflow
124 may need both the XML and the tabular output - but running BLAST twice is
125 slow and wasteful.
126
127 This tool takes the BLAST XML output and by default converts it into the
128 standard 12 column tabular equivalent:
129
130 ====== ========= ============================================
131 Column NCBI name Description
132 ------ --------- --------------------------------------------
133 1 qseqid Query Seq-id (ID of your sequence)
134 2 sseqid Subject Seq-id (ID of the database hit)
135 3 pident Percentage of identical matches
136 4 length Alignment length
137 5 mismatch Number of mismatches
138 6 gapopen Number of gap openings
139 7 qstart Start of alignment in query
140 8 qend End of alignment in query
141 9 sstart Start of alignment in subject (database hit)
142 10 send End of alignment in subject (database hit)
143 11 evalue Expectation value (E-value)
144 12 bitscore Bit score
145 ====== ========= ============================================
146
147 The BLAST+ tools can optionally output additional columns of information,
148 but this takes longer to calculate. Most (but not all) of these columns are
149 included by selecting the extended tabular output. The extra columns are
150 included *after* the standard 12 columns. This is so that you can write
151 workflow filtering steps that accept either the 12 or 24 column tabular
152 BLAST output.
153
154 ====== ============= ===========================================
155 Column NCBI name Description
156 ------ ------------- -------------------------------------------
157 13 sallseqid All subject Seq-id(s), separated by a ';'
158 14 score Raw score
159 15 nident Number of identical matches
160 16 positive Number of positive-scoring matches
161 17 gaps Total number of gaps
162 18 ppos Percentage of positive-scoring matches
163 19 qframe Query frame
164 20 sframe Subject frame
165 21 qseq Aligned part of query sequence
166 22 sseq Aligned part of subject sequence
167 23 qlen Query sequence length
168 24 slen Subject sequence length
169 25 salltitles All subject title(s), separated by a '&lt;&gt;'
170 ====== ============= ===========================================
171
172 Beware that the XML file (and thus the conversion) and the tabular output
173 direct from BLAST+ may differ in the presence of XXXX masking on regions
174 of low complexity (columns 21 and 22), and thus also calculated figures like
175 the percentage identity (column 3).
176
177 </help>
178 </tool>