0
|
1 <tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.8">
|
|
2 <description>Convert BLAST XML output to tabular</description>
|
|
3 <command interpreter="python">
|
|
4 blastxml_to_tabular.py $blastxml_file $tabular_file $out_format
|
|
5 </command>
|
|
6 <inputs>
|
|
7 <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/>
|
|
8 <param name="out_format" type="select" label="Output format">
|
|
9 <option value="std" selected="True">Tabular (standard 12 columns)</option>
|
|
10 <option value="ext">Tabular (extended 24 columns)</option>
|
|
11 </param>
|
|
12 </inputs>
|
|
13 <outputs>
|
|
14 <data name="tabular_file" format="tabular" label="BLAST results as tabular" />
|
|
15 </outputs>
|
|
16 <requirements>
|
|
17 </requirements>
|
|
18 <tests>
|
|
19 <test>
|
|
20 <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
|
|
21 <param name="out_format" value="std" />
|
|
22 <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin.tabular -->
|
|
23 <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted.tabular" ftype="tabular" />
|
|
24 </test>
|
|
25 <test>
|
|
26 <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
|
|
27 <param name="out_format" value="ext" />
|
|
28 <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin_22c.tabular -->
|
|
29 <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted_ext.tabular" ftype="tabular" />
|
|
30 </test>
|
|
31 <test>
|
|
32 <param name="blastxml_file" value="blastp_sample.xml" ftype="blastxml" />
|
|
33 <param name="out_format" value="std" />
|
|
34 <!-- Note this has some white space differences from the actual blastp output -->
|
|
35 <output name="tabular_file" file="blastp_sample_converted.tabular" ftype="tabular" />
|
|
36 </test>
|
|
37 <test>
|
|
38 <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
|
|
39 <param name="out_format" value="std" />
|
|
40 <!-- Note this has some white space differences from the actual blastx output -->
|
|
41 <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted.tabular" ftype="tabular" />
|
|
42 </test>
|
|
43 <test>
|
|
44 <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
|
|
45 <param name="out_format" value="ext" />
|
|
46 <!-- Note this has some white space and XXXX masking differences from the actual blastx output -->
|
|
47 <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted_ext.tabular" ftype="tabular" />
|
|
48 </test>
|
|
49 <test>
|
|
50 <param name="blastxml_file" value="blastx_sample.xml" ftype="blastxml" />
|
|
51 <param name="out_format" value="std" />
|
|
52 <!-- Note this has some white space differences from the actual blastx output -->
|
|
53 <output name="tabular_file" file="blastx_sample_converted.tabular" ftype="tabular" />
|
|
54 </test>
|
|
55 <test>
|
|
56 <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" />
|
|
57 <param name="out_format" value="std" />
|
|
58 <!-- Note this has some white space differences from the actual blastp output -->
|
|
59 <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_std.tabular" ftype="tabular" />
|
|
60 </test>
|
|
61 <test>
|
|
62 <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" />
|
|
63 <param name="out_format" value="ext" />
|
|
64 <!-- Note this has some white space differences from the actual blastp output -->
|
|
65 <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_ext.tabular" ftype="tabular" />
|
|
66 </test>
|
|
67 </tests>
|
|
68 <help>
|
|
69
|
|
70 **What it does**
|
|
71
|
|
72 NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
|
|
73 formats including tabular and a more detailed XML format. A complex workflow
|
|
74 may need both the XML and the tabular output - but running BLAST twice is
|
|
75 slow and wasteful.
|
|
76
|
|
77 This tool takes the BLAST XML output and by default converts it into the
|
|
78 standard 12 column tabular equivalent:
|
|
79
|
|
80 ====== ========= ============================================
|
|
81 Column NCBI name Description
|
|
82 ------ --------- --------------------------------------------
|
|
83 1 qseqid Query Seq-id (ID of your sequence)
|
|
84 2 sseqid Subject Seq-id (ID of the database hit)
|
|
85 3 pident Percentage of identical matches
|
|
86 4 length Alignment length
|
|
87 5 mismatch Number of mismatches
|
|
88 6 gapopen Number of gap openings
|
|
89 7 qstart Start of alignment in query
|
|
90 8 qend End of alignment in query
|
|
91 9 sstart Start of alignment in subject (database hit)
|
|
92 10 send End of alignment in subject (database hit)
|
|
93 11 evalue Expectation value (E-value)
|
|
94 12 bitscore Bit score
|
|
95 ====== ========= ============================================
|
|
96
|
|
97 The BLAST+ tools can optionally output additional columns of information,
|
|
98 but this takes longer to calculate. Most (but not all) of these columns are
|
|
99 included by selecting the extended tabular output. The extra columns are
|
|
100 included *after* the standard 12 columns. This is so that you can write
|
|
101 workflow filtering steps that accept either the 12 or 24 column tabular
|
|
102 BLAST output.
|
|
103
|
|
104 ====== ============= ===========================================
|
|
105 Column NCBI name Description
|
|
106 ------ ------------- -------------------------------------------
|
|
107 13 sallseqid All subject Seq-id(s), separated by a ';'
|
|
108 14 score Raw score
|
|
109 15 nident Number of identical matches
|
|
110 16 positive Number of positive-scoring matches
|
|
111 17 gaps Total number of gaps
|
|
112 18 ppos Percentage of positive-scoring matches
|
|
113 19 qframe Query frame
|
|
114 20 sframe Subject frame
|
|
115 21 qseq Aligned part of query sequence
|
|
116 22 sseq Aligned part of subject sequence
|
|
117 23 qlen Query sequence length
|
|
118 24 slen Subject sequence length
|
|
119 ====== ============= ===========================================
|
|
120
|
|
121 Beware that the XML file (and thus the conversion) and the tabular output
|
|
122 direct from BLAST+ may differ in the presence of XXXX masking on regions
|
|
123 of low complexity (columns 21 and 22), and thus also calculated figures like
|
|
124 the percentage identity (column 3).
|
|
125
|
|
126 </help>
|
|
127 </tool>
|