Mercurial > repos > devteam > ncbi_blast_plus
comparison blastxml_to_tabular.xml @ 0:d375502056f1 draft
Uploaded
author | devteam |
---|---|
date | Fri, 17 Aug 2012 09:11:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d375502056f1 |
---|---|
1 <tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.8"> | |
2 <description>Convert BLAST XML output to tabular</description> | |
3 <command interpreter="python"> | |
4 blastxml_to_tabular.py $blastxml_file $tabular_file $out_format | |
5 </command> | |
6 <inputs> | |
7 <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> | |
8 <param name="out_format" type="select" label="Output format"> | |
9 <option value="std" selected="True">Tabular (standard 12 columns)</option> | |
10 <option value="ext">Tabular (extended 24 columns)</option> | |
11 </param> | |
12 </inputs> | |
13 <outputs> | |
14 <data name="tabular_file" format="tabular" label="BLAST results as tabular" /> | |
15 </outputs> | |
16 <requirements> | |
17 </requirements> | |
18 <tests> | |
19 <test> | |
20 <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" /> | |
21 <param name="out_format" value="std" /> | |
22 <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin.tabular --> | |
23 <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted.tabular" ftype="tabular" /> | |
24 </test> | |
25 <test> | |
26 <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" /> | |
27 <param name="out_format" value="ext" /> | |
28 <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin_22c.tabular --> | |
29 <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted_ext.tabular" ftype="tabular" /> | |
30 </test> | |
31 <test> | |
32 <param name="blastxml_file" value="blastp_sample.xml" ftype="blastxml" /> | |
33 <param name="out_format" value="std" /> | |
34 <!-- Note this has some white space differences from the actual blastp output --> | |
35 <output name="tabular_file" file="blastp_sample_converted.tabular" ftype="tabular" /> | |
36 </test> | |
37 <test> | |
38 <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" /> | |
39 <param name="out_format" value="std" /> | |
40 <!-- Note this has some white space differences from the actual blastx output --> | |
41 <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted.tabular" ftype="tabular" /> | |
42 </test> | |
43 <test> | |
44 <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" /> | |
45 <param name="out_format" value="ext" /> | |
46 <!-- Note this has some white space and XXXX masking differences from the actual blastx output --> | |
47 <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted_ext.tabular" ftype="tabular" /> | |
48 </test> | |
49 <test> | |
50 <param name="blastxml_file" value="blastx_sample.xml" ftype="blastxml" /> | |
51 <param name="out_format" value="std" /> | |
52 <!-- Note this has some white space differences from the actual blastx output --> | |
53 <output name="tabular_file" file="blastx_sample_converted.tabular" ftype="tabular" /> | |
54 </test> | |
55 <test> | |
56 <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" /> | |
57 <param name="out_format" value="std" /> | |
58 <!-- Note this has some white space differences from the actual blastp output --> | |
59 <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_std.tabular" ftype="tabular" /> | |
60 </test> | |
61 <test> | |
62 <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" /> | |
63 <param name="out_format" value="ext" /> | |
64 <!-- Note this has some white space differences from the actual blastp output --> | |
65 <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_ext.tabular" ftype="tabular" /> | |
66 </test> | |
67 </tests> | |
68 <help> | |
69 | |
70 **What it does** | |
71 | |
72 NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of | |
73 formats including tabular and a more detailed XML format. A complex workflow | |
74 may need both the XML and the tabular output - but running BLAST twice is | |
75 slow and wasteful. | |
76 | |
77 This tool takes the BLAST XML output and by default converts it into the | |
78 standard 12 column tabular equivalent: | |
79 | |
80 ====== ========= ============================================ | |
81 Column NCBI name Description | |
82 ------ --------- -------------------------------------------- | |
83 1 qseqid Query Seq-id (ID of your sequence) | |
84 2 sseqid Subject Seq-id (ID of the database hit) | |
85 3 pident Percentage of identical matches | |
86 4 length Alignment length | |
87 5 mismatch Number of mismatches | |
88 6 gapopen Number of gap openings | |
89 7 qstart Start of alignment in query | |
90 8 qend End of alignment in query | |
91 9 sstart Start of alignment in subject (database hit) | |
92 10 send End of alignment in subject (database hit) | |
93 11 evalue Expectation value (E-value) | |
94 12 bitscore Bit score | |
95 ====== ========= ============================================ | |
96 | |
97 The BLAST+ tools can optionally output additional columns of information, | |
98 but this takes longer to calculate. Most (but not all) of these columns are | |
99 included by selecting the extended tabular output. The extra columns are | |
100 included *after* the standard 12 columns. This is so that you can write | |
101 workflow filtering steps that accept either the 12 or 24 column tabular | |
102 BLAST output. | |
103 | |
104 ====== ============= =========================================== | |
105 Column NCBI name Description | |
106 ------ ------------- ------------------------------------------- | |
107 13 sallseqid All subject Seq-id(s), separated by a ';' | |
108 14 score Raw score | |
109 15 nident Number of identical matches | |
110 16 positive Number of positive-scoring matches | |
111 17 gaps Total number of gaps | |
112 18 ppos Percentage of positive-scoring matches | |
113 19 qframe Query frame | |
114 20 sframe Subject frame | |
115 21 qseq Aligned part of query sequence | |
116 22 sseq Aligned part of subject sequence | |
117 23 qlen Query sequence length | |
118 24 slen Subject sequence length | |
119 ====== ============= =========================================== | |
120 | |
121 Beware that the XML file (and thus the conversion) and the tabular output | |
122 direct from BLAST+ may differ in the presence of XXXX masking on regions | |
123 of low complexity (columns 21 and 22), and thus also calculated figures like | |
124 the percentage identity (column 3). | |
125 | |
126 </help> | |
127 </tool> |