comparison blast_reporting.xml @ 0:7db7ecc78ad6 draft

Uploaded
author damion
date Mon, 02 Mar 2015 20:46:00 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7db7ecc78ad6
1 <tool id="blast_reporting" name="BLAST Reporting" version="1.0.5">
2 <description>BLAST search results reporting tool</description>
3 <command interpreter="python"><![CDATA[
4 blast_reporting.py
5 $blastxml_file
6 $out_format
7 $tabular_file
8 $html_file
9 $selection_file:$selection_file.hid:$selection_file.dataset_id:$selection_file.id
10 $html_template
11 -f "
12 #for $my_repeat in $filter_num
13 $my_repeat.filter_column:
14 #for $my_repeat2 in $my_repeat.constraint
15 $my_repeat2.filter_comparison $my_repeat2.filter_value,
16 #end for
17 ;
18 #end for
19 #for $my_repeat in $filter_text
20 $my_repeat.filter_column2:
21 #for $my_repeat2 in $my_repeat.constraint2
22 $my_repeat2.filter_comparison2 $my_repeat2.filter_value2.replace(',','|'),
23 #end for
24 ;
25 #end for
26 "
27 #if len($bins)
28 -b "
29 #for $my_repeat in $bins
30 $my_repeat.bin_column:$my_repeat.group:$my_repeat.filter:$my_repeat.description;
31 #end for
32 "
33 #end if
34 #if $drop_redundant_hits
35 -r
36 #end if
37 #if $column_labels
38 -l "${column_labels}"
39 #end if
40 #if len($fields)
41 -c "
42 #for $my_repeat in $fields
43 $my_repeat.field:$my_repeat.group:$my_repeat.sort:$my_repeat.label;
44 #end for
45 "
46 #end if
47 #if not str($row_limit) == "None"
48 -n "${row_limit}"
49 #end if
50 ###if $library_datasets
51 ## -B "
52 ## #for $i, $dataset in enumerate($library_datasets)
53 ## #$dataset.get_file_name()
54 ## $dataset.id
55 ## #end for
56 ## "
57 ###end if
58 ]]></command>
59 <!-- Command summary: six positional arguments (BLAST XML input, out_format, tabular output, HTML output, selection_file joined by colons with its hid/dataset_id/id, html_template). Then -f, which is always emitted even when no filter is defined, carrying one "column: comparison value, ...;" group per numeric and per text filter (commas inside a text phrase list are rewritten to |). Then the optional flags: -b (bins, only when at least one bin exists), -r (drop redundant hits), -l (column labels, skipped when empty), -c (fields, only when at least one field exists) and -n (row limit, skipped when it renders as "None"). The -B library_datasets section is commented out. -->
60 <!-- target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/seq/${path}</target -->
61
62 <inputs>
63 <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> <!-- Only dataset input of the tool; passed as the first positional argument of the command. -->
64
65 <repeat name="filter_num" title="Numeric Filter" min="0" max="4">
66 <!-- TRIED conditional here, but it does not allow for <repeat> children. -->
67 <!-- Column choices: rows of the blast_reporting_fields data table whose "type" column is "numeric", sorted by name. -->
68 <param name="filter_column" type="select" label="Col">
69 <options from_data_table="blast_reporting_fields">
70 <filter type="static_value" value="numeric" column="type" />
71 <filter type="sort_by" column="name"/>
72 </options>
73 </param>
74 <!-- One to three comparisons against the chosen column. -->
75 <repeat name="constraint" title="Constraint" min="1" max="3">
76 <param name="filter_comparison" type="select" label="Comparison">
77 <option value="gte">&gt;= </option>
78 <option value="gt">&gt;</option>
79 <option value="lt">&lt; </option>
80 <option value="lte">&lt;= </option>
81 <option value="==">equal to </option>
82 <option value="!=">not equal to </option>
83 </param>
84 <!-- Galaxy's regex validator only matches from the start of the value, so the pattern is anchored with ^ and $ to reject trailing junk such as "97abc". Integers, decimals and exponent notation (e.g. 1e-5) are accepted; exponent forms are kept valid because the previous unanchored pattern already let them through. -->
85 <param name="filter_value" type="text" value="" label="Value">
86 <validator type="regex" message="Please input a number">^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$</validator>
87 </param>
88
89 </repeat>
90 </repeat>
91 <!-- Text filters (zero to three). Each picks a column whose "type" is "text" in the blast_reporting_fields data table and holds one to six "has text" / "excludes text" constraints. The command template replaces commas inside a phrase list with | before passing it to the script. -->
92 <repeat name="filter_text" title="Text Filter" min="0" max="3">
93 <param name="filter_column2" type="select" label="Column">
94 <options from_data_table="blast_reporting_fields">
95 <filter type="sort_by" column="name"/>
96 <filter type="static_value" value="text" column="type" />
97 </options>
98 </param>
99 <repeat name="constraint2" title="Constraint" min="1" max="6">
100 <param name="filter_comparison2" type="select" label="Comparison">
101 <option value="includes">has text</option>
102 <option value="excludes">excludes text</option>
103 </param>
104 <param name="filter_value2" type="text" size="25" label="Phrase(s), comma separated" value=""/>
105 </repeat>
106 </repeat>
107 <!-- Boolean switch: when true the command line gets the -r flag. -->
108 <param name="drop_redundant_hits" type="boolean" checked="true" label="Throw out redundant hits" help="Keep only the best hit when query matches multiple locales in a subject sequence." />
109 <!-- Passed to the script as -n "<row_limit>" unless the value renders as "None". -->
110 <param name="row_limit" type="integer" label="Row limit (per query)" help="Limit each query's results to this many rows. 0=unlimited." value="0" />
111 <!-- Base column set for the report. Any value other than "std" also enables the selection_file output (see the filter in the outputs section). -->
112 <param name="out_format" type="select" label="Basic Report Field Output" help="Use the field selectors below to add or customize fields that end up in the output HTML or tabular report. By default results are presented by query, with table data sorted by score, descending. Enter a preferred label in the text field to override the default field labeling.">
113 <option value="std" selected="True">Standard 12 columns</option>
114 <option value="std+seqs">Standard 12 columns + sequences</option>
115 <option value="ext">Extended 24 columns</option>
116 <option value="ext+">Extended 26 columns</option>
117 <option value="custom">Only field selections below</option>
118 </param>
119 <!-- Extra report fields (min 0, no max set). Each entry is serialised in the command's -c option as field:group:sort:label; and -c is only emitted when at least one field is defined. -->
120 <repeat name="fields" title="Field" min="0">
121 <!-- acc, descr, score, p_cov, p_ident, -->
122 <param name="field" type="select" label="Include">
123 <options from_data_table="blast_reporting_fields">
124 <filter type="sort_by" column="name"/>
125 <filter type="static_value" value="1" column="choose" />
126 </options>
127 </param>
128 <param name="group" type="select" label="Group">
129 <option value="column" selected="true">A column</option>
130 <option value="hidden">A hidden column</option>
131 <option value="table">A table section</option>
132 <option value="section">A report section</option>
133 </param>
134 <param name="sort" type="select" label="Sort">
135 <option value="" selected="true">no sorting</option>
136 <option value="desc">descend</option>
137 <option value="asc">ascend</option>
138 </param>
139 <!-- Free-text label override; empty by default. -->
140 <param name="label" type="text" label="Customize label for this column or section" size="15" value=""/>
141 </repeat>
142
143 <!-- Disabled until Galaxy bug fixed for this.
144 <param name="library_datasets" type="library_data" label="Reference Bin file" help="Select one reference bin at a time from the popup window's Reference Bin library."/>
145 -->
146 <!-- Reference bins (zero to five). Each entry is serialised in the command's -b option as bin_column:group:filter:description; and -b is only emitted when at least one bin is defined. -->
147 <repeat name="bins" title="Reference Bin" min="0" max="5">
148 <!-- Bin choices come from the fasta_reference_dbs data table. -->
149 <param name="bin_column" type="select" label="Sort by database bin match">
150 <options from_data_table="fasta_reference_dbs"></options>
151 </param>
152 <param name="group" type="select" label="Grouping">
153 <option value="column" selected="true">A column</option>
154 <option value="hidden">A hidden column</option>
155 <option value="table">A table section</option>
156 </param>
157 <param name="filter" type="select" label="Filtering">
158 <option value="" selected="true">No filtering </option>
159 <option value="include">keep only matches </option>
160 <option value="exclude">Exclude matches </option>
161 </param>
162 <param name="description" type="boolean" checked="true" label="Show Description" help="Show description" />
163 </repeat>
164 <!-- Label style for the tabular report. The command only adds -l when this value is non-empty, so "No labels" (empty value) is meant to omit the option. -->
165 <param name="column_labels" type="select" label="Tabular Report Column Labels" help="">
166 <option value="label" selected="True">Short name</option>
167 <option value="field">Field name</option>
168 <option value="">No labels</option>
169 </param>
170 <!-- Optional template chosen from the blast_reporting_templates data table; the html_file output is filtered on this value. -->
171 <param name="html_template" type="select" optional="true" label="HTML Report template">
172 <options from_data_table="blast_reporting_templates"/>
173 </param>
174
175 </inputs>
176 <outputs>
177 <!-- tabular_file has no filter; html_file and selection_file are conditional on the filters below. NOTE(review): html_template is an optional select, so confirm that an unset selection really compares equal to "" here; if it is None instead, the HTML output would always be enabled. -->
178 <data format="tabular" name="tabular_file" label="Tabular report for data $blastxml_file.hid" />
179 <data format="html" name="html_file" label="HTML report for data $blastxml_file.hid">
180 <filter>html_template != ""</filter>
181 </data>
182 <data format="tabular" name="selection_file" label="Sequence Selection List for data $blastxml_file.hid">
183 <filter>out_format != "std"</filter>
184 </data>
185 </outputs>
186
187 <tests>
188 <test><!-- Test taken from original BLAST to xml tool -->
189 <param name="blastxml_file" value="blastx_sample.xml"/>
190 <output name="tabular_file" file="blastx_sample_converted.tabular"/>
191 <param name="out_format" value="std"/>
192 <param name="column_labels" value="" />
193 <param name="drop_redundant_hits" value="False"/>
194 </test>
195 <!-- Standard 12-column report, no labels, redundant hits dropped. -->
196 <test>
197 <param name="blastxml_file" value="blast_reporting_1.blastxml"/>
198 <output name="tabular_file" file="blast_reporting_1a.tabular"/>
199 <param name="out_format" value="std"/>
200 <param name="column_labels" value="" />
201 <param name="drop_redundant_hits" value="True"/>
202 </test>
203 <!-- Same settings as the previous test plus one numeric filter: pident gte 97. -->
204 <test>
205 <param name="blastxml_file" value="blast_reporting_1.blastxml"/>
206 <output name="tabular_file" file="blast_reporting_1b.tabular"/>
207 <param name="out_format" value="std"/>
208 <param name="column_labels" value="" />
209 <param name="drop_redundant_hits" value="True"/>
210
211 <param name="filter_num_0|filter_column" value="pident"/>
212 <param name="filter_num_0|constraint_0|filter_comparison" value="gte"/>
213 <param name="filter_num_0|constraint_0|filter_value" value="97"/>
214
215 </test>
216 <!-- Extended (ext+) report: also checks the selection_file output, the pident filter and one custom field sorted ascending. -->
217 <test>
218 <param name="blastxml_file" value="blast_reporting_1.blastxml"/>
219 <output name="tabular_file" file="blast_reporting_1c.tabular"/>
220 <output name="selection_file" file="blast_reporting_1c1.tabular"/>
221
222 <param name="out_format" value="ext+"/>
223 <param name="column_labels" value="" />
224 <param name="drop_redundant_hits" value="True"/>
225
226 <param name="filter_num_0|filter_column" value="pident"/>
227 <param name="filter_num_0|constraint_0|filter_comparison" value="gte"/>
228 <param name="filter_num_0|constraint_0|filter_value" value="97"/>
229
230 <param name="fields_0|field" value="pident"/>
231 <param name="fields_0|group" value="column"/>
232 <param name="fields_0|sort" value="asc"/>
233 <!-- NOTE(review): column_labels is also set to "" earlier in this test; confirm which value the expected files were generated with and remove the other. -->
234 <param name="column_labels" value="label"/>
235
236 </test>
237
238 </tests>
239
240 <help><![CDATA[
241
242 .. class:: infomark
243
244 **What it does**
245
246 NCBI BLAST+ searches can output in a range of formats, but in the past only
247 the XML format included fields like sequence description.
248 This tool converts the BLAST XML report into 12, 24, 26 or custom column tabular
249 and HTML reports. This tool is loosely based on the ''BLAST XML to tabular'' tool
250 available in the main Tool Shed. For the default 12 and 24 column reports, it should
251 produce the same output although whitespace differences may exist.
252
253 ====== ============= ============================================
254 Column NCBI name Description
255 ====== ============= ============================================
256 1 qseqid Query Seq-id (ID of your sequence)
257 2 sseqid Subject Seq-id (ID of the database hit)
258 3 pident Percentage of identical matches
259 4 length Alignment length
260 5 mismatch Number of mismatches
261 6 gapopen Number of gap openings
262 7 qstart Start of alignment in query
263 8 qend End of alignment in query
264 9 sstart Start of alignment in subject (database hit)
265 10 send End of alignment in subject (database hit)
266 11 evalue Expectation value (E-value)
267 12 bitscore Bit score
268 .
269 13 sallseqid All subject Seq-id(s), separated by a ';'
270 14 score Raw score
271 15 nident Number of identical matches
272 16 positive Number of positive-scoring matches
273 17 gaps Total number of gaps
274 18 ppos Percentage of positive-scoring matches
275 19 qframe Query frame
276 20 sframe Subject frame
277 21 qseq Aligned part of query sequence
278 22 sseq Aligned part of subject sequence
279 23 qlen Query sequence length
280 24 slen Subject sequence length
281 .
282 25 pcov Percentage coverage
283 26 sallseqdescr All subject Seq-descr(s), separated by a ','
284 ====== ============= ============================================
285
286 An option also exists to select particular columns for the output
287 report, and to cross-reference each result with one or more reference bins.
288 A command line version can be used. Type blast_reporting.py -h for help.
289
290 **python blast_reporting.py in_file out_file out_format [options]**
291
292 .. class:: warningmark
293
294 As noted in the original BLAST XML to tabular tool, ''Be aware that the XML file (and thus the conversion) and the tabular output direct from BLAST+ may differ in the presence of XXXX masking on regions low complexity (columns 21 and 22), and thus also calculated figures like the percentage identity (column 3) and gap openings.''
295
296 **References**
297
298 If using this tool for publishing results, you may need to cite its origin in the BLAST XML to tabular tool:
299
300 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
301 Galaxy tools and workflows for sequence analysis with applications
302 in molecular plant pathology. PeerJ 1:e167
303 http://dx.doi.org/10.7717/peerj.167
304
305 ]]></help>
306 <citations>
307 <citation type="doi">10.7717/peerj.167</citation> <!-- Same PeerJ article that the help text's References section cites. -->
308 </citations>
309 </tool>