Mercurial > repos > damion > blast_reporting
diff blast_reporting.xml @ 0:7db7ecc78ad6 draft
Uploaded
author | damion |
---|---|
date | Mon, 02 Mar 2015 20:46:00 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blast_reporting.xml Mon Mar 02 20:46:00 2015 -0500 @@ -0,0 +1,309 @@ +<tool id="blast_reporting" name="BLAST Reporting" version="1.0.5"> + <description>BLAST search results reporting tool</description> + <command interpreter="python"><![CDATA[ + blast_reporting.py + $blastxml_file + $out_format + $tabular_file + $html_file + $selection_file:$selection_file.hid:$selection_file.dataset_id:$selection_file.id + $html_template + -f " + #for $my_repeat in $filter_num + $my_repeat.filter_column: + #for $my_repeat2 in $my_repeat.constraint + $my_repeat2.filter_comparison $my_repeat2.filter_value, + #end for + ; + #end for + #for $my_repeat in $filter_text + $my_repeat.filter_column2: + #for $my_repeat2 in $my_repeat.constraint2 + $my_repeat2.filter_comparison2 $my_repeat2.filter_value2.replace(',','|'), + #end for + ; + #end for + " + #if len($bins) + -b " + #for $my_repeat in $bins + $my_repeat.bin_column:$my_repeat.group:$my_repeat.filter:$my_repeat.description; + #end for + " + #end if + #if $drop_redundant_hits + -r + #end if + #if $column_labels + -l "${column_labels}" + #end if + #if len($fields) + -c " + #for $my_repeat in $fields + $my_repeat.field:$my_repeat.group:$my_repeat.sort:$my_repeat.label; + #end for + " + #end if + #if not str($row_limit) == "None" + -n "${row_limit}" + #end if + ###if $library_datasets + ## -B " + ## #for $i, $dataset in enumerate($library_datasets) + ## #$dataset.get_file_name() + ## $dataset.id + ## #end for + ## " + ###end if + ]]></command> + +<!-- target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/seq/${path}</target --> + + <inputs> + <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> + + <repeat name="filter_num" title="Numeric Filter" min="0" max="4"> + <!-- TRIED conditional here, but it does not allow for <repeat> children. --> + + <param name="filter_column" type="select" label="Col"> + <options from_data_table="blast_reporting_fields"> + <filter type="static_value" value="numeric" column="type" /> + <filter type="sort_by" column="name"/> + </options> + </param> + + <repeat name="constraint" title="Constraint" min="1" max="3"> + <param name="filter_comparison" type="select" label="Comparison"> + <option value="gte">>= </option> + <option value="gt">></option> + <option value="lt">< </option> + <option value="lte"><= </option> + <option value="==">equal to </option> + <option value="!=">not equal to </option> + </param> + + <param name="filter_value" type="text" value="" label="Value"> + <validator type="regex" message="Please input a number">([0-9]+|[0-9]*\.[0-9]+)</validator> + </param> + + </repeat> + </repeat> + + <repeat name="filter_text" title="Text Filter" min="0" max="3"> + <param name="filter_column2" type="select" label="Column"> + <options from_data_table="blast_reporting_fields"> + <filter type="sort_by" column="name"/> + <filter type="static_value" value="text" column="type" /> + </options> + </param> + <repeat name="constraint2" title="Constraint" min="1" max="6"> + <param name="filter_comparison2" type="select" label="Comparison"> + <option value="includes">has text</option> + <option value="excludes">excludes text</option> + </param> + <param name="filter_value2" type="text" size="25" label="Phrase(s), comma separated" value=""/> + </repeat> + </repeat> + + <param name="drop_redundant_hits" type="boolean" checked="true" label="Throw out redundant hits" help="Keep only the best hit when query matches multiple locales in a subject sequence." /> + + <param name="row_limit" type="integer" label="Row limit (per query)" help="Limit each query's results to this many rows. 0=unlimited." value="0" /> + + <param name="out_format" type="select" label="Basic Report Field Output" help="Use the field selectors below to add or customize fields that end up in the output HTML or tabular report. By default results are presented by query, with table data sorted by score, descending. Enter a preferred label in the text field to override the default field labeling."> + <option value="std" selected="True">Standard 12 columns</option> + <option value="std+seqs">Standard 12 columns + sequences</option> + <option value="ext">Extended 24 columns</option> + <option value="ext+">Extended 26 columns</option> + <option value="custom">Only field selections below</option> + </param> + + <repeat name="fields" title="Field" min="0"> + <!-- acc, descr, score, p_cov, p_ident, --> + <param name="field" type="select" label="Include"> + <options from_data_table="blast_reporting_fields"> + <filter type="sort_by" column="name"/> + <filter type="static_value" value="1" column="choose" /> + </options> + </param> + <param name="group" type="select" label="Group"> + <option value="column" selected="true">A column</option> + <option value="hidden">A hidden column</option> + <option value="table">A table section</option> + <option value="section">A report section</option> + </param> + <param name="sort" type="select" label="Sort"> + <option value="" selected="true">no sorting</option> + <option value="desc">descend</option> + <option value="asc">ascend</option> + </param> + + <param name="label" type="text" label="Customize label for this column or section" size="15" value=""/> + </repeat> + + <!-- Disabled until Galaxy bug fixed for this. + <param name="library_datasets" type="library_data" label="Reference Bin file" help="Select one reference bin at a time from the popup window's Reference Bin library."/> + --> + + <repeat name="bins" title="Reference Bin" min="0" max="5"> + + <param name="bin_column" type="select" label="Sort by database bin match"> + <options from_data_table="fasta_reference_dbs"></options> + </param> + <param name="group" type="select" label="Grouping"> + <option value="column" selected="true">A column</option> + <option value="hidden">A hidden column</option> + <option value="table">A table section</option> + </param> + <param name="filter" type="select" label="Filtering"> + <option value="" selected="true">No filtering </option> + <option value="include">keep only matches </option> + <option value="exclude">Exclude matches </option> + </param> + <param name="description" type="boolean" checked="true" label="Show Description" help="Show description" /> + </repeat> + + <param name="column_labels" type="select" label="Tabular Report Column Labels" help=""> + <option value="label" selected="True">Short name</option> + <option value="field">Field name</option> + <option value="">No labels</option> + </param> + + <param name="html_template" type="select" optional="true" label="HTML Report template"> + <options from_data_table="blast_reporting_templates"/> + </param> + + </inputs> + <outputs> + + <data format="tabular" name="tabular_file" label="Tabular report for data $blastxml_file.hid" /> + <data format="html" name="html_file" label="HTML report for data $blastxml_file.hid"> + <filter>html_template != ""</filter> + </data> + <data format="tabular" name="selection_file" label="Sequence Selection List for data $blastxml_file.hid"> + <filter>out_format != "std"</filter> + </data> + </outputs> + + <tests> + <test><!-- Test taken from original BLAST to xml tool --> + <param name="blastxml_file" value="blastx_sample.xml"/> + <output name="tabular_file" file="blastx_sample_converted.tabular"/> + <param name="out_format" value="std"/> + <param name="column_labels" value="" /> + <param name="drop_redundant_hits" value="False"/> + </test> + + <test> + <param name="blastxml_file" value="blast_reporting_1.blastxml"/> + <output name="tabular_file" file="blast_reporting_1a.tabular"/> + <param name="out_format" value="std"/> + <param name="column_labels" value="" /> + <param name="drop_redundant_hits" value="True"/> + </test> + + <test> + <param name="blastxml_file" value="blast_reporting_1.blastxml"/> + <output name="tabular_file" file="blast_reporting_1b.tabular"/> + <param name="out_format" value="std"/> + <param name="column_labels" value="" /> + <param name="drop_redundant_hits" value="True"/> + + <param name="filter_num_0|filter_column" value="pident"/> + <param name="filter_num_0|constraint_0|filter_comparison" value="gte"/> + <param name="filter_num_0|constraint_0|filter_value" value="97"/> + + </test> + + <test> + <param name="blastxml_file" value="blast_reporting_1.blastxml"/> + <output name="tabular_file" file="blast_reporting_1c.tabular"/> + <output name="selection_file" file="blast_reporting_1c1.tabular"/> + + <param name="out_format" value="ext+"/> + <param name="column_labels" value="" /> + <param name="drop_redundant_hits" value="True"/> + + <param name="filter_num_0|filter_column" value="pident"/> + <param name="filter_num_0|constraint_0|filter_comparison" value="gte"/> + <param name="filter_num_0|constraint_0|filter_value" value="97"/> + + <param name="fields_0|field" value="pident"/> + <param name="fields_0|group" value="column"/> + <param name="fields_0|sort" value="asc"/> + + <param name="column_labels" value="label"/> + + </test> + + </tests> + + <help><![CDATA[ + +.. class:: infomark + +**What it does** + +NCBI BLAST+ searches can output in a range of formats, but in the past only +the XML format included fields like sequence description. +This tool converts the BLAST XML report into 12, 24, 26 or custom column tabular + and HTML reports. This tool is loosely based on the ''BLAST XML to tabular'' tool + available in main toolshed. For the default 12 and 24 column reports, it should +produce the same output although whitespace differences may exist. + +====== ============= ============================================ +Column NCBI name Description +====== ============= ============================================ + 1 qseqid Query Seq-id (ID of your sequence) + 2 sseqid Subject Seq-id (ID of the database hit) + 3 pident Percentage of identical matches + 4 length Alignment length + 5 mismatch Number of mismatches + 6 gapopen Number of gap openings + 7 qstart Start of alignment in query + 8 qend End of alignment in query + 9 sstart Start of alignment in subject (database hit) + 10 send End of alignment in subject (database hit) + 11 evalue Expectation value (E-value) + 12 bitscore Bit score + . + 13 sallseqid All subject Seq-id(s), separated by a ';' + 14 score Raw score + 15 nident Number of identical matches + 16 positive Number of positive-scoring matches + 17 gaps Total number of gaps + 18 ppos Percentage of positive-scoring matches + 19 qframe Query frame + 20 sframe Subject frame + 21 qseq Aligned part of query sequence + 22 sseq Aligned part of subject sequence + 23 qlen Query sequence length + 24 slen Subject sequence length + . + 25 pcov Percentage coverage + 26 sallseqdescr All subject Seq-descr(s), separated by a ',' +====== ============= ============================================ + +An option also exists to select particular columns for the output +report, and to cross-reference each result with one or more reference bins.l +A command line version can be used. Type blast_reporting.py -h for help. + +**python blast_reporting.py in_file out_file out_format [options]** + +.. class:: warningmark + +As noted in the original BLAST XML to tabular tool, ''Be aware that the XML file (and thus the conversion) and the tabular output direct from BLAST+ may differ in the presence of XXXX masking on regions low complexity (columns 21 and 22), and thus also calculated figures like the percentage identity (column 3) and gap openings.'' + +**References** + +If using this tool for publishing results, you may need to cite its origin in the BLAST XML to tabular tool: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + + ]]></help> + <citations> + <citation type="doi">10.7717/peerj.167</citation> + </citations> +</tool>