Mercurial > repos > devteam > ncbi_blast_plus
diff tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml @ 11:4c4a0da938ff draft
Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25.
Supports $GALAXY_SLOTS.
Includes more tests and heavy use of macros.
author | peterjc |
---|---|
date | Thu, 05 Dec 2013 06:55:59 -0500 |
parents | 70e7dcbf6573 |
children | 623f727cdff1 |
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,12 +1,12 @@ -<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.4"> +<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.22"> <description>Search protein domain database (PSSMs) with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> - <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> - <requirements> - <requirement type="binary">rpsblast</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> - </requirements> - <version_command>rpsblast -version</version_command> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" /> + <macros> + <token name="@BINARY@">deltablast</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -18,121 +18,43 @@ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #end if -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -$adv_opts.filter_query -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if -$adv_opts.parse_deflines +@ADVANCED_OPTIONS@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)"> - <option value="db" selected="True">Locally installed BLAST database</option> - <!-- TODO - define new datatype - <option value="histdb">BLAST protein domain database from your history</option> - --> - </param> - <when value="db"> - <param name="database" type="select" label="Protein domain database"> - <options from_file="blastdb_d.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <!-- TODO - define new datatype - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbd" label="Protein domain database" /> - <param name="subject" type="hidden" value="" /> - </when> - --> - </conditional> - <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> - <conditional name="adv_opts"> - <param name="adv_opts_selector" type="select" label="Advanced Options"> - <option value="basic" selected="True">Hide Advanced Options</option> - <option value="advanced">Show Advanced Options</option> - </param> - <when value="basic" /> - <when value="advanced"> - <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> - <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" /> - <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> - <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> - <validator type="in_range" min="0" /> - </param> - <!-- I'd like word_size to be optional, with minimum 2 for rpsblast --> - <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2."> - <validator type="in_range" min="0" /> - </param> - <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> - </when> - </conditional> + + <expand macro="input_conditional_pssm" /> + + <expand macro="input_evalue" /> + + <expand macro="input_out_format" /> + + <expand macro="advanced_options"> + <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> + <expand macro="input_filter_query_default_false" /> + <expand macro="input_max_hits" /> + <expand macro="input_word_size" /> + <expand macro="input_parse_deflines" /> + </expand> </inputs> <outputs> <data name="output1" format="tabular" label="rpsblast on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + + <expand macro="output_change_format" /> + </data> </outputs> <help> -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** @@ -171,60 +93,7 @@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. - -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -233,17 +102,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ </help> </tool>