Mercurial > repos > devteam > ncbi_blast_plus
diff ncbi_blastn_wrapper.xml @ 2:ab1a8640f817 draft
Uploaded v0.0.12 again, without extra path
author | peterjc |
---|---|
date | Thu, 23 Aug 2012 07:32:06 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ncbi_blastn_wrapper.xml Thu Aug 23 07:32:06 2012 -0400 @@ -0,0 +1,211 @@ +<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.12"> + <description>Search nucleotide database with nucleotide query sequence(s)</description> + <!-- If job splitting is enabled, break up the query file into parts --> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> + <version_command>blastn -version</version_command> + <command interpreter="python">hide_stderr.py +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +blastn +-query "$query" +#if $db_opts.db_opts_selector == "db": + -db "${db_opts.database.fields.path}" +#else: + -subject "$db_opts.subject" +#end if +-task $blast_type +-evalue $evalue_cutoff +-out $output1 +##Set the extended list here so if/when we add things, saved workflows are not affected +#if str($out_format)=="ext": + -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" +#else: + -outfmt $out_format +#end if +-num_threads 8 +#if $adv_opts.adv_opts_selector=="advanced": +$adv_opts.filter_query +$adv_opts.strand +## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string +## Note -max_target_seqs overrides -num_descriptions and -num_alignments +#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): +-max_target_seqs $adv_opts.max_hits +#end if +#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): +-word_size $adv_opts.word_size +#end if +$adv_opts.ungapped +$adv_opts.parse_deflines +## End of advanced options: +#end if + </command> + <inputs> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + <conditional name="db_opts"> + <param name="db_opts_selector" type="select" label="Subject database/sequences"> + <option value="db" selected="True">BLAST Database</option> + <option value="file">FASTA file (pairwise e-values)</option> + </param> + <when value="db"> + <param name="database" type="select" label="Nucleotide BLAST database"> + <options from_file="blastdb.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + <param name="subject" type="hidden" value="" /> + </when> + <when value="file"> + <param name="database" type="hidden" value="" /> + <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> + </when> + </conditional> + <param name="blast_type" type="select" display="radio" label="Type of BLAST"> + <option value="megablast">megablast</option> + <option value="blastn">blastn</option> + <option value="blastn-short">blastn-short</option> + <option value="dc-megablast">dc-megablast</option> + <!-- Using BLAST 2.2.24+ this gives an error: + BLAST engine error: Program type 'vecscreen' not supported + <option value="vecscreen">vecscreen</option> + --> + </param> + <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> + <param name="out_format" type="select" label="Output format"> + <option value="6" selected="True">Tabular (standard 12 columns)</option> + <option value="ext">Tabular (extended 24 columns)</option> + <option value="5">BLAST XML</option> + <option value="0">Pairwise text</option> + <option value="0 -html">Pairwise HTML</option> + <option value="2">Query-anchored text</option> + <option value="2 -html">Query-anchored HTML</option> + <option value="4">Flat query-anchored text</option> + <option value="4 -html">Flat query-anchored HTML</option> + <!-- + <option value="-outfmt 11">BLAST archive format (ASN.1)</option> + --> + </param> + <conditional name="adv_opts"> + <param name="adv_opts_selector" type="select" label="Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic" /> + <when value="advanced"> + <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' --> + <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" /> + <param name="strand" type="select" label="Query strand(s) to search against database/subject"> + <option value="-strand both">Both</option> + <option value="-strand plus">Plus (forward)</option> + <option value="-strand minus">Minus (reverse complement)</option> + </param> + <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> + <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> + <validator type="in_range" min="0" /> + </param> + <!-- I'd like word_size to be optional, with minimum 4 for blastn --> + <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4."> + <validator type="in_range" min="0" /> + </param> + <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" /> + <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}"> + <change_format> + <when input="out_format" value="0" format="txt"/> + <when input="out_format" value="0 -html" format="html"/> + <when input="out_format" value="2" format="txt"/> + <when input="out_format" value="2 -html" format="html"/> + <when input="out_format" value="4" format="txt"/> + <when input="out_format" value="4 -html" format="html"/> + <when input="out_format" value="5" format="blastxml"/> + </change_format> + </data> + </outputs> + <requirements> + <requirement type="binary">blastn</requirement> + </requirements> + <help> + +.. class:: warningmark + +**Note**. Database searches may take a substantial amount of time. +For large input datasets it is advisable to allow overnight processing. + +----- + +**What it does** + +Search a *nucleotide database* using a *nucleotide query*, +using the NCBI BLAST+ blastn command line tool. +Algorithms include blastn, megablast, and discontiguous megablast. + +----- + +**Output format** + +Because Galaxy focuses on processing tabular data, the default output of this +tool is tabular. The standard BLAST+ tabular output contains 12 columns: + +====== ========= ============================================ +Column NCBI name Description +------ --------- -------------------------------------------- + 1 qseqid Query Seq-id (ID of your sequence) + 2 sseqid Subject Seq-id (ID of the database hit) + 3 pident Percentage of identical matches + 4 length Alignment length + 5 mismatch Number of mismatches + 6 gapopen Number of gap openings + 7 qstart Start of alignment in query + 8 qend End of alignment in query + 9 sstart Start of alignment in subject (database hit) + 10 send End of alignment in subject (database hit) + 11 evalue Expectation value (E-value) + 12 bitscore Bit score +====== ========= ============================================ + +The BLAST+ tools can optionally output additional columns of information, +but this takes longer to calculate. Most (but not all) of these columns are +included by selecting the extended tabular output. The extra columns are +included *after* the standard 12 columns. This is so that you can write +workflow filtering steps that accept either the 12 or 24 column tabular +BLAST output. + +====== ============= =========================================== +Column NCBI name Description +------ ------------- ------------------------------------------- + 13 sallseqid All subject Seq-id(s), separated by a ';' + 14 score Raw score + 15 nident Number of identical matches + 16 positive Number of positive-scoring matches + 17 gaps Total number of gaps + 18 ppos Percentage of positive-scoring matches + 19 qframe Query frame + 20 sframe Subject frame + 21 qseq Aligned part of query sequence + 22 sseq Aligned part of subject sequence + 23 qlen Query sequence length + 24 slen Subject sequence length +====== ============= =========================================== + +The third option is BLAST XML output, which is designed to be parsed by +another program, and is understood by some Galaxy tools. + +You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). +The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. +The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. +The two query anchored outputs show a multiple sequence alignment between the query and all the matches, +and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). + +------- + +**References** + +Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214. + + </help> +</tool>