Mercurial > repos > xuebing > sharplabtool
diff tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.11">
    <!--
      Galaxy wrapper for the NCBI BLAST+ "tblastn" command line tool.
      Sections, in order: command template, user inputs, outputs,
      binary requirement, functional tests, and end-user help (reStructuredText).
    -->
    <description>Search translated nucleotide database with protein query sequence(s)</description>
    <version_command>tblastn -version</version_command>
    <!--
      The command is launched through hide_stderr.py using the python interpreter.
      NOTE(review): that helper script is not visible here; presumably it hides
      harmless stderr output so the job is not flagged as failed. Confirm against the script.
    -->
    <command interpreter="python">hide_stderr.py
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
tblastn
-query "$query"
## Search either a pre-built BLAST database (path column of blastdb.loc) or a FASTA file from the history
#if $db_opts.db_opts_selector == "db":
  -db "${db_opts.database.fields.path}"
#else:
  -subject "$db_opts.subject"
#end if
-evalue $evalue_cutoff
-out "$output1"
##Set the extended list here so if/when we add things, saved workflows are not affected
#if str($out_format)=="ext":
  -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
#else:
## Deliberately unquoted: values such as "0 -html" must split into two separate arguments
  -outfmt $out_format
#end if
## Use the core count Galaxy allocated to this job (GALAXY_SLOTS environment variable)
## and fall back to the previous fixed value of 8 when that variable is not set.
-num_threads "\${GALAXY_SLOTS:-8}"
#if $adv_opts.adv_opts_selector=="advanced":
$adv_opts.filter_query
-matrix $adv_opts.matrix
## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
## Note -max_target_seqs overrides -num_descriptions and -num_alignments
#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
-max_target_seqs $adv_opts.max_hits
#end if
#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
-word_size $adv_opts.word_size
#end if
##Ungapped disabled for now - see comments below
##$adv_opts.ungapped
$adv_opts.parse_deflines
## End of advanced options:
#end if
    </command>
    <inputs>
        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
        <!-- Subject: either a locally installed BLAST database or a FASTA dataset from the history.
             Each branch carries a hidden empty twin of the other branch's parameter. -->
        <conditional name="db_opts">
            <param name="db_opts_selector" type="select" label="Subject database/sequences">
                <option value="db" selected="True">BLAST Database</option>
                <option value="file">FASTA file</option>
            </param>
            <when value="db">
                <param name="database" type="select" label="Nucleotide BLAST database">
                    <options from_file="blastdb.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
                <param name="subject" type="hidden" value="" />
            </when>
            <when value="file">
                <param name="database" type="hidden" value="" />
                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
            </when>
        </conditional>
        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
        <!-- Option values are passed straight to -outfmt, except "ext" which the command
             template expands into an explicit 24 column specification. -->
        <param name="out_format" type="select" label="Output format">
            <option value="6" selected="True">Tabular (standard 12 columns)</option>
            <option value="ext">Tabular (extended 24 columns)</option>
            <option value="5">BLAST XML</option>
            <option value="0">Pairwise text</option>
            <option value="0 -html">Pairwise HTML</option>
            <option value="2">Query-anchored text</option>
            <option value="2 -html">Query-anchored HTML</option>
            <option value="4">Flat query-anchored text</option>
            <option value="4 -html">Flat query-anchored HTML</option>
            <!--
            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
            -->
        </param>
        <conditional name="adv_opts">
            <param name="adv_opts_selector" type="select" label="Advanced Options">
                <option value="basic" selected="True">Hide Advanced Options</option>
                <option value="advanced">Show Advanced Options</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
                <param name="matrix" type="select" label="Scoring matrix">
                    <option value="BLOSUM90">BLOSUM90</option>
                    <option value="BLOSUM80">BLOSUM80</option>
                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
                    <option value="BLOSUM50">BLOSUM50</option>
                    <option value="BLOSUM45">BLOSUM45</option>
                    <option value="PAM250">PAM250</option>
                    <option value="PAM70">PAM70</option>
                    <option value="PAM30">PAM30</option>
                </param>
                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
                <!-- Zero means "leave the BLAST default alone": the command template only emits the switch for values above zero -->
                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
                    <validator type="in_range" min="0" />
                </param>
                <!-- I'd like word_size to be optional, with minimum 2 for tblastn -->
                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
                    <validator type="in_range" min="0" />
                </param>
                <!--
                Can't use '-ungapped' on its own, error back is:
                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
                Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
                -->
                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Default datatype is tabular (formats "6" and "ext"); the other output formats switch it below -->
        <data name="output1" format="tabular" label="tblastn on ${db_opts.db_opts_selector}">
            <change_format>
                <when input="out_format" value="0" format="txt"/>
                <when input="out_format" value="0 -html" format="html"/>
                <when input="out_format" value="2" format="txt"/>
                <when input="out_format" value="2 -html" format="html"/>
                <when input="out_format" value="4" format="txt"/>
                <when input="out_format" value="4 -html" format="html"/>
                <when input="out_format" value="5" format="blastxml"/>
            </change_format>
        </data>
    </outputs>
    <requirements>
        <requirement type="binary">tblastn</requirement>
    </requirements>
    <!-- All tests search four human proteins against the rhodopsin nucleotide FASTA file
         with the same advanced settings; they differ in output format and defline parsing. -->
    <tests>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="5" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml" />
        </test>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="ext" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
        </test>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="6" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
        </test>
        <test>
            <!-- Same as above, but parse deflines - on BLAST 2.2.25+ makes no difference -->
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="6" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="true" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
        </test>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="0 -html" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html" />
        </test>
    </tests>
    <help>

.. class:: warningmark

**Note**. Database searches may take a substantial amount of time.
For large input datasets it is advisable to allow overnight processing.

-----

**What it does**

Search a *translated nucleotide database* using a *protein query*,
using the NCBI BLAST+ tblastn command line tool.

-----

**Output format**

Because Galaxy focuses on processing tabular data, the default output of this
tool is tabular. The standard BLAST+ tabular output contains 12 columns:

====== ========= ============================================
Column NCBI name Description
------ --------- --------------------------------------------
     1 qseqid    Query Seq-id (ID of your sequence)
     2 sseqid    Subject Seq-id (ID of the database hit)
     3 pident    Percentage of identical matches
     4 length    Alignment length
     5 mismatch  Number of mismatches
     6 gapopen   Number of gap openings
     7 qstart    Start of alignment in query
     8 qend      End of alignment in query
     9 sstart    Start of alignment in subject (database hit)
    10 send      End of alignment in subject (database hit)
    11 evalue    Expectation value (E-value)
    12 bitscore  Bit score
====== ========= ============================================

The BLAST+ tools can optionally output additional columns of information,
but this takes longer to calculate. Most (but not all) of these columns are
included by selecting the extended tabular output. The extra columns are
included *after* the standard 12 columns. This is so that you can write
workflow filtering steps that accept either the 12 or 24 column tabular
BLAST output.

====== ============= ===========================================
Column NCBI name     Description
------ ------------- -------------------------------------------
    13 sallseqid     All subject Seq-id(s), separated by a ';'
    14 score         Raw score
    15 nident        Number of identical matches
    16 positive      Number of positive-scoring matches
    17 gaps          Total number of gaps
    18 ppos          Percentage of positive-scoring matches
    19 qframe        Query frame
    20 sframe        Subject frame
    21 qseq          Aligned part of query sequence
    22 sseq          Aligned part of subject sequence
    23 qlen          Query sequence length
    24 slen          Subject sequence length
====== ============= ===========================================

The third option is BLAST XML output, which is designed to be parsed by
another program, and is understood by some Galaxy tools.

You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).

-------

**References**

Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.

    </help>
</tool>