Mercurial > repos > galaxyp > blast_plus_remote_blastp
diff tools/blast_plus_remote_blastp.xml @ 0:820c41bff462
Uploaded
author | galaxyp |
---|---|
date | Wed, 01 Oct 2014 20:47:55 -0400 |
parents | |
children | db990c5edc14 |
line wrap: on
line diff
<tool id="blast_plus_remote_blastp" name="NCBI BLAST+ remote blastp" version="1.0">
    <!--
        Galaxy wrapper around the NCBI BLAST+ "blastp" command line tool.
        The subject can be a local BLAST database (listed in blastdb_p.loc),
        a FASTA dataset from the history, or a remote NCBI database that can
        optionally be restricted to (or exclude) specific NCBI taxon IDs.
        BLAST always writes to the file "blast_output" in the job working
        directory; exactly one of the four outputs below collects that file,
        selected by the output format (and the Html checkbox for text output).
    -->
    <description>Search protein database with protein query sequence(s)</description>
    <!-- If job splitting is enabled, break up the query file into four (This only works if output is tabular) -->
    <!-- merge_outputs must name an output declared in this file; the tabular output is the one that can be merged. -->
    <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output_tabular"></parallelism>
    <version_command>blastp -version</version_command>
    <requirements>
        <requirement type="package" version="333">binaries_for_blast_plus</requirement>
    </requirements>
    <command><![CDATA[
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
blastp
-query "$query"
#if $db_opts.db_opts_selector == "db":
    -db "${db_opts.database.fields.path}"
#elif $db_opts.db_opts_selector == "remote":
    -db "$db_opts.database"
    -remote
    ## Split the repeat entries into taxon terms to include and terms to exclude.
    #set $txids = []
    #set $ntxids = []
    #for $org in $db_opts.taxid_repeat:
        #if $org.exclude:
            #set $ntxids = $ntxids + ["txid" + str($org.taxid)]
        #else:
            #set $txids = $txids + ["txid" + str($org.taxid)]
        #end if
    #end for
    ## Build "(txidA OR txidB) NOT (txidC OR txidD)"; omit the option when no taxon was given.
    ## NOTE(review): with exclusions only, the query starts with " NOT (...)";
    ## confirm that Entrez accepts a query without a leading positive term.
    #if (len($txids) + len($ntxids)) > 0:
        #set $entrez_query = ''
        #if len($txids) > 0:
            #set $entrez_query = $entrez_query + '(' + ' OR '.join($txids) + ')'
        #end if
        #if len($ntxids) > 0:
            #set $entrez_query = $entrez_query + ' NOT (' + ' OR '.join($ntxids) + ')'
        #end if
        -entrez_query '$entrez_query'
    #end if
#else:
    -subject "$db_opts.subject"
#end if
-task $blast_type
-evalue $evalue_cutoff
-out blast_output
##Set the extended list here so if/when we add things, saved workflows are not affected
#if str($fmt_opt.out_format)=="text":
    -outfmt "$fmt_opt.outfmt" $fmt_opt.html
    #if $fmt_opt.num_descriptions.__str__.strip() != '':
        -num_descriptions $fmt_opt.num_descriptions
    #end if
    #if $fmt_opt.num_alignments.__str__.strip() != '':
        -num_alignments $fmt_opt.num_alignments
    #end if
#else:
    -outfmt "$fmt_opt.outfmt"
    ## The form offers zero (or blank) as "use the BLAST default", so only
    ## pass the option through when a positive limit was entered.
    #set $max_hits = str($fmt_opt.max_target_seqs).strip()
    #if $max_hits.isdigit() and int($max_hits) > 0:
        -max_target_seqs $max_hits
    #end if
#end if
#if $db_opts.db_opts_selector != "remote":
    ## Use the number of slots Galaxy allocated to the job, defaulting to 8.
    -num_threads "\${GALAXY_SLOTS:-8}"
#end if
#if $adv_opts.adv_opts_selector=="advanced":
    $adv_opts.filter_query
    -matrix $adv_opts.scoring.matrix
    $adv_opts.scoring.gap_costs

    #if $adv_opts.word_size.__str__.strip() != '':
        -word_size $adv_opts.word_size
    #end if

    #if $adv_opts.window_size.__str__.strip() != '':
        -window_size $adv_opts.window_size
    #end if

    #if $adv_opts.threshold.__str__.strip() != '':
        -threshold $adv_opts.threshold
    #end if

    #if $adv_opts.comp_based_stats.__str__.strip() != '':
        -comp_based_stats $adv_opts.comp_based_stats
    #end if

    ##Ungapped disabled for now - see comments below
    ##$adv_opts.ungapped
    $adv_opts.use_sw_tback
    $adv_opts.parse_deflines
    ## End of advanced options:
#end if
    ]]></command>
    <inputs>
        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
        <!-- Every branch defines "database" (and the local branches "subject") so the
             command template and output labels can reference them in any mode. -->
        <conditional name="db_opts">
            <param name="db_opts_selector" type="select" label="Subject database/sequences">
                <option value="db" selected="true">Local BLAST Database</option>
                <option value="file">Local FASTA file</option>
                <option value="remote">NCBI Remote Database</option>
            </param>
            <when value="db">
                <param name="database" type="select" label="Protein BLAST database">
                    <options from_file="blastdb_p.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
                <param name="subject" type="hidden" value="" />
            </when>
            <when value="file">
                <param name="database" type="hidden" value="" />
                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
            </when>
            <when value="remote">
                <param name="database" type="select" label="Protein BLAST database">
                    <option value="nr" selected="true">Non-redundant protein sequences (nr)</option>
                    <option value="refseq_protein">Reference proteins (refseq_protein)</option>
                    <option value="swissprot">UniProtKB/Swiss-Prot(swissprot)</option>
                    <option value="pat">Patented protein sequences(pat)</option>
                    <option value="pdb">Protein Data Bank proteins(pdb)</option>
                    <option value="env_nr">Metagenomic proteins(env_nr)</option>
                </param>
                <!-- Each entry adds one taxon to the Entrez query, either as a restriction or as an exclusion. -->
                <repeat name="taxid_repeat" title="Search Organism Restriction" min="0">
                    <param name="taxid" type="integer" value="" label="NCBI Taxon ID" help="For example: Human is 9606 (see: http://www.ncbi.nlm.nih.gov/taxonomy ) ">
                        <validator type="in_range" min="0" />
                    </param>
                    <param name="exclude" type="boolean" checked="false" label="Exclude this NCBI Taxon ID" help=""/>
                </repeat>
            </when>
        </conditional>
        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
            <option value="blastp">blastp</option>
            <option value="blastp-short">blastp-short</option>
        </param>
        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
        <conditional name="fmt_opt">
            <param name="out_format" type="select" label="Output format">
                <option value="tabular" selected="true">Tabular</option>
                <option value="blastxml">BLAST XML</option>
                <option value="text">Text Report</option>
            </param>
            <when value="tabular">
                <param name="outfmt" type="select" label="Tabular columns">
                    <option value="6" selected="true">Tabular (standard 12 columns)</option>
                    <option value="7">Tabular (standard 12 columns) with comments</option>
                    <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen">Tabular (extended 24 columns)</option>
                </param>
                <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits">
                    <validator type="in_range" min="0" />
                </param>
            </when>
            <when value="blastxml">
                <param name="outfmt" type="hidden" value="5"/>
                <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits">
                    <validator type="in_range" min="0" />
                </param>
            </when>
            <when value="text">
                <param name="outfmt" type="select" label="Text format">
                    <option value="0">Pairwise text</option>
                    <option value="1">Query-anchored text showing identities</option>
                    <option value="2">Query-anchored text</option>
                    <option value="3">Flat query-anchored text showing identities</option>
                    <option value="4">Flat query-anchored text</option>
                </param>
                <param name="html" type="boolean" label="Html" truevalue="-html" falsevalue="" checked="true" />
                <param name="num_descriptions" type="integer" value="500" optional="true" label="Maximum Descriptions to show" help="Show one-line descriptions for this number of database sequences.">
                    <validator type="in_range" min="0" />
                </param>
                <param name="num_alignments" type="integer" value="250" optional="true" label="Maximum alignments to show" help="Show alignments for this number of database sequences.">
                    <validator type="in_range" min="0" />
                </param>
            </when>
        </conditional>
        <conditional name="adv_opts">
            <param name="adv_opts_selector" type="select" label="Advanced Options">
                <option value="basic" selected="true">Hide Advanced Options</option>
                <option value="advanced">Show Advanced Options</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
                <!-- The gap cost choices offered depend on the selected matrix; each option value
                     is the literal command line fragment inserted into the blastp call. -->
                <conditional name="scoring">
                    <param name="matrix" type="select" label="Scoring matrix">
                        <option value="BLOSUM90">BLOSUM90</option>
                        <option value="BLOSUM80">BLOSUM80</option>
                        <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
                        <option value="BLOSUM50">BLOSUM50</option>
                        <option value="BLOSUM45">BLOSUM45</option>
                        <option value="PAM250">PAM250</option>
                        <option value="PAM70">PAM70</option>
                        <option value="PAM30">PAM30</option>
                    </param>
                    <when value="BLOSUM90">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option>
                            <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                            <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                            <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                            <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option>
                            <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option>
                            <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                        </param>
                    </when>
                    <when value="BLOSUM80">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                            <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                            <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                            <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option>
                            <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option>
                            <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                        </param>
                    </when>
                    <when value="BLOSUM62">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 11 -gapextend 2">Existence: 11 Extension: 2</option>
                            <option value="-gapopen 10 -gapextend 2">Existence: 10 Extension: 2</option>
                            <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option>
                            <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                            <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                            <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                            <option value="-gapopen 13 -gapextend 1">Existence: 13 Extension: 1</option>
                            <option value="-gapopen 12 -gapextend 1">Existence: 12 Extension: 1</option>
                            <option value="-gapopen 11 -gapextend 1" selected="true">Existence: 11 Extension: 1 (default)</option>
                            <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option>
                            <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                        </param>
                    </when>
                    <when value="BLOSUM50">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option>
                            <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option>
                            <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option>
                            <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option>
                            <option value="-gapopen 9 -gapextend 3">Existence: 9 Extension: 3</option>
                            <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option>
                            <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option>
                            <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option>
                            <option value="-gapopen 13 -gapextend 2" selected="true">Existence: 13 Extension: 2 (default)</option>
                            <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option>
                            <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option>
                            <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option>
                            <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option>
                            <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option>
                            <option value="-gapopen 15 -gapextend 1">Existence: 15 Extension: 1</option>
                        </param>
                    </when>
                    <when value="BLOSUM45">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option>
                            <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option>
                            <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option>
                            <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option>
                            <option value="-gapopen 15 -gapextend 2" selected="true">Existence: 15 Extension: 2 (default)</option>
                            <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option>
                            <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option>
                            <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option>
                            <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option>
                            <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option>
                            <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option>
                            <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option>
                        </param>
                    </when>
                    <when value="PAM250">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 15 -gapextend 3">Existence: 15 Extension: 3</option>
                            <option value="-gapopen 14 -gapextend 3">Existence: 14 Extension: 3</option>
                            <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option>
                            <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option>
                            <option value="-gapopen 17 -gapextend 2">Existence: 17 Extension: 2</option>
                            <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option>
                            <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option>
                            <option value="-gapopen 14 -gapextend 2" selected="true">Existence: 14 Extension: 2 (default)</option>
                            <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option>
                            <option value="-gapopen 21 -gapextend 1">Existence: 21 Extension: 1</option>
                            <option value="-gapopen 20 -gapextend 1">Existence: 20 Extension: 1</option>
                            <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option>
                            <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option>
                            <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option>
                        </param>
                    </when>
                    <when value="PAM70">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                            <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                            <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                            <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option>
                            <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option>
                            <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                        </param>
                    </when>
                    <when value="PAM30">
                        <param name="gap_costs" type="select" label="Gap Costs">
                            <option value="">Use Defaults</option>
                            <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                            <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                            <option value="-gapopen 5 -gapextend 2">Existence: 5 Extension: 2</option>
                            <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option>
                            <option value="-gapopen 9 -gapextend 1" selected="true">Existence: 9 Extension: 1 (default)</option>
                            <option value="-gapopen 8 -gapextend 1">Existence: 8 Extension: 1</option>
                        </param>
                    </when>
                    <!--
                    Can't use '-ungapped' on its own, error back is:
                    Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
                    Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
                    <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
                    -->
                </conditional>

                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
                <param name="word_size" type="integer" value="" optional="true" label="Word size for wordfinder algorithm" help="Recommended: blastp 3, blastp-short 2">
                    <validator type="in_range" min="2" />
                </param>
                <param name="window_size" type="integer" value="" optional="true" label="Multiple hits window size, use 0 to specify 1-hit algorithm." help="Recommended: blastp 40, blastp-short 15">
                    <validator type="in_range" min="0" />
                </param>
                <param name="threshold" type="integer" value="" optional="true" label="Threshold: Minimum score to add a word to the BLAST lookup table." help="Recommended: blastp 11, blastp-short 16">
                    <validator type="in_range" min="1" />
                </param>
                <param name="comp_based_stats" type="select" optional="true" label="Use composition-based statistics"
                       help="Recommended: blastp: 2 blastp-short: 0">
                    <option value="">Leave Unspecified</option>
                    <option value="0">0 or F (No composition-based statistics)</option>
                    <option value="1">1 (Composition-based statistics as in NAR 29:2994-3005, 2001)</option>
                    <option value="2">2, T, or D (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties)</option>
                    <option value="3">3 (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally)</option>
                </param>

                <param name="use_sw_tback" type="boolean" label="Compute locally optimal Smith-Waterman alignments?" truevalue="-use_sw_tback" falsevalue="" checked="false" />

                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
            </when>
        </conditional>
    </inputs>

    <!-- All four outputs collect the same working directory file ("blast_output");
         the filters make sure only the one matching the chosen format is created. -->
    <outputs>
        <data name="output_tabular" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
            <filter>fmt_opt['out_format'] == "tabular"</filter>
        </data>
        <data name="output_xml" format="blastxml" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
            <filter>fmt_opt['out_format'] == "blastxml"</filter>
        </data>
        <data name="output_txt" format="txt" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
            <filter>fmt_opt['out_format'] == "text" and not fmt_opt['html']</filter>
        </data>
        <data name="output_html" format="html" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
            <filter>fmt_opt['out_format'] == "text" and fmt_opt['html']</filter>
        </data>
    </outputs>

    <!-- Any non-zero exit code from blastp is treated as a fatal job error. -->
    <stdio>
        <exit_code range="1" level="fatal" description="Bad input dataset or BLAST options" />
        <exit_code range="2" level="fatal" description="Error in BLAST database" />
        <exit_code range="3" level="fatal" description="Error in BLAST engine" />
        <exit_code range="4" level="fatal" description="Out of Memory" />
        <exit_code range="5:" level="fatal" description="Unknown Error" />
    </stdio>

    <!-- All tests run against a local FASTA subject, so none of them needs network access. -->
    <tests>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-8" />
            <param name="blast_type" value="blastp" />
            <param name="out_format" value="blastxml" />
            <param name="outfmt" value="5" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="False" />
            <param name="matrix" value="BLOSUM62" />
            <param name="max_target_seqs" value="" />
            <param name="word_size" value="" />
            <param name="parse_deflines" value="True" />
            <output name="output_xml">
                <assert_contents>
                    <has_text text="sp|Q9BS26|ERP44_HUMAN"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-8" />
            <param name="blast_type" value="blastp" />
            <param name="out_format" value="tabular" />
            <param name="outfmt" value="6" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="False" />
            <param name="matrix" value="BLOSUM62" />
            <param name="max_target_seqs" value="" />
            <param name="word_size" value="" />
            <param name="parse_deflines" value="True" />
            <output name="output_tabular">
                <assert_contents>
                    <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" />
                    <has_text text="BAB21486.1"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-8" />
            <param name="blast_type" value="blastp" />
            <param name="out_format" value="tabular" />
            <param name="outfmt" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="False" />
            <param name="matrix" value="BLOSUM62" />
            <param name="max_target_seqs" value="" />
            <param name="word_size" value="" />
            <param name="parse_deflines" value="True" />
            <output name="output_tabular">
                <assert_contents>
                    <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+\t\S+\t\d+\t\d+\t\d+\t\d+\t\S+\t\d+\t\d+\t\S+\t\S+\t\d+\t\d+" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-8" />
            <param name="blast_type" value="blastp" />
            <param name="out_format" value="tabular" />
            <param name="outfmt" value="6" />
            <param name="adv_opts_selector" value="basic" />
            <output name="output_tabular">
                <assert_contents>
                    <has_text_matching expression="gi.283855846.gb.ADB45242.1.\tsp.P08100.OPSD_HUMAN\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" />
                    <has_text text="BAB21486.1"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>

.. class:: warningmark

**Note**. Database searches may take a substantial amount of time.
For large input datasets it is advisable to allow overnight processing.

-----

**What it does**

Search a *protein database* using a *protein query*,
using the NCBI BLAST+ blastp command line tool.

The search can be performed using a local database, against a sequence supplied in a fasta file,
or the blast can be performed remotely at NCBI.

The remote operation allows searches to be targeted at specific organisms.

-----

**Output format**

Because Galaxy focuses on processing tabular data, the default output of this
tool is tabular. The standard BLAST+ tabular output contains 12 columns:

====== ========= ============================================
Column NCBI name Description
------ --------- --------------------------------------------
     1 qseqid    Query Seq-id (ID of your sequence)
     2 sseqid    Subject Seq-id (ID of the database hit)
     3 pident    Percentage of identical matches
     4 length    Alignment length
     5 mismatch  Number of mismatches
     6 gapopen   Number of gap openings
     7 qstart    Start of alignment in query
     8 qend      End of alignment in query
     9 sstart    Start of alignment in subject (database hit)
    10 send      End of alignment in subject (database hit)
    11 evalue    Expectation value (E-value)
    12 bitscore  Bit score
====== ========= ============================================

The BLAST+ tools can optionally output additional columns of information,
but this takes longer to calculate. Most (but not all) of these columns are
included by selecting the extended tabular output. The extra columns are
included *after* the standard 12 columns. This is so that you can write
workflow filtering steps that accept either the 12 or 24 column tabular
BLAST output.

====== ============= ===========================================
Column NCBI name     Description
------ ------------- -------------------------------------------
    13 sallseqid     All subject Seq-id(s), separated by a ';'
    14 score         Raw score
    15 nident        Number of identical matches
    16 positive      Number of positive-scoring matches
    17 gaps          Total number of gaps
    18 ppos          Percentage of positive-scoring matches
    19 qframe        Query frame
    20 sframe        Subject frame
    21 qseq          Aligned part of query sequence
    22 sseq          Aligned part of subject sequence
    23 qlen          Query sequence length
    24 slen          Subject sequence length
====== ============= ===========================================

The third option is BLAST XML output, which is designed to be parsed by
another program, and is understood by some Galaxy tools.

You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).

-------

**References**

Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.

Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.

    </help>
</tool>