Mercurial > repos > bgruening > diamond

--- a/diamond.xml	Tue Aug 08 16:33:49 2017 -0400
+++ b/diamond.xml	Thu Sep 27 06:30:30 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="bg_diamond" name="Diamond" version="@VERSION@.1">
+<tool id="bg_diamond" name="Diamond" version="@VERSION@.0">
     <description>alignment tool for short sequences against a protein database</description>
     <macros>
         <import>macros.xml</import>
@@ -18,26 +18,21 @@
     &&

     diamond
-        $method_select.method_select
+        $method_cond.method_select
         --threads "\${GALAXY_SLOTS:-12}"
         --db ./database
         --query '$query'
-        #if $method_select.method_select == "blastx"
-          --query-gencode '$query_gencode'
+        #if $method_cond.method_select == "blastx"
+          --query-gencode '$method_cond.query_gencode'
+          --strand '$method_cond.query_strand'
+	  --min-orf $method_cond.min_orf
+	  #if $method_cond.frameshift_cond.frameshift_select == 'yes'
+	          --frameshift $method_cond.frameshift_cond.frameshift
+		  $method_cond.frameshift_cond.range_culling
+          #end if
         #end if

-        #if $output.outfmt == "5"
-            --outfmt '5'
-            --out '$blast_xml'
-            $output.salltitles
-        #else if $output.outfmt == "6"
-            --outfmt '6' #echo ' '.join(str($output.fields).split(','))
-            --out '$blast_tabular'
-        #else if $output.outfmt == "101"
-            --outfmt '101'
-            --out '$sam_output'
-            $output.salltitles
-        #end if
+        @OUTPUT_ARGS@

         --compress '0'
         #if $sensitivity == "1"
@@ -46,16 +41,17 @@
           --more-sensitive
         #end if

-        --gapopen '$gapopen'
-        --gapextend '$gapextend'
+        #if str($gapopen) != "":
+          --gapopen '$gapopen'
+        #end if
+        #if str($gapextend) != "":
+          --gapextend '$gapextend'
+        #end if
         --matrix '$matrix'
-        --seg '$seg'
+        --comp-based-stats '$comp_based_stats'
+        --masking '$masking'

-        #if str($hit_filter.hit_filter_select) == 'max':
-            --max-target-seqs '$hit_filter.max_target_seqs'
-        #else:
-            --top '$hit_filter.top'
-        #end if
+        @HITFILTER_ARGS@

         #if str($filter_score.filter_score_select) == 'evalue':
             --evalue '$filter_score.evalue'
@@ -65,131 +61,120 @@

         --id '$id'
         --query-cover '$query_cover'
+        --subject-cover '$subject_cover'
         --block-size '$block_size'
+        #if str($unal) == '1':
+            --unal 1 --un '$unalqueries'
+        #end if
+        $no_self_hits
+        #if $tax_cond.tax_select == 'file':
+            --taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'`
+        #else if  $tax_cond.tax_select == 'list':
+            --taxonlist '$tax_cond.taxonlist'
+        #end if
 ]]>
     </command>
-
     <inputs>
-        <conditional name="method_select">
-          <param name="method_select" type="select" label="What do you want to align?" help="(--blastp/--blastx)">
-              <option value="blastp">Align amino acid query sequences (blastp)</option>
-              <option value="blastx">Align DNA query sequences (blastx)</option>
-          </param>
-          <when value="blastx">
-            <param name="query_gencode" argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help="">
-                <option value="1">The Standard Code</option>
-                <option value="2">The Vertebrate Mitochondrial Code</option>
-                <option value="3">The Yeast Mitochondrial Code</option>
-                <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
-                <option value="5">The Invertebrate Mitochondrial Code</option>
-                <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
-                <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
-                <option value="10">The Euplotid Nuclear Code</option>
-                <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
-                <option value="12">The Alternative Yeast Nuclear Code</option>
-                <option value="13">The Ascidian Mitochondrial Code</option>
-                <option value="14">The Alternative Flatworm Mitochondrial Code</option>
-                <option value="16">Chlorophycean Mitochondrial Code</option>
-                <option value="21">Trematode Mitochondrial Code</option>
-                <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
-                <option value="23">Thraustochytrium Mitochondrial Code</option>
-                <option value="24">Pterobranchia Mitochondrial Code</option>
-                <option value="5">Candidate Division SR1 and Gracilibacteria Code</option>
-                <option value="26">Pachysolen tannophilus Nuclear Code</option>
+        <conditional name="method_cond">
+            <param name="method_select" type="select" label="What do you want to align?" help="(blastp/blastx)">
+                <option value="blastp">Align amino acid query sequences (blastp)</option>
+                <option value="blastx">Align DNA query sequences (blastx)</option>
             </param>
-          </when>
-          <when value="blastp">
-          </when>
+            <when value="blastx">
+                <param name="query_gencode" argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help="">
+                    <option value="1">The Standard Code</option>
+                    <option value="2">The Vertebrate Mitochondrial Code</option>
+                    <option value="3">The Yeast Mitochondrial Code</option>
+                    <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+                    <option value="5">The Invertebrate Mitochondrial Code</option>
+                    <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
+                    <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
+                    <option value="10">The Euplotid Nuclear Code</option>
+                    <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
+                    <option value="12">The Alternative Yeast Nuclear Code</option>
+                    <option value="13">The Ascidian Mitochondrial Code</option>
+                    <option value="14">The Alternative Flatworm Mitochondrial Code</option>
+                    <option value="16">Chlorophycean Mitochondrial Code</option>
+                    <option value="21">Trematode Mitochondrial Code</option>
+                    <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
+                    <option value="23">Thraustochytrium Mitochondrial Code</option>
+                    <option value="24">Pterobranchia Mitochondrial Code</option>
+                    <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
+                    <option value="26">Pachysolen tannophilus Nuclear Code</option>
+                </param>
+                <param argument="--min-orf" name="min_orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" />
+
+                <param name="query_strand" argument="--strand" type="select" label="query strands to search" help="">
+                    <option value="both" selected="True">Both</option>
+                    <option value="plus">Plus</option>
+                    <option value="minus">Minus</option>
+                </param>
+                <conditional name="frameshift_cond">
+                    <param name="frameshift_select" type="select" label="Allow for frameshifts?" help="">
+                        <option value="yes">yes</option>
+                        <option value="no" selected="true">no</option>
+                    </param>
+                    <when value="yes">
+                        <param argument="--range-culling" name="range_culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/>
+                        <param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively." />
+                    </when>
+                    <when value="no"/>
+                </conditional>
+            </when>
+            <when value="blastp">
+            </when>
         </conditional>
         <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format" />
         <conditional name="ref_db_source">
-          <param name="db_source" type="select" label="Will you select a reference database from your history or use a built-in index?" help="Built-ins were indexed using default options">
-            <option value="indexed">Use a built-in index</option>
-            <option value="history">Use one from the history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference database" help="If your database of interest is not listed, contact your Galaxy admin">
-              <options from_data_table="diamond_database">
-                <filter type="sort_by" column="2"/>
-                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-              </options>
-            </param>
-          </when>
-          <when value="history">
-            <param name="reference_database" type="data" format="dmnd" label="Select the reference database" />
-          </when>
-        </conditional>
-        <conditional name="output">
-            <param argument="--outfmt" type="select" label="Format of output file " help="">
-                <option value="5">BLAST XML</option>
-                <option value="6">BLAST tabular</option>
-                <option value="101">SAM</option>
+            <param name="db_source" type="select" label="Will you select a reference database from your history or use a built-in index?" help="Built-ins were indexed using default options">
+                <option value="indexed">Use a built-in index</option>
+                <option value="history">Use one from the history</option>
             </param>
-            <when value="5">
-                <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full length subject titles in output?" help=""/>
-            </when>
-            <when value="6">
-                <param name="fields" type="select" label="Tabular fields" help="" multiple="true">
-                    <option value="qseqid" selected="true">Query Seq - id</option>
-                    <option value="sseqid" selected="true">Subject Seq - id</option>
-                    <option value="sallseqid">All subject Seq - id(s)</option>
-                    <option value="qlen">Query sequence length</option>
-                    <option value="slen">Subject sequence length</option>
-                    <option value="pident" selected="true">Percentage of identical matches</option>
-                    <option value="length" selected="true">Alignment length</option>
-                    <option value="nident">Number of identical matches</option>
-                    <option value="mismatch" selected="true">Number of mismatches</option>
-                    <option value="positive">Number of positive - scoring matches</option>
-                    <option value="gapopen" selected="true">Number of gap openings</option>
-                    <option value="gaps">Total number of gaps</option>
-                    <option value="ppos">Percentage of positive - scoring matches</option>
-                    <option value="qstart" selected="true">Start of alignment in query</option>
-                    <option value="qend" selected="true">End of alignment in query</option>
-                    <option value="sstart" selected="true">Start of alignment in subject</option>
-                    <option value="send" selected="true">End of alignment in subject</option>
-                    <option value="qseq">Aligned part of query sequence</option>
-                    <option value="sseq">Aligned part of subject sequence</option>
-                    <option value="evalue" selected="true">Expect value</option>
-                    <option value="bitscore" selected="true">Bit score</option>
-                    <option value="score">Raw score</option>
-                    <option value="qframe">Query frame</option>
-                    <option value="stitle">Subject Title</option>
-                    <option value="salltitles">All Subject Title(s)</option>
-                    <option value="qcovhsp">Query Coverage Per HSP</option>
+            <when value="indexed">
+                <param name="index" type="select" label="Select a reference database" help="If your database of interest is not listed, contact your Galaxy admin">
+                    <options from_data_table="diamond_database">
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+                    </options>
                 </param>
             </when>
-            <when value="101">
-                <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full length subject titles in output?" help=""/>
+            <when value="history">
+                <param name="reference_database" type="data" format="dmnd" label="Select the reference database" />
             </when>
         </conditional>
-        <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. More sensitivity may increase computation time">
-          <option value="0" selected="True">Default</option>
-          <option value="1">Sensitive</option>
-          <option value="2">More Sensitive</option>
+        <expand macro="output_type_macro" />
+        <param name="no_self_hits" argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/>
+        <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time.">
+            <option value="0" selected="True">Default</option>
+            <option value="1">Sensitive</option>
+            <option value="2">More Sensitive</option>
+        </param>
+        <param argument="--matrix" type="select" label="Scoring matrix" help="In parentheses are the supported values for (gap open)/(gap extend). In brackets are default gap penalties">
+            <option value="BLOSUM45">BLOSUM45 ((10-13)/3; (12-16)/2; (16-19)/1) [14/2]</option>
+            <option value="BLOSUM50">BLOSUM50 ((9-13)/3; (12-16)/2; (15-19)/1) [13/2]</option>
+            <option value="BLOSUM62" selected="True">BLOSUM62 ((6-11)/2; (9-13)/1) [11/1]</option>
+            <option value="BLOSUM80">BLOSUM80 ((6-9)/2; 13/2; 25/2; (9-11)/1) [10/1]</option>
+            <option value="BLOSUM90">BLOSUM90 ((6-9)/2; (9-11)/1) [10/1]</option>
+            <option value="PAM250">PAM250 ((11-15)/3; (13-17)/2; (17-21)/1) [14/2]</option>
+            <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1) [10/1]</option>
+            <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1) [9/1]</option>
         </param>
-        <param argument="--gapopen" type="integer" value="11" label="Gap open penalty" help="" />
-        <param argument="--gapextend" type="integer" value="1" label="Gap extension penalty" help="" />
-        <param argument="--matrix" type="select" label="Scoring matrix" help="In brackets are the supported values for (gap open)/(gap extend)">
-            <option value="BLOSUM45">BLOSUM45 ((10-13)/3; (12-16)/2; (16-19)/1)</option>
-            <option value="BLOSUM50">BLOSUM50 ((9-13)/3; (12-16)/2; (15-19)/1)</option>
-            <option value="BLOSUM62" selected="True">BLOSUM62 ((6-11)/2; (9-13)/1)</option>
-            <option value="BLOSUM80">BLOSUM80 ((6-9)/2; 13/2; 25/2; (9-11)/1)</option>
-            <option value="BLOSUM90">BLOSUM90 ((6-9)/2; (9-11)/1)</option>
-            <option value="PAM250">PAM250 ((11-15)/3; (13-17)/2; (17-21)/1)</option>
-            <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1)</option>
-            <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1)</option>
-        </param>
-        <param argument="--seg" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Enable SEG masking of low complexity segments in the query?" help=""/>
-        <conditional name="hit_filter">
-            <param name="hit_filter_select" type="select" label="Method to restrict the number of hits?">
-                <option value="max">Maximum number of target sequences</option>
-                <option value="top">Percentage of top alignment score</option>
+        <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="leave empty for default (see scoring matrix)" />
+        <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="leave empty for default (see scoring matrix)" />
+        <param name="comp_based_stats" argument="--comp-based-stats" type="boolean" truevalue="1" falsevalue="0" checked="true" label="enable composition based statistics?" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"/>
+        <param argument="--masking" type="boolean" truevalue="1" falsevalue="0" checked="true" label="enable masking of low complexity regions?" help="Masked residues appear in the output as X"/>
+        <conditional name="tax_cond">
+            <param name="tax_select" type="select" label="Restrict search taxonomically?" help="Any taxonomic rank can be used, and only reference sequences matching one of the specified taxon ids will be searched against">
+                <option value="no">No</option>
+                <option value="list">list of taxids entered manually</option>
+                <option value="file">list of taxids from single column tabular file</option>
             </param>
-            <when value="max">
-                <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to keep alignments for" help="" />
+            <when value="no"/>
+            <when value="list">
+                <param name="taxonlist" argument="--taxonlist" type="text" value="" label="comma separated list of taxon ids" help="" />
             </when>
-            <when value="top">
-                <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a quer" help="" />
+            <when value="file">
+                <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="single column tabular file with one taxon id per line" help="Empty lines and lines starting with # are ignored" />
             </when>
         </conditional>
         <conditional name="filter_score">
@@ -204,47 +189,131 @@
                 <param name="min_score" argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)" />
             </when>
         </conditional>
+        <expand macro="hit_filter_macro" />
         <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="" />
         <param name="query_cover" argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" />
+        <param name="subject_cover" argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" />
         <param name="block_size" argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" />
+        <param argument="--unal" type="boolean" truevalue="1" falsevalue="0" checked="false" label="report unaligned queries" help=""/>
     </inputs>
-
     <outputs>
-        <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "5"</filter>
-        </data>
-        <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "6"</filter>
-        </data>
-        <data format="sam" name="sam_output" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "101"</filter>
+        <expand macro="output_macro" />
+        <data format="fasta" name="unalqueries" label="${tool.name} on ${on_string} (unaligned queries)">
+            <filter>unal == "1"</filter>
         </data>
     </outputs>
-
     <tests>
         <test>
-            <param name="method_select" value="blastp" />
+            <conditional name="method_cond">
+                <param name="method_select" value="blastp" />
+            </conditional>
             <param name="query" value="protein.fasta" ftype="fasta"/>
-            <param name="db_source" value="history"/>
-            <param name="reference_database" value="db.dmnd"/>
-            <param name="outfmt" value="6"/>
-            <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="history"/>
+                <param name="reference_database" value="db.dmnd"/>
+            </conditional>
+            <conditional name="output">
+                <param name="outfmt" value="6"/>
+                <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+            </conditional>
             <param name="sensitivity" value="0"/>
-            <param name="gapopen" value="11"/>
-            <param name="gapextend" value="1"/>
             <param name="matrix" value="BLOSUM62"/>
-            <param name="seg" value="yes"/>
-            <param name="hit_filter_select" value="max"/>
-            <param name="max_target_seqs" value="25" />
-            <param name="filter_score_select" value="evalue"/>
-            <param name="evalue" value="0.001" />
+            <param name="comp_based_stats" value="1"/>
+            <param name="masking" value="1"/>
+            <conditional name="hit_filter">
+                <param name="hit_filter_select" value="max"/>
+                <param name="max_target_seqs" value="25" />
+            </conditional>
+            <conditional name="filter_score">
+                <param name="filter_score_select" value="evalue"/>
+                <param name="evalue" value="0.001" />
+            </conditional>
             <param name="id" value="0"/>
             <param name="query_cover" value="0"/>
             <param name="block_size" value="2"/>
             <output name="blast_tabular" file="diamond_results.tabular"/>
         </test>
+        <test>
+            <conditional name="method_cond">
+                <param name="method_select" value="blastp" />
+            </conditional>
+            <param name="query" value="protein.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="history"/>
+                <param name="reference_database" value="db-wtax.dmnd"/>
+            </conditional>
+            <conditional name="tax_cond">
+		    <param name="tax_select" value="list"/>
+                    <param name="taxonlist" value="2" />
+            </conditional>
+            <conditional name="output">
+                <param name="outfmt" value="6"/>
+                <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+            </conditional>
+            <param name="sensitivity" value="0"/>
+            <param name="matrix" value="BLOSUM62"/>
+            <param name="comp_based_stats" value="1"/>
+            <param name="masking" value="1"/>
+            <conditional name="hit_filter">
+                <param name="hit_filter_select" value="max"/>
+                <param name="max_target_seqs" value="25" />
+            </conditional>
+            <conditional name="filter_score">
+                <param name="filter_score_select" value="evalue"/>
+                <param name="evalue" value="0.001" />
+            </conditional>
+            <param name="id" value="0"/>
+            <param name="query_cover" value="0"/>
+            <param name="block_size" value="2"/>
+            <output name="blast_tabular" file="diamond_results.wtax.tabular"/>
+        </test>
+        <test>
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
+                <conditional name="frameshift_cond">
+                    <param name="frameshift_select" value="yes"/>
+                </conditional>
+            </conditional>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="history"/>
+                <param name="reference_database" value="db.dmnd"/>
+            </conditional>
+            <conditional name="output">
+                <param name="outfmt" value="0"/>
+            </conditional>
+            <param name="sensitivity" value="0"/>
+            <param name="matrix" value="BLOSUM62"/>
+            <param name="comp_based_stats" value="1"/>
+            <param name="masking" value="1"/>
+            <conditional name="hit_filter">
+                <param name="hit_filter_select" value="top"/>
+                <param name="top" value="10" />
+            </conditional>
+            <conditional name="filter_score">
+                <param name="filter_score_select" value="score"/>
+                <param name="evalue" value="1" />
+            </conditional>
+            <param name="id" value="0"/>
+            <param name="query_cover" value="0"/>
+            <param name="block_size" value="2"/>
+            <output name="blast_tabular" file="diamond_results.pairwise"/>
+        </test>
+        <test>
+            <conditional name="method_cond">
+                <param name="method_select" value="blastp" />
+            </conditional>
+            <param name="query" value="protein.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="history"/>
+                <param name="reference_database" value="db-wtax.dmnd"/>
+            </conditional>
+            <conditional name="output">
+                <param name="outfmt" value="100"/>
+            </conditional>
+            <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/>
+        </test>
     </tests>
-
     <help>
 <![CDATA[
--- a/diamond_makedb.xml	Tue Aug 08 16:33:49 2017 -0400
+++ b/diamond_makedb.xml	Thu Sep 27 06:30:30 2018 -0400
@@ -10,16 +10,32 @@

     <command>
     <!-- DB has two files, *.dmnd and *.tx -->
-<![CDATA[
+    <![CDATA[
     diamond makedb
         --threads "\${GALAXY_SLOTS:-12}"
         --in '$infile'
         --db ./database
-]]>
+
+      #if str($tax_cond.tax_select) == 'yes':
+        --taxonmap '$tax_cond.taxonmap'
+        --taxonnodes '$tax_cond.taxonnodes'
+      #end if
+    ]]>
     </command>

     <inputs>
-        <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" />
+      <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" />
+      <conditional name="tax_cond">
+        <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
+          <option value="yes">Yes</option>
+          <option value="no" selected="true">No</option>
+        </param>
+        <when value="yes">
+          <param argument="--taxonmap" type="data" format="tabular" label="protein accession to taxid mapping file" help="" />
+          <param argument="--taxonnodes" type="data" format="tabular" label="taxonomy nodes.dmp from NCBI" help="" />
+        </when>
+        <when value="no"/>
+      </conditional>
     </inputs>

     <outputs>
@@ -29,7 +45,16 @@
     <tests>
         <test>
             <param name="infile" value="db.fasta" ftype="fasta"/>
-            <output name="outfile" value="db.dmnd"/>
+            <output name="outfile" value="db.dmnd" compare="sim_size" delta="2"/>
+        </test>
+        <test>
+            <param name="infile" value="db.fasta" ftype="fasta"/>
+            <conditional name="tax_cond">
+                <param name="tax_select" value="yes"/>
+                <param name="taxonmap" ftype="tabular" value="prot.accession2taxid" />
+                <param name="taxonnodes" ftype="tabular" value="nodes.dmp" />
+            </conditional>
+            <output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/>
         </test>
     </tests>

@@ -48,6 +73,9 @@
 .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/


+- taxonmap: Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. The file can be downloaded from NCBI: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz
+
+- taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
 ]]>
     </help>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/diamond_view.xml	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,98 @@
+<tool id="bg_diamond_view" name="Diamond" version="@VERSION@">
+    <description>generate formatted output from DAA files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command><![CDATA[
+    ## need to link because diamond tries to open dataset_xxx.dat.daa
+    ln -s '$daa' input.daa &&
+    diamond
+        view
+        --daa input.daa
+        @OUTPUT_ARGS@
+        @HITFILTER_ARGS@
+        $forwardonly
+        --compress '0'
+    ]]>
+    </command>
+    <inputs>
+        <param argument="--daa" type="data" format="daa" label="input file in DAA format" />
+        <expand macro="output_type_macro" />
+        <expand macro="hit_filter_macro" />
+        <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand" help=""/>
+    </inputs>
+    <outputs>
+        <expand macro="output_macro" />
+    </outputs>
+    <tests>
+        <test> <!-- outfmt 5 (BLAST XML), restricted to the single best target; result lands in blast_xml -->
+            <param name="daa" ftype="daa" value="diamond_results.daa" />
+            <conditional name="output">
+                <param name="outfmt" value="5"/>
+            </conditional>
+            <conditional name="hit_filter">
+                <param name="hit_filter_select" value="max"/>
+                <param name="max_target_seqs" value="1" />
+            </conditional>
+            <output name="blast_xml" file="diamond_results.xml"/>
+        </test>
+        <test> <!-- outfmt 6 (BLAST tabular) with the default twelve columns and default hit filter -->
+            <param name="daa" ftype="daa" value="diamond_results.daa" />
+            <conditional name="output">
+                <param name="outfmt" value="6"/>
+                <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+            </conditional>
+            <output name="blast_tabular" file="diamond_results.tabular"/>
+        </test>
+        <test> <!-- outfmt 101 (SAM) filtered with top; the "top" branch only defines the "top" parameter; result lands in sam_output -->
+            <param name="daa" ftype="daa" value="diamond_results.daa" />
+            <conditional name="output">
+                <param name="outfmt" value="101"/>
+            </conditional>
+            <conditional name="hit_filter">
+                <param name="hit_filter_select" value="top"/>
+                <param name="top" value="1" />
+            </conditional>
+            <param name="forwardonly" value="--forwardonly" />
+            <output name="sam_output" file="diamond_results.sam"/>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+
+**What it does**
+
+Converts diamond daa files to multiple other formats.
+
+**Input**
+
+Input data is a daa file.
+
+
+**Output**
+
+Alignment results in BLAST format (pairwise or tabular), XML, SAM, or taxonomic classification. (Note: the latter does not work with the current diamond version.)
+
+BLAST tables contain the following columns.
+
+Column 	Description
+1 	    Query Seq-id (ID of your sequence)
+2 	    Subject Seq-id (ID of the database hit)
+3 	    Percentage of identical matches
+4 	    Alignment length
+5 	    Number of mismatches
+6 	    Number of gap openings
+7 	    Start of alignment in query
+8 	    End of alignment in query
+9 	    Start of alignment in subject (database hit)
+10 	    End of alignment in subject (database hit)
+11 	    Expectation value (E-value)
+12 	    Bit score
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
--- a/macros.xml	Tue Aug 08 16:33:49 2017 -0400
+++ b/macros.xml	Thu Sep 27 06:30:30 2018 -0400
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@VERSION@">0.8.24</token>
+    <token name="@VERSION@">0.9.21</token>

     <xml name="requirements">
         <requirements>
@@ -9,6 +9,12 @@

     <xml name="stdio">
         <stdio>
+            <!-- disabled error detection by exit code to be able to detect oom errors by
+                 regex, can be reenabled when https://github.com/galaxyproject/galaxy/pull/6338 is merged
+            <exit_code range=":-1" level="fatal"  description="Error occurred. Please check Tool Standard Error" />
+            <exit_code range="1:" level="fatal"  description="Error occurred. Please check Tool Standard Error" />-->
+            <regex match="Failed to allocate sufficient memory." source="stderr" level="fatal_oom" />
+            <regex match=".+" source="stderr" level="fatal" description=""/>
         </stdio>
     </xml>

@@ -16,9 +22,135 @@
         <version_command>diamond version</version_command>
     </xml>

+    <xml name="output_type_macro">
+        <conditional name="output">
+            <param argument="--outfmt" type="select" label="Format of output file " help="">
+                <option value="0">BLAST pairwise</option>
+                <option value="5">BLAST XML</option>
+                <option value="6">BLAST tabular</option>
+                <option value="100">DAA</option>
+                <option value="101">SAM</option>
+                <option value="102">Taxonomic classification</option>
+            </param>
+            <when value="0"/>
+	    <when value="5"/>
+            <when value="6">
+                <param name="fields" type="select" label="Tabular fields" help="" multiple="true">
+                    <option value="qseqid" selected="true">Query Seq - id</option>
+                    <option value="sseqid" selected="true">Subject Seq - id</option>
+                    <option value="sallseqid">All subject Seq - id(s)</option>
+                    <option value="qlen">Query sequence length</option>
+                    <option value="slen">Subject sequence length</option>
+                    <option value="pident" selected="true">Percentage of identical matches</option>
+                    <option value="length" selected="true">Alignment length</option>
+                    <option value="nident">Number of identical matches</option>
+                    <option value="mismatch" selected="true">Number of mismatches</option>
+                    <option value="positive">Number of positive - scoring matches</option>
+                    <option value="gapopen" selected="true">Number of gap openings</option>
+                    <option value="gaps">Total number of gaps</option>
+                    <option value="ppos">Percentage of positive - scoring matches</option>
+                    <option value="qstart" selected="true">Start of alignment in query</option>
+                    <option value="qend" selected="true">End of alignment in query</option>
+                    <option value="sstart" selected="true">Start of alignment in subject</option>
+                    <option value="send" selected="true">End of alignment in subject</option>
+                    <option value="qseq">Aligned part of query sequence</option>
+                    <option value="sseq">Aligned part of subject sequence</option>
+                    <option value="evalue" selected="true">Expect value</option>
+                    <option value="bitscore" selected="true">Bit score</option>
+                    <option value="score">Raw score</option>
+                    <option value="qframe">Query frame</option>
+                    <option value="btop">Blast traceback operations(BTOP)</option>
+                    <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option>
+                    <option value="stitle">Subject Title</option>
+                    <option value="salltitles">All Subject Title(s)</option>
+                    <option value="qcovhsp">Query Coverage Per HSP</option>
+                    <option value="qtitle">Query title</option>
+                </param>
+            </when>
+            <when value="100">
+                <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>
+                <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>
+            </when>
+	    <when value="101">
+                <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in SAM file?" help=""/>
+                <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in SAM file?" help=""/>
+            </when>
+            <when value="102"/>
+        </conditional>
+    </xml>
+
+    <xml name="hit_filter_macro">
+        <conditional name="hit_filter">
+            <param name="hit_filter_select" type="select" label="Method to restrict the number of hits?">
+                <option value="max">Maximum number of target sequences</option>
+                <option value="top">Percentage of top alignment score</option>
+            </param>
+            <when value="max">
+                <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" help="Setting this to 0 will report all alignments that were found." />
+            </when>
+            <when value="top">
+                <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" help="For example, setting this to 10 will report all
+alignments whose score is at most 10% lower than the best alignment score for a query." />
+            </when>
+        </conditional>
+    </xml>
+
     <xml name="citations">
         <citations>
             <citation type="doi">10.1038/nmeth.3176</citation>
         </citations>
     </xml>
+
+
+    <xml name="output_macro">
+        <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}">
+            <filter>output["outfmt"] == "0"</filter>
+        </data>
+        <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">
+            <filter>output["outfmt"] == "5"</filter>
+        </data>
+        <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">
+            <filter>output["outfmt"] == "6"</filter>
+        </data>
+        <!-- for daa diamond appends the .daa extension -> hence from_work_dir -->
+	<data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa">
+            <filter>output["outfmt"] == "100"</filter>
+        </data>
+        <data format="sam" name="sam_output" label="${tool.name} on ${on_string}">
+            <filter>output["outfmt"] == "101"</filter>
+        </data>
+        <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}">
+            <filter>output["outfmt"] == "102"</filter>
+        </data>
+    </xml>
+
+    <token name="@OUTPUT_ARGS@">
+        #if $output.outfmt == "0"
+            --outfmt '0'
+            --out '$blast_pairw'
+        #else if $output.outfmt == "5"
+            --outfmt '5'
+            --out '$blast_xml'
+        #else if $output.outfmt == "6"
+            --outfmt '6' #echo ' '.join(str($output.fields).split(','))
+            --out '$blast_tabular'
+        #else if $output.outfmt == "100"
+            --outfmt '100'
+            --out output.daa
+        #else if $output.outfmt == "101"
+            --outfmt '101'
+            --out '$sam_output'
+        #else if $output.outfmt == "102"
+            --outfmt '102'
+            --out '$tax_output'
+        #end if
+    </token>
+
+    <token name="@HITFILTER_ARGS@">
+        #if str($hit_filter.hit_filter_select) == 'max':
+            --max-target-seqs '$hit_filter.max_target_seqs'
+        #else:
+            --top '$hit_filter.top'
+	#end if
+    </token>
 </macros>
Binary file test-data/db-wtax.dmnd has changed
Binary file test-data/db.dmnd has changed
--- a/test-data/db.fasta	Tue Aug 08 16:33:49 2017 -0400
+++ b/test-data/db.fasta	Thu Sep 27 06:30:30 2018 -0400
@@ -4,3 +4,9 @@
 LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
 GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
 IENY
+>gi|5524212|gb|AAD44167.1| cytochrome c [Elephas minimus minimus]
+LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAGGGGGGGWGQMSFWGATVITNLFSAIPYIGTNLV
+EWIWGGFSVDKAAAAAAAAAAAAAAAAAAAAAAAAATFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
+LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
+GLMPFLHTSKHRSMMLRPLSQALAAAAAAAAAAAAAAAAAAAAAAATIIGQMASILYFSIILAFLPIAGX
+IENY
Binary file test-data/diamond_results.daa has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results.pairwise	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,34 @@
+BLASTP 2.3.0+
+
+
+Query= sequence more text
+
+Length=849
+
+>gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
+Length=284
+
+ Score = 541.2 bits (1393),  Expect = 2.0e-158
+ Identities = 283/284 (99%), Positives = 283/284 (99%), Gaps = 1/284 (0%)
+ Frame = 1
+
+Query    1  LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFS 180
+            LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFS
+Sbjct    1  LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFS 60
+
+Query  181  AIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL-FTMVALAGVHLTFLHETGSNNPLGL 357
+            AIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL FTMVALAGVHLTFLHETGSNNPLGL
+Sbjct   61  AIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGL 120
+
+Query  358  TSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPE 537
+            TSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPE
+Sbjct  121  TSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPE 180
+
+Query  538  WYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMD 717
+            WYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMD
+Sbjct  181  WYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMD 240
+
+Query  718  LLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY 849
+            LLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY
+Sbjct  241  LLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY 284
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results.sam	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,6 @@
+@HD	VN:1.5	SO:query
+@PG	PN:DIAMOND
+@mm	BlastP
+@CO	BlastP-like alignments
+@CO	Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate
+sequence	0	gi|5524211|gb|AAD44166.1|	1	255	94M1D189M	*	0	0	LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY	*	AS:i:541	NM:i:1	ZL:i:284	ZR:i:1393	ZE:f:2.0e-158	ZI:i:99	ZF:i:1	ZS:i:1	MD:Z:94^P189
--- a/test-data/diamond_results.tabular	Tue Aug 08 16:33:49 2017 -0400
+++ b/test-data/diamond_results.tabular	Thu Sep 27 06:30:30 2018 -0400
@@ -1,1 +1,2 @@
-sequence	gi|5524211|gb|AAD44166.1|	90.5	284	26	1	1	283	1	284	1.0e-152	521.2
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	283	1	284	2.0e-158	541.2
+sequence	gi|5524212|gb|AAD44167.1|	74.6	284	71	1	1	283	1	284	1.3e-106	369.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results.wtax.tabular	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,1 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	283	1	284	2.0e-158	541.2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results.xml	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>diamond 0.9.21</BlastOutput_version>
+  <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), &quot;Fast and sensitive protein alignment using DIAMOND&quot;, Nature Methods 12:59-60.</BlastOutput_reference>
+  <BlastOutput_db>.dmnd</BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>sequence</BlastOutput_query-def>
+  <BlastOutput_query-len>283</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>blosum62</Parameters_matrix>
+      <Parameters_expect>0.001</Parameters_expect>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+<BlastOutput_iterations>
+<Iteration>
+  <Iteration_iter-num>1</Iteration_iter-num>
+  <Iteration_query-ID>Query_1</Iteration_query-ID>
+  <Iteration_query-def>sequence</Iteration_query-def>
+  <Iteration_query-len>283</Iteration_query-len>
+<Iteration_hits>
+<Hit>
+  <Hit_num>1</Hit_num>
+  <Hit_id>gi|5524211|gb|AAD44166.1|</Hit_id>
+  <Hit_def></Hit_def>
+  <Hit_accession>AAD44166.1</Hit_accession>
+  <Hit_len>284</Hit_len>
+  <Hit_hsps>
+    <Hsp>
+      <Hsp_num>1</Hsp_num>
+      <Hsp_bit-score>541.2</Hsp_bit-score>
+      <Hsp_score>1393</Hsp_score>
+      <Hsp_evalue>2.0e-158</Hsp_evalue>
+      <Hsp_query-from>1</Hsp_query-from>
+      <Hsp_query-to>284</Hsp_query-to>
+      <Hsp_hit-from>1</Hsp_hit-from>
+      <Hsp_hit-to>284</Hsp_hit-to>
+      <Hsp_query-frame>0</Hsp_query-frame>
+      <Hsp_hit-frame>0</Hsp_hit-frame>
+      <Hsp_identity>283</Hsp_identity>
+      <Hsp_positive>283</Hsp_positive>
+      <Hsp_gaps>1</Hsp_gaps>
+      <Hsp_align-len>284</Hsp_align-len>
+         <Hsp_qseq>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL-FTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_qseq>
+         <Hsp_hseq>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_hseq>
+      <Hsp_midline>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL FTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_midline>
+    </Hsp>
+  </Hit_hsps>
+</Hit>
+</Iteration_hits>
+  <Iteration_stat>
+    <Statistics>
+      <Statistics_db-num>2</Statistics_db-num>
+      <Statistics_db-len>568</Statistics_db-len>
+      <Statistics_hsp-len>0</Statistics_hsp-len>
+      <Statistics_eff-space>0</Statistics_eff-space>
+      <Statistics_kappa>0.041000</Statistics_kappa>
+      <Statistics_lambda>0.267000</Statistics_lambda>
+      <Statistics_entropy>0</Statistics_entropy>
+    </Statistics>
+  </Iteration_stat>
+</Iteration>
+</BlastOutput_iterations>
+</BlastOutput>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nodes.dmp	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,3 @@
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
+2	|	1	|	species	|	AC	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+3	|	1	|	species	|	AC	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nucleotide.fasta	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,17 @@
+>sequence more text
+CTGTGCCTGTACACCCACATCGGCAGAAACATCTACTACGGCAGCTACCTGTACAGCGAG
+ACCTGGAACACCGGCATCATGCTGCTGCTGATCACCATGGCCACCGCCTTCATGGGCTAC
+GTGCTGCCCTGGGGCCAGATGAGCTTCTGGGGCGCCACCGTGATCACCAACCTGTTCAGC
+GCCATCCCCTACATCGGCACCAACCTGGTGGAGTGGATCTGGGGCGGCTTCAGCGTGGAC
+AAGGCCACCCTGAACAGATTCTTCGCCTTCCACTTCATCCTGTTCACCATGGTGGCCCTG
+GCCGGCGTGCACCTGACCTTCCTGCACGAGACCGGCAGCAACAACCCCCTGGGCCTGACC
+AGCGACAGCGACAAGATCCCCTTCCACCCCTACTACACCATCAAGGACTTCCTGGGCCTG
+CTGATCCTGATCCTGCTGCTGCTGCTGCTGGCCCTGCTGAGCCCCGACATGCTGGGCGAC
+CCCGACAACCACATGCCCGCCGACCCCCTGAACACCCCCCTGCACATCAAGCCCGAGTGG
+TACTTCCTGTTCGCCTACGCCATCCTGAGAAGCGTGCCCAACAAGCTGGGCGGCGTGCTG
+GCCCTGTTCCTGAGCATCGTGATCCTGGGCCTGATGCCCTTCCTGCACACCAGCAAGCAC
+AGAAGCATGATGCTGAGACCCCTGAGCCAGGCCCTGTTCTGGACCCTGACCATGGACCTG
+CTGACCCTGACCTGGATCGGCAGCCAGCCCGTGGAGTACCCCTACACCATCATCGGCCAG
+ATGGCCAGCATCCTGTACTTCAGCATCATCCTGGCCTTCCTGCCCATCGCCGGCNNNATC
+GAGAACTAC
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prot.accession2taxid	Thu Sep 27 06:30:30 2018 -0400
@@ -0,0 +1,4 @@
+accession	accession.version	taxid	gi
+AAD44166	AAD44166.1	2	5524211
+AAD44167	AAD44167.1	3	5524212
+
--- a/test-data/protein.fasta	Tue Aug 08 16:33:49 2017 -0400
+++ b/test-data/protein.fasta	Thu Sep 27 06:30:30 2018 -0400
@@ -1,4 +1,4 @@
->sequence
+>sequence more text
 LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
 EWIWGGFSVDKATLNRFFAFHFILFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
 LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL