Mercurial > repos > iuc > fastani

diff fastani.xml @ 0:2c7ac12ad75a draft
"planemo upload commit 01a4c6cb401a89acddd991825fcaa304052d275f"
author: iuc
date: Tue, 18 Feb 2020 15:58:57 -0500
children: 3c6e0e8df873
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastani.xml	Tue Feb 18 15:58:57 2020 -0500
@@ -0,0 +1,106 @@
+<tool id="fastani" name="FastANI" version="@VERSION@">
+    <description> fast alignment-free computation of whole-genome Average Nucleotide Identity</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <version_command>fastANI --version</version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+    #import re
+
+    ### SET UP INPUTS ###
+    #for $input_q in $query:
+        #set $file_name = re.sub('[^\w_]', '_', $input_q.element_identifier)
+        ln -fs '$input_q' '${file_name}_query' &&
+        echo '${file_name}_query' >> query.lst &&
+    #end for
+
+    #for $input_r in $reference:
+        #set $file_name = re.sub('[^\w_]', '_', $input_r.element_identifier)
+        ln -fs '$input_r' '${file_name}_ref' &&
+        echo '${file_name}_ref' >> ref.lst &&
+    #end for
+    
+
+    ### COMMAND ###
+
+    fastANI --ql query.lst --rl ref.lst -o output -t "\${GALAXY_SLOTS:-1}"
+
+    ]]></command>
+    <inputs>
+        <param name="query" type="data" format="fasta" multiple="true" label="Query Sequence(s)"
+        help="Specify any number of query sequences in fasta format as input." />
+        <param name="reference" type="data" format="fasta" multiple="true" label="Reference Sequence(s)"
+        help="Specify any number of reference sequences in fasta format as input." />
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" from_work_dir="output" label="${tool.name} on ${on_string} Output"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="E.coli_1.fasta" />
+            <param name="reference" value="S.flexneri_1.fasta" />
+            <output name="output" file="single.out" compare="sim_size" />
+        </test>
+        <test>
+            <param name="query" value="E.coli_1.fasta,E.coli_2.fasta" />
+            <param name="reference" value="S.flexneri_1.fasta,S.flexneri_2.fasta" />
+            <output name="output" file="multi.out" compare="sim_size" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+FastANI
+=======
+
+FastANI is developed for fast alignment-free computation of whole-genome
+Average Nucleotide Identity (ANI). ANI is defined as mean nucleotide
+identity of orthologous gene pairs shared between two microbial genomes.
+FastANI supports pairwise comparison of both complete and draft genome
+assemblies. Its underlying procedure follows a similar workflow as described
+by `Goris et al. 2007 <https://doi.org/10.1099/ijs.0.64483-0>`_. However, it avoids expensive sequence alignments and
+uses `Mashmap <https://github.com/marbl/MashMap>`_ as its MinHash based sequence mapping engine to compute the
+orthologous mappings and alignment identity estimates. Based on our
+experiments with complete and draft genomes, its accuracy is on par with
+`BLAST-based ANI solver <http://enve-omics.ce.gatech.edu/ani/>`_ and it achieves two to three orders of magnitude
+speedup. Therefore, it is useful for pairwise ANI computation of large
+number of genome pairs. More details about its speed, accuracy and potential
+applications are described here: `"High Throughput ANI Analysis of 90K
+Prokaryotic Genomes Reveals Clear Species Boundaries" <https://doi-org.uml.idm.oclc.org/10.1038/s41467-018-07641-9>`_.
+
+Please visit the authors at: https://github.com/ParBLiSS/FastANI
+
+
+Inputs
+------
+
+**Query Sequence(s):**
+Input one or more query genomes in fasta format
+
+**Reference Sequence(s):**
+Input one or more reference genomes to be compared to the query genomes
+
+
+Output
+------
+
+Tabular table output with columns: Query Genome, Reference Genome, ANI Value, Count of Bidirectional Fragment Mappings, and Total Query Fragments.
+
+Output table looks as such:
+
++------------+------------+-----------+--------+--------+
+|  Genome A  |  Genome C  |  97.5883  |  1405  |  1594  |
++------------+------------+-----------+--------+--------+
+|  Genome A  |  Genome D  |  95.6663  |  1405  |  1594  |
++------------+------------+-----------+--------+--------+
+|  Genome B  |  Genome C  |  92.4281  |  1409  |  1553  |
++------------+------------+-----------+--------+--------+
+|  Genome B  |  Genome D  |  99.9242  |  1396  |  1553  |
++------------+------------+-----------+--------+--------+
+
+|
+| Thanks to Thanh Lê for building the initial Galaxy wrapper.
+    ]]></help>
+    <expand macro="citations" />
+</tool>
author	iuc
date	Tue, 18 Feb 2020 15:58:57 -0500
parents
children	3c6e0e8df873