view fastani.xml @ 0:2c7ac12ad75a draft

"planemo upload commit 01a4c6cb401a89acddd991825fcaa304052d275f"
author iuc
date Tue, 18 Feb 2020 15:58:57 -0500
parents
children 3c6e0e8df873
line wrap: on
line source

<tool id="fastani" name="FastANI" version="@VERSION@">
    <!-- One-line summary of what the tool does. -->
    <description> fast alignment-free computation of whole-genome Average Nucleotide Identity</description>
    <!-- Shared definitions are pulled in from macros.xml. The "requirements" macro expanded
         below is expected to be defined there; presumably the @VERSION@ token used on the
         tool element is too (macros.xml is not visible here, so confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Command used to report the version of the underlying fastANI executable. -->
    <version_command>fastANI --version</version_command>
    <command detect_errors="exit_code">
    <![CDATA[
    #import re

    ## Builds a single shell command line: symlink every input under a
    ## shell-safe name, record those names in query.lst / ref.lst, then run
    ## fastANI on the two lists (--ql / --rl) and write the result to "output".
    ##
    ## A link name is the element identifier with every character that is not
    ## matched by \w replaced with "_", followed by "_query" or "_ref".
    ##
    ## Distinct datasets can sanitize to the same link name (for example "a.b"
    ## and "a_b", or two datasets that simply share a name). Without a guard,
    ## ln -fs would silently replace the earlier link and the list file would
    ## name the same genome twice. When a name has already been used, the loop
    ## index is appended to make it unique; names of non-colliding inputs are
    ## left exactly as before.

    ### SET UP INPUTS ###
    #set $query_links = []
    #for $idx, $input_q in enumerate($query):
        #set $link_name = re.sub('[^\w_]', '_', $input_q.element_identifier) + '_query'
        #if $link_name in $query_links:
            #set $link_name = $link_name + '_' + str($idx)
        #end if
        #silent $query_links.append($link_name)
        ln -fs '$input_q' '${link_name}' &&
        echo '${link_name}' >> query.lst &&
    #end for

    #set $ref_links = []
    #for $idx, $input_r in enumerate($reference):
        #set $link_name = re.sub('[^\w_]', '_', $input_r.element_identifier) + '_ref'
        #if $link_name in $ref_links:
            #set $link_name = $link_name + '_' + str($idx)
        #end if
        #silent $ref_links.append($link_name)
        ln -fs '$input_r' '${link_name}' &&
        echo '${link_name}' >> ref.lst &&
    #end for


    ### COMMAND ###

    ## The backslash keeps Cheetah from interpolating the variable, so the shell
    ## expands GALAXY_SLOTS itself and falls back to 1 when it is unset.
    fastANI --ql query.lst --rl ref.lst -o output -t "\${GALAXY_SLOTS:-1}"

    ]]></command>
    <inputs>
        <!-- Both parameters accept one or more FASTA datasets (multiple="true"). -->
        <param name="query"
               type="data"
               format="fasta"
               multiple="true"
               label="Query Sequence(s)"
               help="Specify any number of query sequences in fasta format as input." />
        <param name="reference"
               type="data"
               format="fasta"
               multiple="true"
               label="Reference Sequence(s)"
               help="Specify any number of reference sequences in fasta format as input." />
    </inputs>
    <outputs>
        <!-- The command writes its result to a file named "output" in the job working
             directory; that file is collected as the tabular dataset. -->
        <data name="output"
              format="tabular"
              from_work_dir="output"
              label="${tool.name} on ${on_string} Output" />
    </outputs>
    <tests>
        <!-- One query genome against one reference genome. -->
        <test>
            <param name="query"
                   value="E.coli_1.fasta" />
            <param name="reference"
                   value="S.flexneri_1.fasta" />
            <output name="output"
                    file="single.out"
                    compare="sim_size" />
        </test>
        <!-- Two query genomes against two reference genomes (comma-separated values). -->
        <test>
            <param name="query"
                   value="E.coli_1.fasta,E.coli_2.fasta" />
            <param name="reference"
                   value="S.flexneri_1.fasta,S.flexneri_2.fasta" />
            <output name="output"
                    file="multi.out"
                    compare="sim_size" />
        </test>
    </tests>
    <help><![CDATA[

FastANI
=======

FastANI is developed for fast alignment-free computation of whole-genome
Average Nucleotide Identity (ANI). ANI is defined as mean nucleotide
identity of orthologous gene pairs shared between two microbial genomes.
FastANI supports pairwise comparison of both complete and draft genome
assemblies. Its underlying procedure follows a workflow similar to the one described
by `Goris et al. 2007 <https://doi.org/10.1099/ijs.0.64483-0>`_. However, it avoids expensive sequence alignments and
uses `Mashmap <https://github.com/marbl/MashMap>`_ as its MinHash based sequence mapping engine to compute the
orthologous mappings and alignment identity estimates. Based on our
experiments with complete and draft genomes, its accuracy is on par with
`BLAST-based ANI solver <http://enve-omics.ce.gatech.edu/ani/>`_ and it achieves two to three orders of magnitude
speedup. Therefore, it is useful for pairwise ANI computation of a large
number of genome pairs. More details about its speed, accuracy and potential
applications are described here: `"High Throughput ANI Analysis of 90K
Prokaryotic Genomes Reveals Clear Species Boundaries" <https://doi.org/10.1038/s41467-018-07641-9>`_.

Please visit the authors at: https://github.com/ParBLiSS/FastANI


Inputs
------

**Query Sequence(s):**
Input one or more query genomes in fasta format

**Reference Sequence(s):**
Input one or more reference genomes to be compared to the query genomes


Output
------

Tabular output with the following columns: Query Genome, Reference Genome, ANI Value, Count of Bidirectional Fragment Mappings, and Total Query Fragments.

The output table looks like this:

+------------+------------+-----------+--------+--------+
|  Genome A  |  Genome C  |  97.5883  |  1405  |  1594  |
+------------+------------+-----------+--------+--------+
|  Genome A  |  Genome D  |  95.6663  |  1405  |  1594  |
+------------+------------+-----------+--------+--------+
|  Genome B  |  Genome C  |  92.4281  |  1409  |  1553  |
+------------+------------+-----------+--------+--------+
|  Genome B  |  Genome D  |  99.9242  |  1396  |  1553  |
+------------+------------+-----------+--------+--------+

|
| Thanks to Thanh Lê for building the initial Galaxy wrapper.
    ]]></help>
    <expand macro="citations" />
</tool>