comparison fastani.xml @ 0:2c7ac12ad75a draft

"planemo upload commit 01a4c6cb401a89acddd991825fcaa304052d275f"
author iuc
date Tue, 18 Feb 2020 15:58:57 -0500
parents
children 3c6e0e8df873
comparison
equal deleted inserted replaced
-1:000000000000 0:2c7ac12ad75a
1 <tool id="fastani" name="FastANI" version="@VERSION@">
2 <description> fast alignment-free computation of whole-genome Average Nucleotide Identity</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <version_command>fastANI --version</version_command>
<!-- Cheetah template that builds the job's shell command.
     Each query/reference dataset is symlinked under a name derived from its element
     identifier (every character not matched by \w becomes "_", then "_query" or "_ref"
     is appended) and that name is appended to query.lst / ref.lst. fastANI is then run
     on the two lists, writing to the file "output" with GALAXY_SLOTS threads (default 1).
     NOTE(review): two datasets whose sanitized identifiers are equal get the same link
     name, so "ln -fs" replaces the earlier link; confirm whether duplicates can occur. -->
8 <command detect_errors="exit_code">
9 <![CDATA[
10 #import re
11
12 ### SET UP INPUTS ###
13 #for $input_q in $query:
14 #set $file_name = re.sub('[^\w_]', '_', $input_q.element_identifier)
15 ln -fs '$input_q' '${file_name}_query' &&
16 echo '${file_name}_query' >> query.lst &&
17 #end for
18
19 #for $input_r in $reference:
20 #set $file_name = re.sub('[^\w_]', '_', $input_r.element_identifier)
21 ln -fs '$input_r' '${file_name}_ref' &&
22 echo '${file_name}_ref' >> ref.lst &&
23 #end for
24
25
26 ### COMMAND ###
27
28 fastANI --ql query.lst --rl ref.lst -o output -t "\${GALAXY_SLOTS:-1}"
29
30 ]]></command>
<!-- Two FASTA dataset parameters, "query" and "reference"; both accept multiple datasets. -->
31 <inputs>
32 <param name="query" type="data" format="fasta" multiple="true" label="Query Sequence(s)"
33 help="Specify any number of query sequences in fasta format as input." />
34 <param name="reference" type="data" format="fasta" multiple="true" label="Reference Sequence(s)"
35 help="Specify any number of reference sequences in fasta format as input." />
36 </inputs>
<!-- One tabular dataset, collected from the file named "output" in the job working directory. -->
37 <outputs>
38 <data name="output" format="tabular" from_work_dir="output" label="${tool.name} on ${on_string} Output"/>
39 </outputs>
<!-- Two tests: one query against one reference, and two queries against two references.
     Both check the output with compare="sim_size" rather than an exact content match. -->
40 <tests>
41 <test>
42 <param name="query" value="E.coli_1.fasta" />
43 <param name="reference" value="S.flexneri_1.fasta" />
44 <output name="output" file="single.out" compare="sim_size" />
45 </test>
46 <test>
47 <param name="query" value="E.coli_1.fasta,E.coli_2.fasta" />
48 <param name="reference" value="S.flexneri_1.fasta,S.flexneri_2.fasta" />
49 <output name="output" file="multi.out" compare="sim_size" />
50 </test>
51 </tests>
52 <help><![CDATA[
53
54 FastANI
55 =======
56
57 FastANI is developed for fast alignment-free computation of whole-genome
58 Average Nucleotide Identity (ANI). ANI is defined as mean nucleotide
59 identity of orthologous gene pairs shared between two microbial genomes.
60 FastANI supports pairwise comparison of both complete and draft genome
61 assemblies. Its underlying procedure follows a similar workflow as described
62 by `Goris et al. 2007 <https://doi.org/10.1099/ijs.0.64483-0>`_. However, it avoids expensive sequence alignments and
63 uses `Mashmap <https://github.com/marbl/MashMap>`_ as its MinHash based sequence mapping engine to compute the
64 orthologous mappings and alignment identity estimates. Based on our
65 experiments with complete and draft genomes, its accuracy is on par with
66 `BLAST-based ANI solver <http://enve-omics.ce.gatech.edu/ani/>`_ and it achieves two to three orders of magnitude
67 speedup. Therefore, it is useful for pairwise ANI computation of a large
68 number of genome pairs. More details about its speed, accuracy and potential
69 applications are described here: `"High Throughput ANI Analysis of 90K
70 Prokaryotic Genomes Reveals Clear Species Boundaries" <https://doi.org/10.1038/s41467-018-07641-9>`_.
71
72 Please visit the authors at: https://github.com/ParBLiSS/FastANI
73
74
75 Inputs
76 ------
77
78 **Query Sequence(s):**
79 Input one or more query genomes in fasta format
80
81 **Reference Sequence(s):**
82 Input one or more reference genomes to be compared to the query genomes
83
84
85 Output
86 ------
87
88 Tabular output with the columns: Query Genome, Reference Genome, ANI Value, Count of Bidirectional Fragment Mappings, and Total Query Fragments.
89
90 The output table looks like this:
91
92 +------------+------------+-----------+--------+--------+
93 | Genome A | Genome C | 97.5883 | 1405 | 1594 |
94 +------------+------------+-----------+--------+--------+
95 | Genome A | Genome D | 95.6663 | 1405 | 1594 |
96 +------------+------------+-----------+--------+--------+
97 | Genome B | Genome C | 92.4281 | 1409 | 1553 |
98 +------------+------------+-----------+--------+--------+
99 | Genome B | Genome D | 99.9242 | 1396 | 1553 |
100 +------------+------------+-----------+--------+--------+
101
102 |
103 | Thanks to Thanh Lê for building the initial Galaxy wrapper.
104 ]]></help>
105 <expand macro="citations" />
106 </tool>