Mercurial > repos > iuc > fastani
diff fastani.xml @ 0:2c7ac12ad75a draft
"planemo upload commit 01a4c6cb401a89acddd991825fcaa304052d275f"
author | iuc |
---|---|
date | Tue, 18 Feb 2020 15:58:57 -0500 |
parents | |
children | 3c6e0e8df873 |
line wrap: on
line diff
<tool id="fastani" name="FastANI" version="@VERSION@">
    <!--
        Galaxy wrapper for FastANI. The version token and the "requirements"
        and "citations" macros expanded below are provided by macros.xml.
    -->
    <description> fast alignment-free computation of whole-genome Average Nucleotide Identity</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command>fastANI --version</version_command>
    <!--
        Command template (Cheetah):
          1. Every query / reference dataset is symlinked into the working
             directory under a name derived from its element identifier, with
             each character that is not a word character replaced by "_" and
             a "_query" / "_ref" suffix appended. The link name is appended to
             query.lst or ref.lst respectively.
          2. fastANI is then run once on the two list files, writing its
             result to the file "output" and using GALAXY_SLOTS threads
             (falling back to 1 when the variable is unset).
        NOTE(review): two datasets whose identifiers sanitize to the same name
        would map to the same link name; "ln -fs" replaces the earlier link,
        so both list entries would point at the later dataset. Confirm whether
        that can occur in practice before relying on it.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        #import re

        ### SET UP INPUTS ###
        #for $input_q in $query:
            #set $file_name = re.sub('[^\w_]', '_', $input_q.element_identifier)
            ln -fs '$input_q' '${file_name}_query' &&
            echo '${file_name}_query' >> query.lst &&
        #end for

        #for $input_r in $reference:
            #set $file_name = re.sub('[^\w_]', '_', $input_r.element_identifier)
            ln -fs '$input_r' '${file_name}_ref' &&
            echo '${file_name}_ref' >> ref.lst &&
        #end for


        ### COMMAND ###

        fastANI --ql query.lst --rl ref.lst -o output -t "\${GALAXY_SLOTS:-1}"

    ]]></command>
    <inputs>
        <!-- Both parameters accept one or more FASTA datasets (multiple="true"). -->
        <param name="query" type="data" format="fasta" multiple="true" label="Query Sequence(s)"
            help="Specify any number of query sequences in fasta format as input." />
        <param name="reference" type="data" format="fasta" multiple="true" label="Reference Sequence(s)"
            help="Specify any number of reference sequences in fasta format as input." />
    </inputs>
    <outputs>
        <!-- Collected from the file named "output" that the command writes in the working directory. -->
        <data name="output" format="tabular" from_work_dir="output" label="${tool.name} on ${on_string} Output"/>
    </outputs>
    <tests>
        <!-- One query against one reference. -->
        <test>
            <param name="query" value="E.coli_1.fasta" />
            <param name="reference" value="S.flexneri_1.fasta" />
            <output name="output" file="single.out" compare="sim_size" />
        </test>
        <!-- Two queries against two references. -->
        <test>
            <param name="query" value="E.coli_1.fasta,E.coli_2.fasta" />
            <param name="reference" value="S.flexneri_1.fasta,S.flexneri_2.fasta" />
            <output name="output" file="multi.out" compare="sim_size" />
        </test>
    </tests>
    <help><![CDATA[

FastANI
=======

FastANI is developed for fast alignment-free computation of whole-genome
Average Nucleotide Identity (ANI). ANI is defined as mean nucleotide
identity of orthologous gene pairs shared between two microbial genomes.
FastANI supports pairwise comparison of both complete and draft genome
assemblies. Its underlying procedure follows a similar workflow as described
by `Goris et al. 2007 <https://doi.org/10.1099/ijs.0.64483-0>`_. However, it avoids expensive sequence alignments and
uses `Mashmap <https://github.com/marbl/MashMap>`_ as its MinHash based sequence mapping engine to compute the
orthologous mappings and alignment identity estimates. Based on our
experiments with complete and draft genomes, its accuracy is on par with
`BLAST-based ANI solver <http://enve-omics.ce.gatech.edu/ani/>`_ and it achieves two to three orders of magnitude
speedup. Therefore, it is useful for pairwise ANI computation of large
number of genome pairs. More details about its speed, accuracy and potential
applications are described here: `"High Throughput ANI Analysis of 90K
Prokaryotic Genomes Reveals Clear Species Boundaries" <https://doi.org/10.1038/s41467-018-07641-9>`_.

Please visit the authors at: https://github.com/ParBLiSS/FastANI


Inputs
------

**Query Sequence(s):**
Input one or more query genomes in fasta format

**Reference Sequence(s):**
Input one or more reference genomes to be compared to the query genomes


Output
------

Tabular table output with columns: Query Genome, Reference Genome, ANI Value, Count of Bidirectional Fragment Mappings, and Total Query Fragments.

Output table looks as such:

+------------+------------+-----------+--------+--------+
| Genome A   | Genome C   | 97.5883   | 1405   | 1594   |
+------------+------------+-----------+--------+--------+
| Genome A   | Genome D   | 95.6663   | 1405   | 1594   |
+------------+------------+-----------+--------+--------+
| Genome B   | Genome C   | 92.4281   | 1409   | 1553   |
+------------+------------+-----------+--------+--------+
| Genome B   | Genome D   | 99.9242   | 1396   | 1553   |
+------------+------------+-----------+--------+--------+

|
| Thanks to Thanh Lê for building the initial Galaxy wrapper.
    ]]></help>
    <expand macro="citations" />
</tool>