Mercurial > repos > cpt > cpt_gbk_compare
diff gbk_compare.xml @ 1:1909729a1fd3 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:42:47 +0000 |
parents | |
children | b0ab633db780 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gbk_compare.xml Mon Jun 05 02:42:47 2023 +0000 @@ -0,0 +1,90 @@ +<tool id="edu.tamu.cpt.gbk.gbk_compare" name="Compare Genbanks" version="20.1.0"> + <description>, compare the identity of two genbank files</description> + <macros> + <import>macros.xml</import> + <import>cpt-macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="1.74">biopython</requirement> + </requirements> + <command detect_errors="aggressive"><![CDATA[ +python '$__tool_directory__/gbk_compare.py' +'$annotation_1' +'$annotation_2' +--match_identity_threshold '$match_identity_threshold' +--allowed_skipped_genes '$allowed_skipped_genes' +'$addNotes' +-sumOut '$sumOut' +> '$repOut']]></command> + <inputs> + <param label="First annotated genome (Genbank)" name="annotation_1" type="data" format="genbank"/> + <param label="Second annotated genome (Genbank)" name="annotation_2" type="data" format="genbank"/> + <param label="Two genes must have at least this nucleotide identity to be considerd the same (0.00 to 1.00)" name="match_identity_threshold" type="float" value="0.70" min="0.00" max="1.00"/> + <param label="This many missing genes are allowed when aligning the annotations (see help below)" name="allowed_skipped_genes" type="integer" value="10"/> + <param label="Include 'note' field in output" name="addNotes" type="boolean" truevalue="--addNotes" falsevalue=""/> + </inputs> + <outputs> + <data format="tsv" name="sumOut" label="Report Summary"/> + <data format="tsv" name="repOut" label="Comparison Report of Two Genomes"/> + </outputs> + <tests> + <param name="annotation_1" value="related.gbk"/> + <param name="annotation_2" value="related.1-2000.gbk"/> + <output name="sumOut" value="gbkCompare_summaryOut.tsv"/> + <output name="repOut" value="gbkCompare_reportOut.tsv"/> + </tests> + <help><![CDATA[ +**What it does** + +This tool quantifies the changes in annotations for a genome, such as when it has been reassembled and/or reannotated. +The tool does a global alignment (using pairwise2 from biopython) of the annotated genes in both files. This means that +the input genomes must be roughly aligned at the gene level. If there are structural rearrangements, the tool will not work. +The original script was written by Ryan Wick from Monash University and was adapted to work on Galaxy by the CPT team. + +"Allowed Skipped Genes" is the number of genes that are allowed to be present in one record but not the other. Presence is +determined by whether a pair of genes are above the provided identity threshold. The tool will compare the CDSs in order, +ie the 1st CDS from Genbank 1 and the 1st CDS from the 2nd Genbank, the 2nd CDS and 2nd CDS, and so on. When a pair fails +to meet the identity threshhold, one of the CDSs is skipped and the comparisons will resume but offset, ie if 3rd and 3rd +CDS failed, then the 3rd CDS from Genbank 1 and the 4th from Genbank 2 will be checked next, and if they pass then the +march down the list will resume as 4th and 5th compared to each other, then 5th and 6th, and so on. If the number of these +offsets exceeds the allowed skip number, then it is assumed the genbanks are too different for a meaningful for comparison, +and the job will fail. + +**Input** + +Files *MUST* be in gebank format. +The tool only looks at CDS features in the genome. + +**Output** + +* A tab delimited file of the results +* Summary of the results. +]]></help> + <citations> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {R. Wick}, + title = {CPT Galaxy Tools}, + year = {2020}, + note = {https://github.com/rrwick/Compare-annotations} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </citations> +</tool>