<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for gbk_compare.py.

    Takes two annotated GenBank files plus an identity threshold and a limit on
    skipped genes, and produces two datasets: a comparison report (captured from
    the script's standard output) and a summary (path handed to the script
    through its sumOut option).
-->
<tool id="edu.tamu.cpt.gbk.gbk_compare" name="Compare Genbanks" version="20.1.0">
    <description>, compare the identity of two genbank files</description>

    <!-- Shared macro files; nothing from them is expanded in this wrapper as written. -->
    <macros>
        <import>macros.xml</import>
        <import>cpt-macros.xml</import>
    </macros>

    <requirements>
        <requirement type="package" version="3.7">python</requirement>
        <requirement type="package" version="1.74">biopython</requirement>
    </requirements>

    <!--
        Paths are single-quoted so that dataset or install locations containing
        spaces or shell metacharacters are passed to the script as one argument.
        $addNotes is left unquoted on purpose: it expands either to the flag or to
        nothing, and quoting it would pass an empty argument when it is unset.
    -->
    <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/gbk_compare.py'
'$annotation_1'
'$annotation_2'
--match_identity_threshold $match_identity_threshold
--allowed_skipped_genes $allowed_skipped_genes
$addNotes
-sumOut '$sumOut'
> '$repOut']]></command>

    <inputs>
        <param name="annotation_1" type="data" format="genbank"
               label="First annotated genome (Genbank)"/>
        <param name="annotation_2" type="data" format="genbank"
               label="Second annotated genome (Genbank)"/>
        <param name="match_identity_threshold" type="float" value="0.70" min="0.00" max="1.00"
               label="Two genes must have at least this nucleotide identity to be considered the same (0.00 to 1.00)"/>
        <param name="allowed_skipped_genes" type="integer" value="10"
               label="This many missing genes are allowed when aligning the annotations (see help below)"/>
        <!-- Off by default; when checked the addNotes flag is appended to the command line. -->
        <param name="addNotes" type="boolean" checked="false" truevalue="--addNotes" falsevalue=""
               label="Include 'note' field in output"/>
    </inputs>

    <outputs>
        <data format="tsv" name="sumOut" label="Report Summary"/>
        <data format="tsv" name="repOut" label="Comparison Report of Two Genomes"/>
    </outputs>

    <!-- Each test case must be wrapped in its own <test> element. -->
    <tests>
        <test>
            <param name="annotation_1" value="related.gbk"/>
            <param name="annotation_2" value="related.1-2000.gbk"/>
            <output name="sumOut" value="gbkCompare_summaryOut.tsv"/>
            <output name="repOut" value="gbkCompare_reportOut.tsv"/>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

This tool quantifies the changes in annotations for a genome, such as when it has been reassembled and/or reannotated.
The tool does a global alignment (using pairwise2 from biopython) of the annotated genes in both files. This means that
the input genomes must be roughly aligned at the gene level. If there are structural rearrangements, the tool will not work.
The original script was written by Ryan Wick from Monash University and was adapted to work on Galaxy by the CPT team.

"Allowed Skipped Genes" is the number of genes that are allowed to be present in one record but not the other. Presence is
determined by whether a pair of genes are above the provided identity threshold. The tool will compare the CDSs in order,
i.e. the 1st CDS from Genbank 1 and the 1st CDS from the 2nd Genbank, the 2nd CDS and 2nd CDS, and so on. When a pair fails
to meet the identity threshold, one of the CDSs is skipped and the comparisons will resume but offset, i.e. if 3rd and 3rd
CDS failed, then the 3rd CDS from Genbank 1 and the 4th from Genbank 2 will be checked next, and if they pass then the
march down the list will resume as 4th and 5th compared to each other, then 5th and 6th, and so on. If the number of these
offsets exceeds the allowed skip number, then it is assumed the genbanks are too different for a meaningful comparison,
and the job will fail.

**Input**

Files *MUST* be in GenBank format.
The tool only looks at CDS features in the genome.

**Output**

* A tab delimited file of the results
* Summary of the results.
]]></help>

    <citations>
        <citation type="bibtex">
            @unpublished{galaxyTools,
                author = {R. Wick},
                title = {CPT Galaxy Tools},
                year = {2020},
                note = {https://github.com/rrwick/Compare-annotations}
            }
        </citation>
        <citation type="bibtex">
            @unpublished{galaxyTools,
                author = {C. Ross},
                title = {CPT Galaxy Tools},
                year = {2020-},
                note = {https://github.com/tamu-cpt/galaxy-tools/}
            }
        </citation>
        <citation type="bibtex">
            @unpublished{galaxyTools,
                author = {A. Criscione},
                title = {CPT Galaxy Tools},
                year = {2019-2021},
                note = {https://github.com/tamu-cpt/galaxy-tools/}
            }
        </citation>
    </citations>
</tool>