Mercurial > repos > cpt > cpt_gbk_compare
comparison gbk_compare.xml @ 1:1909729a1fd3 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:42:47 +0000 |
parents | |
children | b0ab633db780 |
comparison
equal
deleted
inserted
replaced
0:cae700761678 | 1:1909729a1fd3 |
---|---|
1 <tool id="edu.tamu.cpt.gbk.gbk_compare" name="Compare Genbanks" version="20.1.0"> | |
2 <description>, compare the identity of two genbank files</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 <import>cpt-macros.xml</import> | |
6 </macros> | |
7 <requirements> | |
8 <requirement type="package" version="3.7">python</requirement> | |
9 <requirement type="package" version="1.74">biopython</requirement> | |
10 </requirements> | |
<!-- Runs gbk_compare.py on the two Genbank inputs. The script's stdout is redirected into the repOut dataset, and the sumOut dataset path is handed to the script through -sumOut. --> | |
<!-- addNotes is substituted unquoted on purpose: it is a boolean whose false value is the empty string, so quoting it would pass a literal empty argument to the script when the box is unchecked. --> | |
11 <command detect_errors="aggressive"><![CDATA[ | |
12 python '$__tool_directory__/gbk_compare.py' | |
13 '$annotation_1' | |
14 '$annotation_2' | |
15 --match_identity_threshold '$match_identity_threshold' | |
16 --allowed_skipped_genes '$allowed_skipped_genes' | |
17 $addNotes | |
18 -sumOut '$sumOut' | |
19 > '$repOut']]></command> | |
<!-- User-facing parameters: two Genbank datasets, the identity threshold (0.00 to 1.00, default 0.70), the number of tolerated skipped genes (default 10), and an optional switch for including the 'note' field in the output. --> | |
20 <inputs> | |
21 <param label="First annotated genome (Genbank)" name="annotation_1" type="data" format="genbank"/> | |
22 <param label="Second annotated genome (Genbank)" name="annotation_2" type="data" format="genbank"/> | |
23 <param label="Two genes must have at least this nucleotide identity to be considered the same (0.00 to 1.00)" name="match_identity_threshold" type="float" value="0.70" min="0.00" max="1.00"/> | |
24 <param label="This many missing genes are allowed when aligning the annotations (see help below)" name="allowed_skipped_genes" type="integer" value="10"/> | |
25 <param label="Include 'note' field in output" name="addNotes" type="boolean" truevalue="--addNotes" falsevalue=""/> | |
26 </inputs> | |
<!-- Two tsv datasets: sumOut (labelled "Report Summary") and repOut (labelled "Comparison Report of Two Genomes"). --> | |
27 <outputs> | |
28 <data name="sumOut" format="tsv" label="Report Summary"/> | |
29 <data name="repOut" format="tsv" label="Comparison Report of Two Genomes"/> | |
30 </outputs> | |
<!-- Each test case must sit in its own <test> element; <param> and <output> placed directly under <tests> do not define a test case. The remaining parameters fall back to their declared defaults. --> | |
31 <tests> | |
<test> | |
32 <param name="annotation_1" value="related.gbk"/> | |
33 <param name="annotation_2" value="related.1-2000.gbk"/> | |
34 <output name="sumOut" value="gbkCompare_summaryOut.tsv"/> | |
35 <output name="repOut" value="gbkCompare_reportOut.tsv"/> | |
</test> | |
36 </tests> | |
37 <help><![CDATA[ | |
38 **What it does** | |
39 | |
40 This tool quantifies the changes in annotations for a genome, such as when it has been reassembled and/or reannotated. | |
41 The tool does a global alignment (using pairwise2 from biopython) of the annotated genes in both files. This means that | |
42 the input genomes must be roughly aligned at the gene level. If there are structural rearrangements, the tool will not work. | |
43 The original script was written by Ryan Wick from Monash University and was adapted to work on Galaxy by the CPT team. | |
44 | |
45 "Allowed Skipped Genes" is the number of genes that are allowed to be present in one record but not the other. Presence is | |
46 determined by whether a pair of genes are above the provided identity threshold. The tool will compare the CDSs in order, | |
47 i.e. the 1st CDS from Genbank 1 and the 1st CDS from Genbank 2, the 2nd CDS and 2nd CDS, and so on. When a pair fails | |
48 to meet the identity threshold, one of the CDSs is skipped and the comparisons will resume but offset, i.e. if the 3rd and 3rd | |
49 CDS failed, then the 3rd CDS from Genbank 1 and the 4th from Genbank 2 will be checked next, and if they pass then the | |
50 march down the list will resume as 4th and 5th compared to each other, then 5th and 6th, and so on. If the number of these | |
51 offsets exceeds the allowed skip number, then it is assumed the genbanks are too different for a meaningful comparison, | |
52 and the job will fail. | |
53 | |
54 **Input** | |
55 | |
56 Files *MUST* be in Genbank format. | |
57 The tool only looks at CDS features in the genome. | |
58 | |
59 **Output** | |
60 | |
61 * A tab delimited file of the results | |
62 * Summary of the results. | |
63 ]]></help> | |
<!-- Credits for the original script and the Galaxy adaptation. Every BibTeX entry carries its own key so the entries do not collide when exported into a single bibliography. --> | |
64 <citations> | |
65 <citation type="bibtex"> | |
66 @unpublished{compareAnnotationsWick, | |
67 author = {R. Wick}, | |
68 title = {CPT Galaxy Tools}, | |
69 year = {2020}, | |
70 note = {https://github.com/rrwick/Compare-annotations} | |
71 } | |
72 </citation> | |
73 <citation type="bibtex"> | |
74 @unpublished{galaxyToolsRoss, | |
75 author = {C. Ross}, | |
76 title = {CPT Galaxy Tools}, | |
77 year = {2020-}, | |
78 note = {https://github.com/tamu-cpt/galaxy-tools/} | |
79 } | |
80 </citation> | |
81 <citation type="bibtex"> | |
82 @unpublished{galaxyToolsCriscione, | |
83 author = {A. Criscione}, | |
84 title = {CPT Galaxy Tools}, | |
85 year = {2019-2021}, | |
86 note = {https://github.com/tamu-cpt/galaxy-tools/} | |
87 } | |
88 </citation> | |
89 </citations> | |
90 </tool> |