Mercurial > repos > cpt > cpt_compare_gbk
comparison cpt_gbk_compare/gbk_compare.xml @ 0:fc603e665d75 draft default tip
Uploaded
author | cpt |
---|---|
date | Tue, 21 Jun 2022 19:46:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fc603e665d75 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="edu.tamu.cpt.gbk.gbk_compare" name="Compare Genbanks" version="20.1.0"> | |
3 <description>, compare the identity of two genbank files</description> | |
4 <macros> | |
5 <import>macros.xml</import> | |
6 <import>cpt-macros.xml</import> | |
7 </macros> | |
8 <requirements> | |
9 <requirement type="package" version="3.7">python</requirement> | |
10 <requirement type="package" version="1.74">biopython</requirement> | |
11 </requirements> | |
12 <command detect_errors="aggressive"><![CDATA[ | |
13 python '$__tool_directory__/gbk_compare.py' | |
14 '$annotation_1' | |
15 '$annotation_2' | |
16 --match_identity_threshold $match_identity_threshold | |
17 --allowed_skipped_genes $allowed_skipped_genes | |
18 $addNotes | |
19 -sumOut '$sumOut' | |
20 > '$repOut']]></command> | |
21 <inputs> | |
22 <param label="First annotated genome (Genbank)" name="annotation_1" type="data" format="genbank"/> | |
23 <param label="Second annotated genome (Genbank)" name="annotation_2" type="data" format="genbank"/> | |
24 <param label="Two genes must have at least this nucleotide identity to be considered the same (0.00 to 1.00)" name="match_identity_threshold" type="float" value="0.70" min="0.00" max="1.00"/> | |
25 <param label="This many missing genes are allowed when aligning the annotations (see help below)" name="allowed_skipped_genes" type="integer" value="10" min="0"/> | |
26 <param label="Include 'note' field in output" name="addNotes" type="boolean" truevalue="--addNotes" falsevalue=""/> | |
27 </inputs> | |
28 <outputs> | |
29 <data format="tsv" name="sumOut" label="Report Summary"/> | |
30 <data format="tsv" name="repOut" label="Comparison Report of Two Genomes"/> | |
31 </outputs> | |
32 <tests> | |
33 <test><param name="annotation_1" value="related.gbk" /> | |
34 <param name="annotation_2" value="related.1-2000.gbk" /> | |
35 <output name="sumOut" value="gbkCompare_summaryOut.tsv" /> | |
36 <output name="repOut" value="gbkCompare_reportOut.tsv" /></test> | |
37 </tests> | |
38 <help><![CDATA[ | |
39 **What it does** | |
40 | |
41 This tool quantifies the changes in annotations for a genome, such as when it has been reassembled and/or reannotated. | |
42 The tool does a global alignment (using pairwise2 from biopython) of the annotated genes in both files. This means that | |
43 the input genomes must be roughly aligned at the gene level. If there are structural rearrangements, the tool will not work. | |
44 The original script was written by Ryan Wick from Monash University and was adapted to work on Galaxy by the CPT team. | |
45 | |
46 "Allowed Skipped Genes" is the number of genes that are allowed to be present in one record but not the other. Presence is | |
47 determined by whether a pair of genes are above the provided identity threshold. The tool will compare the CDSs in order, | |
48 i.e. the 1st CDS from Genbank 1 and the 1st CDS from the 2nd Genbank, the 2nd CDS and 2nd CDS, and so on. When a pair fails | |
49 to meet the identity threshold, one of the CDSs is skipped and the comparisons will resume but offset, i.e. if 3rd and 3rd | |
50 CDS failed, then the 3rd CDS from Genbank 1 and the 4th from Genbank 2 will be checked next, and if they pass then the | |
51 march down the list will resume as 4th and 5th compared to each other, then 5th and 6th, and so on. If the number of these | |
52 offsets exceeds the allowed skip number, then it is assumed the genbanks are too different for a meaningful comparison, | |
53 and the job will fail. | |
54 | |
55 **Input** | |
56 | |
57 Files *MUST* be in GenBank format. | |
58 The tool only looks at CDS features in the genome. | |
59 | |
60 **Output** | |
61 | |
62 * A tab delimited file of the results | |
63 * Summary of the results. | |
64 ]]></help> | |
65 <citations> | |
66 <citation type="bibtex"> | |
67 @unpublished{wickCompareAnnotations, | |
68 author = {R. Wick}, | |
69 title = {CPT Galaxy Tools}, | |
70 year = {2020}, | |
71 note = {https://github.com/rrwick/Compare-annotations} | |
72 } | |
73 </citation> | |
74 <citation type="bibtex"> | |
75 @unpublished{rossCptGalaxyTools, | |
76 author = {C. Ross}, | |
77 title = {CPT Galaxy Tools}, | |
78 year = {2020-}, | |
79 note = {https://github.com/tamu-cpt/galaxy-tools/} | |
80 } | |
81 </citation> | |
82 <citation type="bibtex"> | |
83 @unpublished{criscioneCptGalaxyTools, | |
84 author = {A. Criscione}, | |
85 title = {CPT Galaxy Tools}, | |
86 year = {2019-2021}, | |
87 note = {https://github.com/tamu-cpt/galaxy-tools/} | |
88 } | |
89 </citation> | |
90 </citations> | |
91 </tool> |