annotate cpt_gbk_compare/gbk_compare.xml @ 0:fc603e665d75 draft default tip

Uploaded
author cpt
date Tue, 21 Jun 2022 19:46:32 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fc603e665d75 Uploaded
cpt
parents:
diff changeset
1 <?xml version="1.0"?>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
2 <tool id="edu.tamu.cpt.gbk.gbk_compare" name="Compare Genbanks" version="20.1.0">
fc603e665d75 Uploaded
cpt
parents:
diff changeset
3 <description>, compare the identity of two genbank files</description>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
4 <macros>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
5 <import>macros.xml</import>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
6 <import>cpt-macros.xml</import>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
7 </macros>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
8 <requirements>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
9 <requirement type="package" version="3.7">python</requirement>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
10 <requirement type="package" version="1.74">biopython</requirement>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
11 </requirements>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
12 <command detect_errors="aggressive"><![CDATA[
fc603e665d75 Uploaded
cpt
parents:
diff changeset
13 python $__tool_directory__/gbk_compare.py
fc603e665d75 Uploaded
cpt
parents:
diff changeset
14 $annotation_1
fc603e665d75 Uploaded
cpt
parents:
diff changeset
15 $annotation_2
fc603e665d75 Uploaded
cpt
parents:
diff changeset
16 --match_identity_threshold $match_identity_threshold
fc603e665d75 Uploaded
cpt
parents:
diff changeset
17 --allowed_skipped_genes $allowed_skipped_genes
fc603e665d75 Uploaded
cpt
parents:
diff changeset
18 $addNotes
fc603e665d75 Uploaded
cpt
parents:
diff changeset
19 -sumOut $sumOut
fc603e665d75 Uploaded
cpt
parents:
diff changeset
20 > $repOut]]></command>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
21 <inputs>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
22 <param label="First annotated genome (Genbank)" name="annotation_1" type="data" format="genbank"/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
23 <param label="Second annotated genome (Genbank)" name="annotation_2" type="data" format="genbank"/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
24 <param label="Two genes must have at least this nucleotide identity to be considered the same (0.00 to 1.00)" name="match_identity_threshold" type="float" value="0.70" min="0.00" max="1.00"/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
25 <param label="This many missing genes are allowed when aligning the annotations (see help below)" name="allowed_skipped_genes" type="integer" value="10"/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
26 <param label="Include 'note' field in output" name="addNotes" type="boolean" truevalue="--addNotes" falsevalue=""/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
27 </inputs>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
28 <outputs>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
29 <data format="tsv" name="sumOut" label="Report Summary"/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
30 <data format="tsv" name="repOut" label="Comparison Report of Two Genomes"/>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
31 </outputs>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
32 <tests>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
33 <param name="annotation_1" value="related.gbk" />
fc603e665d75 Uploaded
cpt
parents:
diff changeset
34 <param name="annotation_2" value="related.1-2000.gbk" />
fc603e665d75 Uploaded
cpt
parents:
diff changeset
35 <output name="sumOut" value="gbkCompare_summaryOut.tsv" />
fc603e665d75 Uploaded
cpt
parents:
diff changeset
36 <output name="repOut" value="gbkCompare_reportOut.tsv" />
fc603e665d75 Uploaded
cpt
parents:
diff changeset
37 </tests>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
38 <help><![CDATA[
fc603e665d75 Uploaded
cpt
parents:
diff changeset
39 **What it does**
fc603e665d75 Uploaded
cpt
parents:
diff changeset
40
fc603e665d75 Uploaded
cpt
parents:
diff changeset
41 This tool quantifies the changes in annotations for a genome, such as when it has been reassembled and/or reannotated.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
42 The tool does a global alignment (using pairwise2 from biopython) of the annotated genes in both files. This means that
fc603e665d75 Uploaded
cpt
parents:
diff changeset
43 the input genomes must be roughly aligned at the gene level. If there are structural rearrangements, the tool will not work.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
44 The original script was written by Ryan Wick from Monash University and was adapted to work on Galaxy by the CPT team.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
45
fc603e665d75 Uploaded
cpt
parents:
diff changeset
46 "Allowed Skipped Genes" is the number of genes that are allowed to be present in one record but not the other. Presence is
fc603e665d75 Uploaded
cpt
parents:
diff changeset
47 determined by whether a pair of genes are above the provided identity threshold. The tool will compare the CDSs in order,
fc603e665d75 Uploaded
cpt
parents:
diff changeset
48 i.e. the 1st CDS from Genbank 1 and the 1st CDS from the 2nd Genbank, the 2nd CDS and 2nd CDS, and so on. When a pair fails
fc603e665d75 Uploaded
cpt
parents:
diff changeset
49 to meet the identity threshold, one of the CDSs is skipped and the comparisons will resume but offset, i.e. if 3rd and 3rd
fc603e665d75 Uploaded
cpt
parents:
diff changeset
50 CDS failed, then the 3rd CDS from Genbank 1 and the 4th from Genbank 2 will be checked next, and if they pass then the
fc603e665d75 Uploaded
cpt
parents:
diff changeset
51 march down the list will resume as 4th and 5th compared to each other, then 5th and 6th, and so on. If the number of these
fc603e665d75 Uploaded
cpt
parents:
diff changeset
52 offsets exceeds the allowed skip number, then it is assumed the genbanks are too different for a meaningful comparison,
fc603e665d75 Uploaded
cpt
parents:
diff changeset
53 and the job will fail.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
54
fc603e665d75 Uploaded
cpt
parents:
diff changeset
55 **Input**
fc603e665d75 Uploaded
cpt
parents:
diff changeset
56
fc603e665d75 Uploaded
cpt
parents:
diff changeset
57 Files *MUST* be in Genbank format.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
58 The tool only looks at CDS features in the genome.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
59
fc603e665d75 Uploaded
cpt
parents:
diff changeset
60 **Output**
fc603e665d75 Uploaded
cpt
parents:
diff changeset
61
fc603e665d75 Uploaded
cpt
parents:
diff changeset
62 * A tab delimited file of the results
fc603e665d75 Uploaded
cpt
parents:
diff changeset
63 * Summary of the results.
fc603e665d75 Uploaded
cpt
parents:
diff changeset
64 ]]></help>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
65 <citations>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
66 <citation type="bibtex">
fc603e665d75 Uploaded
cpt
parents:
diff changeset
67 @unpublished{galaxyTools,
fc603e665d75 Uploaded
cpt
parents:
diff changeset
68 author = {R. Wick},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
69 title = {CPT Galaxy Tools},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
70 year = {2020},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
71 note = {https://github.com/rrwick/Compare-annotations}
fc603e665d75 Uploaded
cpt
parents:
diff changeset
72 }
fc603e665d75 Uploaded
cpt
parents:
diff changeset
73 </citation>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
74 <citation type="bibtex">
fc603e665d75 Uploaded
cpt
parents:
diff changeset
75 @unpublished{galaxyTools,
fc603e665d75 Uploaded
cpt
parents:
diff changeset
76 author = {C. Ross},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
77 title = {CPT Galaxy Tools},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
78 year = {2020-},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
79 note = {https://github.com/tamu-cpt/galaxy-tools/}
fc603e665d75 Uploaded
cpt
parents:
diff changeset
80 }
fc603e665d75 Uploaded
cpt
parents:
diff changeset
81 </citation>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
82 <citation type="bibtex">
fc603e665d75 Uploaded
cpt
parents:
diff changeset
83 @unpublished{galaxyTools,
fc603e665d75 Uploaded
cpt
parents:
diff changeset
84 author = {A. Criscione},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
85 title = {CPT Galaxy Tools},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
86 year = {2019-2021},
fc603e665d75 Uploaded
cpt
parents:
diff changeset
87 note = {https://github.com/tamu-cpt/galaxy-tools/}
fc603e665d75 Uploaded
cpt
parents:
diff changeset
88 }
fc603e665d75 Uploaded
cpt
parents:
diff changeset
89 </citation>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
90 </citations>
fc603e665d75 Uploaded
cpt
parents:
diff changeset
91 </tool>