annotate dante.xml @ 32:393fb45bd50f draft

planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
author petr-novak
date Tue, 04 Feb 2025 09:48:33 +0000
parents ae4cebdccf74
children 726b8447eb09
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
1 <tool id="dante" name="Domain based ANnotation of Transposable Elements - DANTE" version="2.6.2">
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
2 <description> Tool for annotation of transposable elements based on the similarity to conserved protein domains database. </description>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
3 <requirements>
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
4 <requirement type="package">dante=0.2.5</requirement>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
5 </requirements>
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
6 <stdio>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
7 <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
8 <regex match="error" source="stderr" level="fatal" description="Unknown error" />
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
9 </stdio>
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
10 <command><![CDATA[
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
11 #if str($input_type.input_type_selector) == "aln"
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
12 parse_aln.py -a $(input_sequences) -f sequences.fasta -p sequences.profile
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
13 &&
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
14 INPUT_SEQUENCES="sequences.fasta"
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
15 #else
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
16 INPUT_SEQUENCES=$(input_sequences)
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
17 #end if
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
18 &&
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
19 grep -c "^>" \${INPUT_SEQUENCES}
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
20 &&
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
21 NUMBER_OF_SEQUENCES=`grep -c "^>" \${INPUT_SEQUENCES}`
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
22 &&
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
23 if [ \${NUMBER_OF_SEQUENCES} -gt 10000 ]; then
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
24 SHORT_READS="--short_reads";
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
25 else
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
26 SHORT_READS="";
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
27 fi
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
28
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
29 &&
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
30 dante --query \${INPUT_SEQUENCES} --domain_gff ${DomGff}
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
31 --database $database
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
32 --scoring_matrix ${scoring_matrix}
24
df99812ded92 "planemo upload commit a0a9b02c60a91942a271b8b35648c0b152fe1ebd-dirty"
petr-novak
parents: 23
diff changeset
33 --cpu \${GALAXY_SLOTS:-1}
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
34 \${SHORT_READS}
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
35
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
36 &&
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
37 dante_gff_output_filtering.py --dom_gff ${DomGff}
22
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
38 --domains_prot_seq ${Domains_filtered} --domains_filtered ${DomGff_filtered}
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
39 --output_dir .
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
40 --selected_dom All --th_identity 0.35
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
41 --th_similarity 0.45 --th_length 0.8
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
42 --interruptions 3 --max_len_proportion 1.2
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
43 --element_type ''
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
44
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
45 #if str($input_type.input_type_selector) == "aln"
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
46 &&
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
47 coverage2gff.py -p sequences.profile -g ${DomGff}
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
48 #end if
10
d0431a839606 Uploaded
petr-novak
parents: 6
diff changeset
49
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
50 #if str($iterative) == "Yes"
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
51 &&
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
52 dante_gff_output_filtering.py --dom_gff ${DomGff}
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
53 --domains_prot_seq domains_filtered.fasta --domains_filtered domains_filtered.gff
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
54 --output_dir .
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
55 --selected_dom All --th_identity 0.35
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
56 --th_similarity 0.45 --th_length 0.9
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
57 --interruptions 1 --max_len_proportion 1.1
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
58 --element_type ''
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
59 &&
10
d0431a839606 Uploaded
petr-novak
parents: 6
diff changeset
60
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
61
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
62
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
63 fasta2database.py domains_filtered.fasta domains_filtered.db
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
64 domains_filtered.class
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
65 &&
10
d0431a839606 Uploaded
petr-novak
parents: 6
diff changeset
66
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
67 lastdb -p domains_filtered.db domains_filtered.db
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
68 &&
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
69
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
70 dante.py --query \${INPUT_SEQUENCES} --domain_gff ${DomGff2}
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
71 --protein_database domains_filtered.db
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
72 --classification domains_filtered.class
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
73 --scoring_matrix BL80
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
74
10
d0431a839606 Uploaded
petr-novak
parents: 6
diff changeset
75
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
76 #if str($input_type.input_type_selector) == "aln"
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
77 &&
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
78 coverage2gff.py -p sequences.profile -g ${DomGff2}
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
79 #end if
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
80 #end if
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
81 ]]>
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
82 </command>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
83 <inputs>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
84
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
85 <conditional name="input_type">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
86 <param name="input_type_selector" type="select" label="Choose the type of sequence data">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
87 <option value="fasta" selected="true">Fasta</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
88 <option value="aln">Aln file</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
89 </param>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
90 <when value="fasta">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
91 <param name="input_sequences" type="data" format="fasta" label="Sequences in fasta format"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
92 </when>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
93 <when value="aln">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
94 <param name="input_sequences" type="data" format="txt" label="Sequences in ALN format (extracted from RepeatExplorer)"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
95 </when>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
96 </conditional>
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
97 <param name="database" type="select" label="Select REXdb database">
30
f0663cdbae66 planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
petr-novak
parents: 28
diff changeset
98 <option value="Viridiplantae_v4.0" selected="true">Viridiplantae_v4.0</option>
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
99 <option value="Viridiplantae_v3.0" selected="true">Viridiplantae_v3.0</option>
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
100 <option value="Metazoa_v3.1" selected="true">Metazoa_v3.1</option>
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 22
diff changeset
101 <option value="Viridiplantae_v2.2" selected="true">Viridiplantae_v2.2</option>
30
f0663cdbae66 planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
petr-novak
parents: 28
diff changeset
102 <option value="Metazoa_v3.0" selected="true">Metazoa_v3.0</option>
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
103 </param>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
104 <param name="scoring_matrix" type="select" label="Select scoring matrix">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
105 <option value="BL80" selected="true" >BLOSUM80</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
106 <option value="BL62">BLOSUM62</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
107 <option value="MIQS">MIQS</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
108 </param>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
109
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
110 <param name="iterative" type="select" label="Run iterative search" truevalue="true" valsevalue="false"
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
111 help="Second iteration run search against database of proteins extracted from query. Second iteration can yield some extra hits in some cases.">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
112 <option value="No" selected="true">No</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
113 <option value="Yes">Yes</option>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
114 </param>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
115 </inputs>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
116
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
117 <outputs>
22
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
118 <data format="gff3" name="DomGff" label="DANTE on ${on_string}, full output" />
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
119 <data format="gff3" name="DomGff_filtered" label="DANTE on ${on_string}, filtered output" />
1eabd42e00ef Uploaded
petr-novak
parents: 15
diff changeset
120 <data format="fasta" name="Domains_filtered" label="DANTE on ${on_string}, protein domains, filtered output" />
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
121 <data format="gff3" name="DomGff2" label="DANTE on ${on_string}: 2nd pass">
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
122 <filter>iterative == "Yes" </filter>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
123 </data>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
124 </outputs>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
125 <tests>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
126 <test>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
127 <param name="input_type" value="fasta"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
128 <param name="input_sequences" value="GEPY_test_long_1.fa"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
129 <param name="db_type" value="Viridiplantae_v3.0"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
130 <param name="scoring_matrix" value="BL80"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
131 <param name="iterative" value="No"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
132 <output name="DomGff" value="GEPY_test_long_1_output_unfiltered.gff3"/>
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
133 </test>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
134
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
135
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
136 </tests>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
137
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
138
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
139 <help>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
140
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
141
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
142
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
143
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
144 **WHAT IT DOES**
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
145
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
146 This tool uses the external alignment program `LAST`_ and the RepeatExplorer database of TE protein domains (REXdb) (Viridiplantae and Metazoa)
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
147
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
148 .. _LAST: http://last.cbrc.jp/
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
149
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
150 *Lastal* runs a similarity search to find hits between the query DNA sequence and our database of protein domains from all Viridiplantae repetitive elements. Hits with overlapping positions in the sequence (even through other hits) form a cluster which represents one potential protein domain. Strand orientation is taken into consideration when forming the clusters, which means each cluster is built from forward or reverse stranded hits exclusively. The clusters are subsequently processed separately; within one cluster, positions are scanned base-by-base and classification strings are assigned to each of them based on the database sequences which were mapped to that place. These assigned classification strings consist of a domain type as well as the class and lineage of the repetitive element where the database protein comes from. Different classification levels are separated by the "|" character. Every hit is scored according to the scoring matrix used for DNA-protein alignment (BLOSUM80). For a single position, only the hits reaching a certain percentage (80% by default) of the overall best score within the whole cluster are reported. One cluster of overlapping hits represents one domain region and is recorded as one line in the resulting GFF3 file. Regarding the classification strings assigned to one region (cluster), there are three situations that can occur:
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
151
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
152 1. There is a single classification string assigned to each position, and the classifications along all the positions in the region are mutually uniform; in this case the domain's final classification is equivalent to this unique classification.
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
153 2. There are multiple classification strings assigned to one cluster, i.e. one domain, which leads to classification to the common (less specific) level of all the strings
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
154 3. There is a conflict at the domain type level, domains are reported with slash (e.g. RT/INT) and the classification is in this case ambiguous
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
155
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
156 **There are 2 outputs produced by this tool:**
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
157
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
158 1. GFF3 file of all protein domains built from all hits found by LAST. Domains are reported per line as regions (start - end) on the original DNA sequence including the seq ID, alignment score and strand orientation. The last "Attributes" column contains several semicolon-separated pieces of information related to annotation, repetitive classification, alignment and its quality. This file can undergo further filtering using the *Protein Domain Filter* tool
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
159
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
160 - Attributes reported always:
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
161
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
162 Name
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
163 type of domain; if ambiguous reported with slash
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
164
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
165 Final_classification
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
166 definite classification based on all partial classifications of the Region_Hits_Classifications attribute or
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
167 "Ambiguous_domain" when there is an ambiguous domain type
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
168
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
169 Region_Hits_Classifications
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
170 all hits classifications (comma separated) from a certain domain region that reach the set score threshold; in case of multiple annotations the square brackets indicate the number of bases having this particular classification
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
171
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
172 - Attributes only reported in case of unambiguous domain type (all the attributes including quality information are related to the Best_Hit of the region):
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
173
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
174 Best_hit
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
175 classification and position of the best alignment with the highest score within the cluster; in the square brackets is the percentage of the whole cluster range that this best hit covers
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
176
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
177 Best_Hit_DB_Pos
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
178 showing which part of the original database domain corresponding to the Best Hit was aligned on query DNA (e.g. **Best_Hit_DB_Pos=17:75of79** means the Best Hit reported in GFF represents region from 17th to 75th of total 79 amino acids in the original domain from the database)
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
179
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
180 DB_Seq
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
181 database protein sequence of the best hit mapped to the query DNA
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
182
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
183 Query_Seq
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
184 alignment sequence of the query DNA for the best hit
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
185
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
186 Identity
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
187 ratio of identical amino acids in alignment sequence to the length of alignment
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
188
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
189 Similarity
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
190 ratio of alignment positions with positive score (according to the scoring matrix) to the length of alignment
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
191
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
192 Relat_Length
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
193 ratio of gapless length of the aligned protein sequence to the whole length of the database protein
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
194
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
195 Relat_Interruptions
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
196 number of the interruptions (frameshifts + stop codons) in aligned translated query sequence per each starting 100 AA
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
197
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
198 Hit_to_DB_Length
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
199 proportion of alignment length to the original length of the protein domain from database
32
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
200
393fb45bd50f planemo upload commit ca3e700d61477ca5de353babb7f5f1db469d937b-dirty
petr-novak
parents: 31
diff changeset
201
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
202
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
203 !NOTE: The tool can on average process 0.5 Gbp of DNA sequence per day. This is only a rough estimate and it is highly dependent on the input data (occurrence of repetitive elements) as well as computing resources. Maximum running time of the tool is 7 days.
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
204
15
3151a72a6671 Uploaded
petr-novak
parents: 10
diff changeset
205 </help>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
206 </tool>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
207