annotate dante_gff_to_dna.xml @ 30:f0663cdbae66 draft default tip

planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
author petr-novak
date Tue, 05 Nov 2024 15:14:35 +0000
parents 8f2bd3cae2de
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30
f0663cdbae66 planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
petr-novak
parents: 29
diff changeset
1 <tool id="domains_extract" name="Extract Domains Nucleotide Sequences" version="2.5.1">
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
2 <description> Tool to extract nucleotide sequences of protein domains found by DANTE </description>
9
ed4d9ede9cb4 Uploaded
petr-novak
parents: 0
diff changeset
3 <requirements>
30
f0663cdbae66 planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
petr-novak
parents: 29
diff changeset
4 <requirement type="package">dante=0.2.5</requirement>
9
ed4d9ede9cb4 Uploaded
petr-novak
parents: 0
diff changeset
5 </requirements>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
6 <command>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
7 TEMP_DIR_LINEAGES=\$(mktemp -d) &amp;&amp;
29
8f2bd3cae2de Uploaded
petr-novak
parents: 28
diff changeset
8 dante_gff_to_dna.py --domains_gff ${domains_gff} --input_dna ${input_dna} --out_dir \$TEMP_DIR_LINEAGES
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
9
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
10 #if $extend_edges:
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
11 --extended True
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
12 #else:
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
13 --extended False
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
14 #end if
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 21
diff changeset
15 --database ${database}
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
16 &amp;&amp;
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
17
21
65a6fb89495d Uploaded
petr-novak
parents: 20
diff changeset
18 cat \$TEMP_DIR_LINEAGES/*fasta > $out_fasta &amp;&amp;
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
19 rm -rf \$TEMP_DIR_LINEAGES
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
20 </command>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
21 <inputs>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
22 <param format="fasta" type="data" name="input_dna" label="Input DNA" help="Choose input DNA sequence(s) to extract the domains from" />
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
23 <param format="gff" type="data" name="domains_gff" label="Protein domains GFF" help="Choose filtered protein domains GFF3 (DANTE's output)" />
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 21
diff changeset
24 <param name="database" type="select" label="Select REXdb database"> <!-- Single-choice select: exactly one option carries selected="true"; Viridiplantae_v4.0 is the default. -->
30
f0663cdbae66 planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
petr-novak
parents: 29
diff changeset
25 <option value="Viridiplantae_v4.0" selected="true">Viridiplantae_v4.0</option>
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 21
diff changeset
26 <option value="Viridiplantae_v3.0">Viridiplantae_v3.0</option>
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 21
diff changeset
27 <option value="Metazoa_v3.1">Metazoa_v3.1</option>
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 21
diff changeset
28 <option value="Viridiplantae_v2.2">Viridiplantae_v2.2</option>
30
f0663cdbae66 planemo upload commit 29868d121127a8bb509a42fb917b09f669ad4a09-dirty
petr-novak
parents: 29
diff changeset
29 <option value="Metazoa_v3.0">Metazoa_v3.0</option>
23
e2bbc79f0fac "planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents: 21
diff changeset
30 </param>
0
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
31
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
32 <param name="extend_edges" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Extend sequence edges" help="Extend extracted sequence edges to the full length of database domains sequences"/>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
33 </inputs>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
34 <outputs>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
35 <data format="fasta" name="out_fasta" label="Concatenated fasta domains NT sequences from ${input_dna.hid}" />
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
36 </outputs>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
37
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
38 <help>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
39
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
40 **WHAT IT DOES**
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
41
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
42 This tool extracts nucleotide sequences of protein domains from reference DNA based on DANTE's output. It can be used e.g. for deriving phylogenetic relations of individual mobile elements within a species. This can be done separately for individual protein domain types.
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
43 In this case, prior to running this tool, use DANTE on input DNA:
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
44
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
45 1. Protein Domains Finder
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
46 2. Protein Domains Filter (quality filter + type of domain, e.g. RT)
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
47
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
48 INPUTS:
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
49 * original DNA sequence in multifasta format to extract the domains from
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
50 * DANTE's output GFF3 file (preferably filtered for quality and specific domain type)
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
51
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
52 OUTPUT:
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
53
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
54 * concatenated fasta file of nucleotide sequences for individual transposon lineages
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
55
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
56 By default sequences will be EXTENDED if the alignment reported by LASTAL does not cover the whole protein sequence from the database.
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
57 As a result, the corresponding nucleotide region of the WHOLE aligned database domain will be reported. For every record in the GFF3 file the sequence is reported for the BEST HIT within the domain region under the following conditions:
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
58
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
59 * The domain cannot be ambiguous, i.e. the FINAL CLASSIFICATION of the domain region corresponds to the last classification level
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
60 * The extracted sequences are not reported in the case they contain any Ns within the extracted region
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
61
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
62 </help>
77d9f2ecb28a Uploaded
petr-novak
parents:
diff changeset
63 </tool>