Mercurial > repos > petr-novak > dante
annotate dante_gff_to_dna.xml @ 25:74babe57d739 draft
"planemo upload commit a0a9b02c60a91942a271b8b35648c0b152fe1ebd-dirty"
author | petr-novak |
---|---|
date | Fri, 27 Jan 2023 09:28:06 +0000 |
parents | df99812ded92 |
children | 02c6dff8c381 |
rev | line source |
---|---|
24
df99812ded92
"planemo upload commit a0a9b02c60a91942a271b8b35648c0b152fe1ebd-dirty"
petr-novak
parents:
23
diff
changeset
|
1 <tool id="domains_extract" name="Extract Domains Nucleotide Sequences" version="1.1.5"> |
0 | 2 <description> Tool to extract nucleotide sequences of protein domains found by DANTE </description> |
9 | 3 <requirements> |
24
df99812ded92
"planemo upload commit a0a9b02c60a91942a271b8b35648c0b152fe1ebd-dirty"
petr-novak
parents:
23
diff
changeset
|
4 <requirement type="package">dante=0.1.5</requirement> |
9 | 5 </requirements> |
0 | 6 <command> |
7 TEMP_DIR_LINEAGES=\$(mktemp -d) && | |
23
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
8 /mnt/raid/users/petr/workspace/dante/dante_gff_to_dna.py --domains_gff '${domains_gff}' --input_dna '${input_dna}' --out_dir "\$TEMP_DIR_LINEAGES" |
0 | 9 |
10 #if $extend_edges: | |
11 --extended True | |
12 #else: | |
13 --extended False | |
14 #end if | |
23
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
15 --database '${database}' |
0 | 16 && |
17 | |
21 | 18 cat "\$TEMP_DIR_LINEAGES"/*fasta > '$out_fasta' && |
0 | 19 rm -rf "\$TEMP_DIR_LINEAGES" |
20 </command> | |
21 <inputs> | |
22 <param format="fasta" type="data" name="input_dna" label="Input DNA" help="Choose input DNA sequence(s) to extract the domains from" /> | |
23 <param format="gff" type="data" name="domains_gff" label="Protein domains GFF" help="Choose filtered protein domains GFF3 (DANTE's output)" /> | |
23
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
24 <param name="database" type="select" label="Select REXdb database"> |
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
25 <option value="Viridiplantae_v3.0" selected="true">Viridiplantae_v3.0</option> |
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
26 <option value="Metazoa_v3.1">Metazoa_v3.1</option> |
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
27 <option value="Viridiplantae_v2.2">Viridiplantae_v2.2</option> |
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
28 <option value="Metazoa_v3.0">Metazoa_v3.0</option> |
e2bbc79f0fac
"planemo upload commit baf4ca09569b1b709c37f2df712e778da05edaf9-dirty"
petr-novak
parents:
21
diff
changeset
|
29 </param> |
0 | 30 |
31 <param name="extend_edges" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Extend sequence edges" help="Extend extracted sequence edges to the full length of database domains sequences"/> | |
32 </inputs> | |
33 <outputs> | |
34 <data format="fasta" name="out_fasta" label="Concatenated fasta domains NT sequences from ${input_dna.hid}" /> | |
35 </outputs> | |
36 | |
37 <help> | |
38 | |
39 **WHAT IT DOES** | |
40 | |
41 This tool extracts nucleotide sequences of protein domains from reference DNA based on DANTE's output. It can be used e.g. for deriving phylogenetic relations of individual mobile elements within a species. This can be done separately for individual protein domain types. | |
42 In this case, prior to running this tool, run DANTE on the input DNA: | |
43 | |
44 1. Protein Domains Finder | |
45 2. Protein Domains Filter (quality filter + type of domain, e.g. RT) | |
46 | |
47 INPUTS: | |
48 * original DNA sequence in multifasta format to extract the domains from | |
49 * DANTE's output GFF3 file (preferably filtered for quality and specific domain type) | |
50 | |
51 OUTPUT: | |
52 | |
53 * concatenated fasta file of nucleotide sequences for individual transposon lineages | |
54 | |
55 By default, sequences will be EXTENDED if the alignment reported by LASTAL does not cover the whole protein sequence from the database. | |
56 As a result, the corresponding nucleotide region of the WHOLE aligned database domain will be reported. For every record in the GFF3 file, the sequence is reported for the BEST HIT within the domain region under the following conditions: | |
57 | |
58 * The domain cannot be ambiguous, i.e. the FINAL CLASSIFICATION of the domain region corresponds to the last classification level | |
59 * Extracted sequences are not reported if they contain any Ns within the extracted region | |
60 | |
61 </help> | |
62 </tool> |