5
|
1 <?xml version="1.0"?>
|
8
|
2 <tool name="MGEScan" id="mgescan" version="3.0.0">
|
5
|
<!-- Short description of the tool, shown together with the tool name. -->
<description>MGEScan</description>
|
|
<!-- Packages (with pinned versions) that this tool declares as dependencies. -->
<requirements>
    <requirement type="package" version="3.0.0">mgescan</requirement>
    <requirement type="package" version="4.0">tandem_repeats_finder</requirement>
    <requirement type="package" version="5.0.0">emboss</requirement>
    <requirement type="package" version="3.1b1">hmmer</requirement>
</requirements>
|
|
<!-- Command used to record the installed MGEScan version. -->
<version_command>mgescan --version</version_command>
<!--
    Positional arguments handed to mgescan.sh, in order:
       1  input         input dataset
       2  input.name    name of the input dataset
       3  3             hmmsearch version; fixed to 3 because the selectable
                        "hmmver" parameter is disabled in the inputs section
       4  output        LTR results (ltr.out)
       5  program       B (both), L (LTR) or N (nonLTR)
       6  clade         clade FASTA file
       7  qvalue_en     8  qvalue_rt
       9  ltr_gff3     10  nonltr_gff3     11  both_gff3
      12  mpi_yn.nmpi   number of MPI processes (0 when MPI is disabled)
    Every substitution is single-quoted so that values containing spaces or
    shell metacharacters reach the script as exactly one argument each.
    NOTE(review): the "interpreter" attribute is deprecated in current Galaxy
    releases; it is kept here so the script is still resolved the way this
    wrapper has always relied on.
-->
<command interpreter="bash">
mgescan.sh '$input' '$input.name' 3 '$output' '$program' '$clade' '$qvalue_en' '$qvalue_rt' '$ltr_gff3' '$nonltr_gff3' '$both_gff3' '$mpi_yn.nmpi'
</command>
|
|
<inputs>
    <!--
        Sequence dataset to scan.
        NOTE(review): "data" is presumably Galaxy's catch-all datatype, which would
        make the narrower "fasta" and "tabular" entries redundant; confirm which
        formats are really intended.
    -->
    <param name="input" type="data" format="fasta,tabular,data" label="Input FASTA file(s)"/>

    <!--
        Disabled: the hmmsearch version is hard-coded to 3 on the command line.
        <param name="hmmver" type="select" label="Hmmsearch version">
            <option selected="true" value="3">3</option>
            <option value="2">2</option>
        </param>
    -->

    <!-- Which program(s) to run; the chosen value also drives the output filters. -->
    <param name="program" type="select" label="MGEScan">
        <!-- selected takes a boolean; "selected" is not a recognised true value. -->
        <option value="B" selected="true">Both</option>
        <option value="L">LTR</option>
        <option value="N">nonLTR</option>
    </param>

    <!-- MPI switch: both branches define "nmpi" so the command can always read mpi_yn.nmpi. -->
    <conditional name="mpi_yn">
        <param name="mpi_select" type="select" label="Enable MPI">
            <option value="no_mpi">No</option>
            <option value="yes_mpi">Yes</option>
        </param>
        <when value="yes_mpi">
            <!-- Integer with a lower bound so that only a valid process count is accepted. -->
            <param name="nmpi" type="integer" value="1" min="1" label="Number of MPI Processes"/>
        </when>
        <when value="no_mpi">
            <!-- 0 is passed to the script when MPI is not enabled. -->
            <param name="nmpi" type="hidden" value="0"/>
        </when>
    </conditional>
</inputs>
|
|
<!--
    Each output carries a filter on the "program" parameter:
    LTR outputs require program != "N", nonLTR outputs require program != "L",
    and the combined GFF3 requires program == "B".
-->
<outputs>
    <!-- LTR results -->
    <data name="output" format="ltr.out" label="LTR Results (ltr.out)">
        <filter>program != "N"</filter>
    </data>

    <!-- nonLTR results: clade sequences and the two Q value files -->
    <data name="clade" format="fasta" label="clade file (FASTA)">
        <filter>program != "L"</filter>
    </data>
    <data name="qvalue_en" format="qfile" label="qvalue_en">
        <filter>program != "L"</filter>
    </data>
    <data name="qvalue_rt" format="qfile" label="qvalue_rt">
        <filter>program != "L"</filter>
    </data>

    <!-- GFF3 annotations -->
    <data name="ltr_gff3" format="gff3" label="GFF3 for LTR">
        <filter>program != "N"</filter>
    </data>
    <data name="nonltr_gff3" format="gff3" label="GFF3 for nonLTR">
        <filter>program != "L"</filter>
    </data>
    <data name="both_gff3" format="gff3" label="GFF3 for LTR and nonLTR">
        <filter>program == "B"</filter>
    </data>
</outputs>
|
|
65 <help>
|
|
66 How to Run MGEScan
|
|
67 ===================
|
|
68
|
|
* Select an input genome dataset from the select box and choose a program. Running both the LTR and nonLTR programs of MGEScan is the default.
|
|
70 * Click 'Execute' button.
|
|
71 * MPI will be enabled depending on your system support.
|
|
72
|
|
If you would like more options for running the LTR or nonLTR program, use the separate tools in the left panel.
|
|
74
|
|
For example, LTR > MGEScan-LTR offers preprocessing with RepeatMasker and additional settings, e.g. the distance (bp) between LTRs.
|
|
76
|
|
77 Output
|
|
78 ============
|
|
79
|
|
80 A. MGEScan_LTR:
|
|
81
|
|
82 Upon completion, MGEScan-LTR generates a file "ltr.out". This output file has information
|
|
83 about clusters and coordinates of LTR retrotransposons identified. Each cluster of LTR
|
|
84 retrotransposons starts with the head line of "[cluster_number]---------", followed by
|
|
85 the information of LTR retrotransposons in the cluster. The columns for LTR
|
|
86 retrotransposons are as follows.
|
|
87
|
|
1. LTR_id: unique id of the LTRs identified. It consists of two components, the sequence file name and the id in the file. For example, chr1_2 is the second LTR retrotransposon in the chr1 file.
|
|
89 2. start position of 5’ LTR.
|
|
90 3. end position of 5’ LTR.
|
|
91 4. start position of 3’ LTR.
|
|
92 5. end position of 3’ LTR.
|
|
93 6. strand: + or -.
|
|
94 7. length of 5’ LTR.
|
|
95 8. length of 3’ LTR.
|
|
96 9. length of the LTR retrotransposon.
|
|
10. TSD on the left side of the LTR retrotransposons.
|
|
98 11. TSD on the right side of the LTR retrotransposons.
|
|
99 12. di(tri)nucleotide on the left side of 5’LTR
|
|
100 13. di(tri)nucleotide on the right side of 5’LTR
|
|
101 14. di(tri)nucleotide on the left side of 3’LTR
|
|
102 15. di(tri)nucleotide on the right side of 3’LTR
|
|
103
|
|
104 B. MGEScan_nonLTR:
|
|
105 Upon completion, MGEScan-nonLTR generates the directory, "info" in the data directory you
|
|
106 specified. In this "info" directory, two sub-directories ("full" and "validation") are
|
|
107 generated.
|
|
108
|
|
109 * The "full" directory is for storing sequences of elements. Each subdirectory in "full"
|
|
110 is the name of clade. In each directory of clade, the DNA sequences of nonLTRs identified
|
|
111 are listed. Each sequence is in fasta format. The header contains the position
|
|
112 information of TEs identified: [genome_file_name]_[start position in the sequence]
|
|
113
|
|
For example, >chr1_333 means that this element starts at 333 bp in the "chr1" file.
|
|
115
|
|
116 * The "validation" directory is for storing Q values.
|
|
In the files "en" and "rt", the first column corresponds to the element name and the last column to the Q value.
|
|
118
|
|
119 License
|
|
120 ============
|
|
121 Copyright 2015.
|
|
122 You may redistribute this software under the terms of the GNU General Public License.
|
|
123
|
|
124 </help>
|
|
125 </tool>
|