changeset 0:803c7c39993e draft

Uploaded
author hyungrolee
date Sat, 14 Jun 2014 19:00:14 -0400
parents
children 4ea226638345
files mgescan.xml
diffstat 1 files changed, 101 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mgescan.xml	Sat Jun 14 19:00:14 2014 -0400
@@ -0,0 +1,101 @@
+<?xml version="1.0"?>
+
+<tool name="MGEScan" id="mgescan" version="0.0.1" workflow_compatible="false">
+	<description>
+		MGEScan
+	</description>
+	<command interpreter="bash">
+		mgescan.sh $input '$input.name' 3 $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3
+		<!-- mgescan.sh $input $input.name $hmmver $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3 -->
+	</command>
+	<inputs>
+		<param format="txt" name="input" type="data" label="From"/>
+		<!--param name="hmmver" type="select" label="Hmmsearch version">
+			<option selected="selected" value="3">3</option>
+			<option value="2">2</option>
+		</param-->
+		<param name="program" type="select" label="MGEScan">
+			<option selected="selected" value="B">Both</option>
+			<option value="L">LTR</option>
+			<option value="N">nonLTR</option>
+		</param>
+	</inputs>
+	<outputs>
+		<data format="ltr.out" name="output">
+			<filter>program != "N"</filter>
+		</data>
+		<data format="fasta" name="clade">
+			<filter>program != "L"</filter>
+		</data>
+		<data format="qfile" name="qvalue_en">
+			<filter>program != "L"</filter>
+		</data>
+		<data format="qfile" name="qvalue_rt">
+			<filter>program != "L"</filter>
+		</data>
+		<data format="gff3" name="ltr_gff3">
+			<filter>program != "N"</filter>
+		</data>
+		<data format="gff3" name="nonltr_gff3">
+			<filter>program != "L"</filter>
+		</data>
+
+	</outputs>
+	<help>
+Running the program
+===================
+
+To run MGEScan, select input genome data in From select box, and select program either LTR, nonLTR or both.
+
+Click 'Execute' button.
+
+If you like to have more options to run LTR or nonLTR progrma, use separated tools on the left panel.
+In LTR > MGEScan-LTR, preprocessing by repeatmasker and setting other variables are available e.g. distance(bp) between LTRs.
+
+Output
+============
+A. MGEScan_LTR:
+Upon completion, MGEScan-LTR generates a file "ltr.out". This output file has information
+about clusters and coordinates of LTR retrotransposons identified. Each cluster of LTR
+retrotransposons starts with the head line of "[cluster_number]---------", followed by
+the information of LTR retrotransposons in the cluster. The columns for LTR
+retrotransposons are as follows.
+
+1. LTR_id: unique id of LTRs identified. It consist of two components, sequence file name and id in the file. For example, chr1_2 is the second LTR retrotransposon in the chr1 file.
+2. start position of 5’ LTR.
+3. end position of 5’ LTR.
+4. start position of 3’ LTR.
+5. end position of 3’ LTR.
+6. strand: + or -.
+7. length of 5’ LTR.
+8. length of 3’ LTR.
+9. length of the LTR retrotransposon.
+10. TSD on the left side of the LTR retotransposons.
+11. TSD on the right side of the LTR retrotransposons.
+12. di(tri)nucleotide on the left side of 5’LTR
+13. di(tri)nucleotide on the right side of 5’LTR
+14. di(tri)nucleotide on the left side of 3’LTR
+15. di(tri)nucleotide on the right side of 3’LTR 
+
+B. MGEScan_nonLTR:
+   Upon completion, MGEScan-nonLTR generates the directory, "info" in the data directory you
+   specified. In this "info" directory, two sub-directories ("full" and "validation") are
+   generated.
+
+   - The "full" directory is for storing sequences of elements. Each subdirectory in "full" 
+   is the name of clade. In each directory of clade, the DNA sequences of nonLTRs identified
+   are listed. Each sequence is in fasta format. The header contains the position
+   information of TEs identified:
+   [genome_file_name]_[start position in the sequence]
+
+   For example, >chr1_333 means that this element start at 333bp in the "chr1" file.
+
+   - The "validation" directory is for storing Q values. In the files "en" and "rt", the first column corresponds to the element name and the last column Q value. 
+
+License
+============
+Copyright 2014 Mina Rho, Haixu Tang.
+You may redistribute this software under the terms of the GNU General Public License.
+
+</help>
+</tool>