Mercurial > repos > hyungrolee > mgescan

--- a/ltr.xml	Sat Jun 14 19:08:55 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-<?xml version="1.0"?>
-
-<tool name="MGEScan-LTR" id="mgescan-ltr" version="0.0.1" workflow_compatible="false">
-	<description>
-		de novo identification of LTR retroelements
-	</description>
-	<!--
-	<action module="galaxy.tools.actions.nonltr" class="nonltrToolAction"/>
-	<requirements>
-		<requirement type="package">HMMER</requirement>
-		<requirement type="package">EMBOSS</requirement>
-	</requirements>
-	-->
-	<command interpreter="bash">
-		<!-- Legacy direct invocation of find_ltr.pl, kept for reference:
-		/u/lee212/retrotminer/MGEScan_LTR/find_ltr.pl -genome=/u/lee212/retrotminer/MGEScan_nonLTR_v2/anoGam1/
-		-data=/u/lee212/retrotminer/MGEScan_LTR/example/data/  -program=/u/lee212/retrotminer/MGEScan_LTR/
-		-->
-		<!-- Earlier form without the LTR options: mgescan.sh $input $input.name $hmmver $output L None None None $ltr_gff3 None -->
-		<!-- Arguments 1 to 10 are shared by all MGEScan tools (None fills the nonLTR-only slots); 11 to 20 are the LTR options. The dataset name is quoted because it may contain spaces. -->
-		mgescan.sh $input '$input.name' 3 $output L None None None $ltr_gff3 None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
-	</command>
-	<inputs>
-		<!-- Former directory-based parameters, superseded by the dataset input below:
-		<param name="genome" type="text" label="directory where input genome data exists" />
-		<param name="data" type="text" label="directory of the output" />
-		-->
-		<param format="txt" name="input" type="data" label="From"/>
-		<!--param name="hmmver" type="select" label="Hmmsearch version">
-			<option selected="selected" value="3">3</option>
-			<option value="2">2</option>
-		</param-->
-		<!-- path.conf: RepeatMasker switch and scaffold file (mgescan.sh arguments 11 and 12) -->
-		<param name="sw_rm" type="select" display="checkboxes" multiple="True" label="enable repeatmasker, if necessary" help="Check this option to enable RepeatMasker preprocessing">
-			<option value="Yes">Yes</option>
-		</param>
-		<param name="scaffold" type="text" label="path for the big file that has all scaffolds"/>
-		<!-- value.conf: thresholds passed to mgescan.sh as arguments 13 to 19 -->
-		<param name="min_dist" type="text" value="2000" label="minimum distance(bp) between LTRs" />
-		<param name="max_dist" type="text" value="20000" label="maximum distance(bp) between LTRs" />
-		<param name="min_len_ltr" type="text" value="130" label="minimum length(bp) of LTR"/>
-		<param name="max_len_ltr" type="text" value="2000" label="maximum length(bp) of LTR"/>
-		<param name="ltr_sim_condition" type="text" value="70" label="minimum similarity(%) for LTRs in an element"/>
-		<param name="cluster_sim_condition" type="text" value="70" label="minimum similarity(%) for LTRs in a cluster"/>
-		<param name="len_condition" type="text" value="70" label="minimum length(bp) for LTRs aligned in local alignment"/>
-	</inputs>
-	<outputs> <!-- output datasets; their paths are handed to mgescan.sh as $output, $ltr_gff3 and $repeatmasker -->
-		<data format="ltr.out" name="output" />
-		<data format="gff3" name="ltr_gff3" />
-		<data format="repeatmasker" name="repeatmasker" >
-			<filter>sw_rm == "Yes"</filter> <!-- keep this dataset only when the sw_rm option is "Yes" -->
-		</data>
-	</outputs>
-	<help>
-Running the program
-===================
-
-To run MGEScan-LTR, follow the steps below,
-
-1. Specify options that you like to have:
-
-   * Check repeatmasker if you want to preprocess
-   * Set scaffold to the path of the file that has all scaffolds, if there is one.
-
-2. Update values:
-
-   * min_dist: minimum distance(bp) between LTRs.
-   * max_dist: maximum distance(bp) between LTRs.
-   * min_len_ltr: minimum length(bp) of LTR.
-   * max_len_ltr: maximum length(bp) of LTR.
-   * ltr_sim_condition: minimum similarity(%) for LTRs in an element.
-   * cluster_sim_condition: minimum similarity(%) for LTRs in a cluster
-   * len_condition: minimum length(bp) for LTRs aligned in local alignment.
-
-3. Click 'Execute'
-
-   * mask known repeats other than LTR retrotransposons
-   * identify LTRs
-
-Output
-======
-
-Upon completion, MGEScan-LTR generates a file ltr.out. This output file has information
-about clusters and coordinates of LTR retrotransposons identified. Each cluster of LTR
-retrotransposons starts with the head line of [cluster_number]---------, followed by
-the information of LTR retrotransposons in the cluster. The columns for LTR
-retrotransposons are as follows.
-
-1. LTR_id: unique id of LTRs identified. It consists of two components, sequence file name and id in the file. For example, chr1_2 is the second LTR retrotransposon in the chr1 file.
-2. start position of 5' LTR.
-3. end position of 5' LTR.
-4. start position of 3' LTR.
-5. end position of 3' LTR.
-6. strand: + or -.
-7. length of 5' LTR.
-8. length of 3' LTR.
-9. length of the LTR retrotransposon.
-10. TSD on the left side of the LTR retrotransposons.
-11. TSD on the right side of the LTR retrotransposons.
-12. di(tri)nucleotide on the left side of 5' LTR
-13. di(tri)nucleotide on the right side of 5' LTR
-14. di(tri)nucleotide on the left side of 3' LTR
-15. di(tri)nucleotide on the right side of 3' LTR
-
-License
-============
-
-Copyright 2014 Mina Rho, Haixu Tang.
-You may redistribute this software under the terms of the GNU General Public License.
-</help>
-</tool>
--- a/mgescan.sh	Sat Jun 14 19:08:55 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-#!/bin/bash
-# mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
-user_dir=/u/lee212
-#script=$user_dir/retrotminer/wazim/MGEScan1.1/run_MGEScan.pl
-#script=$user_dir/retrotminer/wazim/MGEScan1.3.1/run_MGEScan2.pl
-source $user_dir/virtualenv/retrotminer/bin/activate
-script_program=`which python`
-script=$user_dir/github/retrotminer/retrotminer/retrotminer.py
-input_file=$1
-input_file_name=$2
-hmmsearch_version=$3
-output_file=$4
-program=$5 # N is nonLTR, L is LTR and B is both
-# Optional output parameters for nonLTR
-clade=$6
-en=$7
-rt=$8
-ltr_gff3=$9
-nonltr_gff3=${10}
-#### for ltr between $11 and $20
-sw_rm=${11}
-scaffold=${12}
-min_dist=${13}
-max_dist=${14}
-min_len_ltr=${15}
-max_len_ltr=${16}
-ltr_sim_condition=${17}
-cluster_sim_condition=${18}
-len_condition=${19}
-repeatmasker=${20}
-
-# /nfs/nfs4/home/lee212/retrotminer/galaxy-dist/tools/retrotminer/find_ltr.sh /nfs/nfs4/home/lee212/retrotminer/galaxy-dist/database/files/000/dataset_1.dat /nfs/nfs4/home/lee212/retrotminer/galaxy-dist/database/files/000/dataset_3.dat
-
-#load env?
-source $user_dir/.bashrc
-source $user_dir/.bash_profile
-
-#set path for transeq
-export PATH=$user_dir/retrotminer/EMBOSS/bin:/usr/bin:$PATH
-
-#move to the working directory
-work_dir=`dirname $script`
-cd $work_dir
-
-#create directory for input and output
-mkdir -p input
-t_dir=`mktemp -p input -d` #relative path
-input_dir="$work_dir/$t_dir/seq" # full path
-output_dir="$work_dir/$t_dir/data"
-mkdir -p $input_dir
-mkdir -p $output_dir
-
-#make a copy of input
-/bin/cp $input_file $input_dir/$input_file_name
-
-if [ "2" == "$hmmsearch_version" ]
-then
-	export PATH=$user_dir/retrotminer/HMMER2.0/bin:$PATH
-else
-	export PATH=/usr/bin:$PATH
-fi
-
-if [ "$program" == "L" ]
-then
-	program_name="ltr"
-else
-	programname="nonltr"
-fi
-
-#run
-$script_program $script $program_name $input_dir/ --output=$output_dir/ #-hmmerv=$hmmsearch_version -sw_rm=${11} -scaffold=${12} -min_dist=${13} -max_dist=${14} -min_len_ltr=${15} -max_len_ltr=${16} -ltr_sim_condition=${17} -cluster_sim_condition=${18} -len_condition=${19}
-#/usr/bin/perl $script -genome=$input_dir/ -data=$output_dir/ -hmmerv=$hmmsearch_version -program=$program -sw_rm=${11} -scaffold=${12} -min_dist=${13} -max_dist=${14} -min_len_ltr=${15} -max_len_ltr=${16} -ltr_sim_condition=${17} -cluster_sim_condition=${18} -len_condition=${19}
-
-#RES=`ssh -i $user_dir/.ssh/.internal silo.cs.indiana.edu "/usr/bin/perl $script -genome=$input_dir/ -data=$output_dir/ -hmmerv=$hmmsearch_version -program=$program > /dev/null"`
-
-#make a copy of output
-if [ "$program" != "N" ]
-then
-	/bin/cp $output_dir/ltr/ltr.out $output_file
-	if [ "$ltr_gff3" != "None" ]
-	then
-		/bin/cp $output_dir/ltr/ltr.gff3 $ltr_gff3
-	fi
-
-	if [ "$repeatmasker" != "None" ]
-	then
-		# chr2L.fa.cat.gz  chr2L.fa.masked  chr2L.fa.out  chr2L.fa.out.pos  chr2L.fa.tbl
-		/bin/cp $output_dir/repeatmasker/${input_file_name}.out $repeatmasker
-	fi
-fi
-if [ "$program" != "L" ]
-then
-
-	#compressed_file=$output_dir/$RANDOM.tar.gz
-	#/bin/tar cvzfP $compressed_file $output_dir/info
-	#/bin/cp $compressed_file $output_file
-	/bin/cp $output_dir/info/full/*/* $clade
-	/bin/cp $output_dir/info/validation/en $en
-	/bin/cp $output_dir/info/validation/rt $rt
-	if [ "$nonltr_gff3" != "None" ]
-	then
-		/bin/cp $output_dir/info/nonltr.gff3 $nonltr_gff3
-		# nonltr.gff3
-		##gff-version 3
-		#chr2L.fa        MGEScan_nonLTR  mobile_genetic_element  19670384        19676921        .       .       .       ID=chr2L.fa_19670384
-		#chr2L.fa        MGEScan_nonLTR  mobile_genetic_element  17689430        17695994        .       .       .       ID=chr2L.fa_17689430
-		#chr2L.fa        MGEScan_nonLTR  mobile_genetic_element  11897186        11903717        .       .       .       ID=chr2L.fa_11897186
-		#chr2L.fa        MGEScan_nonLTR  mobile_genetic_element  49574   56174   .       .       .       ID=chr2L.fa_49574
-	fi
-
-#else
-	# Both LTR, nonLTR executed
-	#compressed_file=$output_dir/$RANDOM.tar.gz
-	#/bin/tar cvzfP $compressed_file $output_dir
-	#/bin/cp $compressed_file $output_file
-fi
-
-# delete temp directory
-if [ $? -eq 0 ]
-then
-	rm -rf $work_dir/$t_dir
-	#echo
-else
-	#echo cp -pr $work_dir/$t_dir $work_dir/error-cases/
-	cp -pr $work_dir/$t_dir $work_dir/error-cases/
-fi
--- a/mgescan.xml	Sat Jun 14 19:08:55 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<?xml version="1.0"?>
-
-<tool name="MGEScan" id="mgescan" version="0.0.1" workflow_compatible="false">
-	<description>
-		MGEScan
-	</description>
-	<command interpreter="bash">
-		mgescan.sh $input '$input.name' 3 $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3
-		<!-- The literal 3 is the hmmsearch version argument of mgescan.sh. Variant reading it from the (commented-out) hmmver parameter: mgescan.sh $input $input.name $hmmver $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3 -->
-	</command>
-	<inputs>
-		<param format="txt" name="input" type="data" label="From"/> <!-- genome dataset to scan -->
-		<!--param name="hmmver" type="select" label="Hmmsearch version">
-			<option selected="selected" value="3">3</option>
-			<option value="2">2</option>
-		</param-->
-		<param name="program" type="select" label="MGEScan"> <!-- passed to mgescan.sh as its fifth argument -->
-			<option selected="selected" value="B">Both</option>
-			<option value="L">LTR</option>
-			<option value="N">nonLTR</option>
-		</param>
-	</inputs>
-	<outputs> <!-- each filter keeps an output only for the program selections that produce it -->
-		<data format="ltr.out" name="output"> <!-- LTR result: dropped when program is N -->
-			<filter>program != "N"</filter>
-		</data>
-		<data format="fasta" name="clade"> <!-- nonLTR result: dropped when program is L -->
-			<filter>program != "L"</filter>
-		</data>
-		<data format="qfile" name="qvalue_en"> <!-- nonLTR result -->
-			<filter>program != "L"</filter>
-		</data>
-		<data format="qfile" name="qvalue_rt"> <!-- nonLTR result -->
-			<filter>program != "L"</filter>
-		</data>
-		<data format="gff3" name="ltr_gff3"> <!-- LTR result -->
-			<filter>program != "N"</filter>
-		</data>
-		<data format="gff3" name="nonltr_gff3"> <!-- nonLTR result -->
-			<filter>program != "L"</filter>
-		</data>
-
-	</outputs>
-	<help>
-Running the program
-===================
-
-To run MGEScan, select input genome data in From select box, and select program either LTR, nonLTR or both.
-
-Click 'Execute' button.
-
-If you would like more options for running the LTR or nonLTR program, use the separate tools on the left panel.
-In LTR > MGEScan-LTR, preprocessing by repeatmasker and setting other variables are available e.g. distance(bp) between LTRs.
-
-Output
-============
-A. MGEScan_LTR:
-Upon completion, MGEScan-LTR generates a file "ltr.out". This output file has information
-about clusters and coordinates of LTR retrotransposons identified. Each cluster of LTR
-retrotransposons starts with the head line of "[cluster_number]---------", followed by
-the information of LTR retrotransposons in the cluster. The columns for LTR
-retrotransposons are as follows.
-
-1. LTR_id: unique id of LTRs identified. It consists of two components, sequence file name and id in the file. For example, chr1_2 is the second LTR retrotransposon in the chr1 file.
-2. start position of 5’ LTR.
-3. end position of 5’ LTR.
-4. start position of 3’ LTR.
-5. end position of 3’ LTR.
-6. strand: + or -.
-7. length of 5’ LTR.
-8. length of 3’ LTR.
-9. length of the LTR retrotransposon.
-10. TSD on the left side of the LTR retrotransposons.
-11. TSD on the right side of the LTR retrotransposons.
-12. di(tri)nucleotide on the left side of 5’LTR
-13. di(tri)nucleotide on the right side of 5’LTR
-14. di(tri)nucleotide on the left side of 3’LTR
-15. di(tri)nucleotide on the right side of 3’LTR
-
-B. MGEScan_nonLTR:
-   Upon completion, MGEScan-nonLTR generates the directory, "info" in the data directory you
-   specified. In this "info" directory, two sub-directories ("full" and "validation") are
-   generated.
-
-   - The "full" directory is for storing sequences of elements. Each subdirectory in "full"
-   is the name of clade. In each directory of clade, the DNA sequences of nonLTRs identified
-   are listed. Each sequence is in fasta format. The header contains the position
-   information of TEs identified:
-   [genome_file_name]_[start position in the sequence]
-
-   For example, >chr1_333 means that this element starts at 333bp in the "chr1" file.
-
-   - The "validation" directory is for storing Q values. In the files "en" and "rt", the first column corresponds to the element name and the last column Q value.
-
-License
-============
-Copyright 2014 Mina Rho, Haixu Tang.
-You may redistribute this software under the terms of the GNU General Public License.
-
-</help>
-</tool>
--- a/nonltr.xml	Sat Jun 14 19:08:55 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<?xml version="1.0"?>
-
-<tool name="MGEScan-nonLTR" id="mgescan-nonltr" version="0.0.1" workflow_compatible="false">
-	<description>
-		computational identification and classification of non-LTR retroelements
-	</description>
-	<!--
-	<action module="galaxy.tools.actions.nonltr" class="nonltrToolAction"/>
-	<requirements>
-		<requirement type="package">HMMER</requirement>
-		<requirement type="package">EMBOSS</requirement>
-	</requirements>
-	-->
-	<command interpreter="bash">
-		<!-- "N" selects the nonLTR program; the two None placeholders fill the LTR-only output slots (arguments 4 and 9). The dataset name is quoted because it may contain spaces. Earlier form: mgescan.sh $input $input.name $hmmver None N $clade $qvalue_en $qvalue_rt None $nonltr_gff3 -->
-			mgescan.sh $input '$input.name' 3 None N $clade $qvalue_en $qvalue_rt None $nonltr_gff3
-	</command>
-	<inputs>
-		<!-- Former directory-based parameters, superseded by the dataset input below:
-		<param name="genome" type="text" label="directory where input genome data exists" />
-		<param name="data" type="text" label="directory of the output" />
-		-->
-		<param format="fasta" name="input" type="data" label="From"/> <!-- genome dataset to scan -->
-		<!--param name="hmmver" type="select" label="Hmmsearch version">
-			<option selected="selected" value="3">3</option>
-			<option value="2">2</option>
-		</param-->
-	</inputs>
-	<outputs> <!-- output datasets; their paths are handed to mgescan.sh in the command above -->
-		<data format="fasta" name="clade" /> <!-- receives the files under info/full -->
-		<data format="qfile" name="qvalue_en" /> <!-- receives info/validation/en -->
-		<data format="qfile" name="qvalue_rt" /> <!-- receives info/validation/rt -->
-		<data format="gff3" name="nonltr_gff3" /> <!-- receives info/nonltr.gff3 -->
-	</outputs>
-	<help>
-
-Running the program
-===================
-
-To run MGEScan-nonLTR, follow the steps below:
-
-1. Select genome files from the select box. You can upload your genome files through 'Get Data' at Tools menu bar.
-
-2. Click 'Execute' button. This tool reads your genome files and runs the whole process.
-
-Output
-======
-
-Upon completion, MGEScan-nonLTR generates output, "info" in the data directory you specified. In this "info" directory, two sub-directories ("full" and "validation") are generated.
-
- - The "full" directory is for storing sequences of elements. Each subdirectory in "full" is the name of clade. In each directory of clade, the DNA sequences of nonLTRs identified are listed. Each sequence is in fasta format. The header contains the position information of TEs identified, [genome_file_name]_[start position in the sequence]. For example, >chr1_333 means that this element starts at 333bp in the "chr1" file.
-
- - The "validation" directory is for storing Q values. In the files "en" and "rt", the first column corresponds to the element name and the last column Q value.
-
-License
-============
-Copyright 2014 Mina Rho, Haixu Tang.
-You may redistribute this software under the terms of the GNU General Public License.
-
-	</help>
-</tool>