# HG changeset patch
# User hyungrolee
# Date 1455523200 18000
# Node ID 088266bbf150f4f37b57a56470e27d86ad0abced
# Parent 33dfa472f8ef09bedabbee930b454f29fee8f6bd
Deleted selected files
diff -r 33dfa472f8ef -r 088266bbf150 mgescan.sh
--- a/mgescan.sh Mon Feb 15 01:50:27 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-#!/bin/bash
-# mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
-if [ "" == "$MGESCAN_SRC" ]
-then
- echo "\$MGESCAN_SRC is not defined."
- exit
-fi
-
-script_program=`which python`
-script=$MGESCAN_SRC/mgescan/cmd.py
-input_file=$1
-#input_file_name=$2
-input_file_name=`basename $input_file`
-hmmsearch_version=$3
-output_file=$4
-program=$5 # N is nonLTR, L is LTR and B is both
-# Optional output parameters for nonLTR
-clade=$6
-en=$7
-rt=$8
-ltr_gff3=$9
-nonltr_gff3=${10}
-both_gff3=${11}
-#### for ltr between $11 and $20
-if [ "$program" == "L" ]
-then
- sw_rm=${12}
- scaffold=${13}
- min_dist=${14}
- max_dist=${15}
- min_len_ltr=${16}
- max_len_ltr=${17}
- ltr_sim_condition=${18}
- cluster_sim_condition=${19}
- len_condition=${20}
- repeatmasker=${21}
-fi
-
-#elif [ "$program" == "B" ]
-if [ $# -eq 12 ]
-then
- nmpi=${12}
- if [ ! -z $nmpi ] && [ $nmpi -ge 1 ]
- then
- mpi_enabled="--mpi=$nmpi"
- fi
-
-fi
-
-# /nfs/nfs4/home/lee212/mgescan/galaxy-dist/tools/mgescan/find_ltr.sh /nfs/nfs4/home/lee212/mgescan/galaxy-dist/database/files/000/dataset_1.dat /nfs/nfs4/home/lee212/mgescan/galaxy-dist/database/files/000/dataset_3.dat
-
-#set path for transeq
-#export PATH=$user_dir/mgescan/EMBOSS/bin:/usr/bin:$PATH
-transeq --version 2> /dev/null
-res=$?
-if [ 0 -ne $res ]
-then
- echo "EMBOSS is not available."
- exit
-fi
-
-#move to the working directory
-work_dir=`dirname $script`
-cd $work_dir
-
-#create directory for input and output
-mkdir -p input
-t_dir=`mktemp -p input -d` #relative path
-input_dir="$work_dir/$t_dir/seq" # full path
-output_dir="$work_dir/$t_dir/data"
-mkdir -p $input_dir
-mkdir -p $output_dir
-
-#make a copy of input
-#/bin/cp $input_file $input_dir/$input_file_name
-
-# Check tar.gz
-tar tf $input_file &> /dev/null
-ISGZ=$?
-if [ 0 -eq $ISGZ ]
-then
- # It seems pre_process.pl creates ./data/genome directory and makes a copy of a genome file.
- # Due to this reason, extracts compressed inputs to output directory.
- tar xzf $input_file -C $input_dir 2> /dev/null
- if [ $? -ne 0 ]
- then
- tar xf $input_file -C $input_dir 2> /dev/null
- fi
-else
- /bin/ln -s $input_file $input_dir/$input_file_name
-fi
-
-VERSION2=`hmmsearch -h|grep "HMMER 2" 2> /dev/null`
-VERSION3=`hmmsearch -h|grep "HMMER 3" 2> /dev/null`
-if [ "2" == "$hmmsearch_version" ] && [ "" != "$VERSION2" ]
-then
- echo $VERSION2 selected.
-elif [ "3" == "$hmmsearch_version" ] && [ "" != "$VERSION3" ]
-then
- echo $VERSION3 selected.
-else
- echo HMMER is not available.
- exit
-fi
-
-if [ "$program" == "L" ]
-then
- program_name="ltr"
-elif [ "$program" == "N" ]
-then
- program_name="nonltr"
-else
- program_name="both"
-fi
-
-#run
-$script_program $script $program_name $input_dir/ --output=$output_dir/ $mpi_enabled #-hmmerv=$hmmsearch_version -sw_rm=${11} -scaffold=${12} -min_dist=${13} -max_dist=${14} -min_len_ltr=${15} -max_len_ltr=${16} -ltr_sim_condition=${17} -cluster_sim_condition=${18} -len_condition=${19}
-#/usr/bin/perl $script -genome=$input_dir/ -data=$output_dir/ -hmmerv=$hmmsearch_version -program=$program -sw_rm=${11} -scaffold=${12} -min_dist=${13} -max_dist=${14} -min_len_ltr=${15} -max_len_ltr=${16} -ltr_sim_condition=${17} -cluster_sim_condition=${18} -len_condition=${19}
-
-#RES=`ssh -i $user_dir/.ssh/.internal silo.cs.indiana.edu "/usr/bin/perl $script -genome=$input_dir/ -data=$output_dir/ -hmmerv=$hmmsearch_version -program=$program > /dev/null"`
-
-#make a copy of output
-if [ "$program" != "N" ]
-then
- /bin/cp $output_dir/ltr/ltr.out $output_file
- if [ "$ltr_gff3" != "None" ]
- then
- /bin/cp $output_dir/ltr/ltr.gff3 $ltr_gff3
- fi
-
- if [ "$repeatmasker" != "None" ] && [ "$repeatmasker" != "" ]
- then
- # chr2L.fa.cat.gz chr2L.fa.masked chr2L.fa.out chr2L.fa.out.pos chr2L.fa.tbl
- /bin/cp $output_dir/repeatmasker/${input_file_name}.out $repeatmasker
- fi
-fi
-if [ "$program" != "L" ]
-then
-
- tmp=`mktemp`
- RANDOM=`basename $tmp`
- compressed_file=$output_dir/$RANDOM.tar.gz
- /bin/tar czfP $compressed_file $output_dir/info
- #/bin/cp $compressed_file $output_file
- #RES=`/bin/cp $output_dir/info/full/*/* $clade 2> /dev/null`
- RES=`/bin/cp $compressed_file $clade 2> /dev/null`
- RES=`/bin/cp $output_dir/info/validation/en $en 2> /dev/null`
- RES=`/bin/cp $output_dir/info/validation/rt $rt 2> /dev/null`
- if [ "$nonltr_gff3" != "None" ]
- then
- /bin/cp $output_dir/info/nonltr.gff3 $nonltr_gff3
- # nonltr.gff3
- ##gff-version 3
- #chr2L.fa MGEScan_nonLTR mobile_genetic_element 19670384 19676921 . . . ID=chr2L.fa_19670384
- #chr2L.fa MGEScan_nonLTR mobile_genetic_element 17689430 17695994 . . . ID=chr2L.fa_17689430
- #chr2L.fa MGEScan_nonLTR mobile_genetic_element 11897186 11903717 . . . ID=chr2L.fa_11897186
- #chr2L.fa MGEScan_nonLTR mobile_genetic_element 49574 56174 . . . ID=chr2L.fa_49574
- fi
-
-#else
- # Both LTR, nonLTR executed
- #compressed_file=$output_dir/$RANDOM.tar.gz
- #/bin/tar czfP $compressed_file $output_dir
- #/bin/cp $compressed_file $output_file
-fi
-
-if [ "$program" == "B" ]
-then
- #echo "track name=LTR description=\"MGEScan-LTR\" color=0,0,255," > $both_gff3
- /bin/cat $output_dir/ltr/ltr.gff3 >> $both_gff3
- #echo "track name=nonLTR description=\"MGEScan-nonLTR\" color=255,0,0" >> $both_gff3
- /bin/cat $output_dir/info/nonltr.gff3 >> $both_gff3
-fi
-
-# delete temp directory
-if [ $? -eq 0 ]
-then
- rm -rf $work_dir/$t_dir
- #echo
-else
- #echo cp -pr $work_dir/$t_dir $work_dir/error-cases/
- cp -pr $work_dir/$t_dir $work_dir/error-cases/
-fi
diff -r 33dfa472f8ef -r 088266bbf150 mgescan.xml
--- a/mgescan.xml Mon Feb 15 01:50:27 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-
-
-
-
- MGEScan
-
-
- mgescan
-
- mgescan --version
-
- mgescan.sh $input '$input.name' 3 $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3 $both_gff3 $mpi_yn.nmpi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- program != "N"
-
-
- program != "L"
-
-
- program != "L"
-
-
- program != "L"
-
-
- program != "N"
-
-
- program != "L"
-
-
- program == "B"
-
-
-
-
-How to Run MGEScan
-===================
-
-* Select an input genome data from the select box, and choose a program. Both LTR and nonLTR of MGEScan is default.
-* Click 'Execute' button.
-* MPI will be enabled depending on your system support.
-
-If you like to have more options to run LTR or nonLTR program, use separated tools on the left panel.
-
-For example, in LTR > MGEScan-LTR, preprocessing by repeatmasker and setting other variables are available e.g. distance(bp) between LTRs.
-
-Output
-============
-
-A. MGEScan_LTR:
-
-Upon completion, MGEScan-LTR generates a file "ltr.out". This output file has information
-about clusters and coordinates of LTR retrotransposons identified. Each cluster of LTR
-retrotransposons starts with the head line of "[cluster_number]---------", followed by
-the information of LTR retrotransposons in the cluster. The columns for LTR
-retrotransposons are as follows.
-
- 1. LTR_id: unique id of LTRs identified. It consist of two components, sequence file name and id in the file. For example, chr1_2 is the second LTR retrotransposon in the chr1 file.
- 2. start position of 5’ LTR.
- 3. end position of 5’ LTR.
- 4. start position of 3’ LTR.
- 5. end position of 3’ LTR.
- 6. strand: + or -.
- 7. length of 5’ LTR.
- 8. length of 3’ LTR.
- 9. length of the LTR retrotransposon.
- 10. TSD on the left side of the LTR retotransposons.
- 11. TSD on the right side of the LTR retrotransposons.
- 12. di(tri)nucleotide on the left side of 5’LTR
- 13. di(tri)nucleotide on the right side of 5’LTR
- 14. di(tri)nucleotide on the left side of 3’LTR
- 15. di(tri)nucleotide on the right side of 3’LTR
-
-B. MGEScan_nonLTR:
- Upon completion, MGEScan-nonLTR generates the directory, "info" in the data directory you
- specified. In this "info" directory, two sub-directories ("full" and "validation") are
- generated.
-
- * The "full" directory is for storing sequences of elements. Each subdirectory in "full"
- is the name of clade. In each directory of clade, the DNA sequences of nonLTRs identified
- are listed. Each sequence is in fasta format. The header contains the position
- information of TEs identified: [genome_file_name]_[start position in the sequence]
-
- For example, >chr1_333 means that this element start at 333bp in the "chr1" file.
-
- * The "validation" directory is for storing Q values.
- In the files "en" and "rt", the first column corresponds to the element name and the last column Q value.
-
-License
-============
-Copyright 2015.
-You may redistribute this software under the terms of the GNU General Public License.
-
-
-
diff -r 33dfa472f8ef -r 088266bbf150 tool_dependencies.xml
--- a/tool_dependencies.xml Mon Feb 15 01:50:27 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
- https://github.com/MGEScan/mgescan/archive/1.0.tar.gz
-
-
-
-
-
-
- python setup.py install
-
-
- $INSTALL_DIR/mgescan
-
-
-
- $INSTALL_DIR/mgescan
- $INSTALL_DIR
-
-
-
-
-Program: mgescan (Tools for identifying LTR & nonLTR)
-Version: 3.0
-
-Usage:
- mgescan both <genome_dir> [--output=<data_dir>] [--mpi=<num>]
- mgescan ltr <genome_dir> [--output=<data_dir>] [--mpi=<num>]
- mgescan nonltr <genome_dir> [--output=<data_dir>] [--mpi=<num>]
- mgescan (-h | --help)
- mgescan --version
-
-
-
-