Mercurial > repos > hyungrolee > mgescan_test
changeset 5:088266bbf150 draft default tip
Deleted selected files
author | hyungrolee |
---|---|
date | Mon, 15 Feb 2016 03:00:00 -0500 |
parents | 33dfa472f8ef |
children | |
files | mgescan.sh mgescan.xml tool_dependencies.xml |
diffstat | 3 files changed, 0 insertions(+), 360 deletions(-) [+] |
line wrap: on
line diff
#!/bin/bash
# mgescan.sh -- Galaxy wrapper that stages one input dataset, runs the MGEScan
# command-line program (ltr, nonltr or both) on it and copies the results to
# the output paths handed in by Galaxy.
#
# Recovered from the removal diff of this changeset; original diff header:
#   --- a/mgescan.sh Mon Feb 15 01:50:27 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,183 +0,0 @@
#
# Example invocation:
#   mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
#
# Environment:
#   MGESCAN_SRC  directory that contains mgescan/cmd.py (required)
#
# Positional arguments:
#   $1   input file (FASTA, or a tar / tar.gz archive of FASTA files)
#   $2   input display name (ignored; the basename of $1 is used instead)
#   $3   hmmsearch major version to require: 2 or 3
#   $4   destination for ltr.out
#   $5   program: N is nonLTR, L is LTR, anything else runs both
#   $6   destination for the tar.gz of the nonLTR "info" directory (clade)
#   $7   destination for info/validation/en
#   $8   destination for info/validation/rt
#   $9   destination for ltr.gff3, or "None"
#   $10  destination for nonltr.gff3, or "None"
#   $11  destination for the concatenated LTR + nonLTR GFF3 (program B)
#   $12  number of MPI processes, read only when exactly 12 arguments are given
#   $12-$21 (program L only) sw_rm, scaffold, min_dist, max_dist, min_len_ltr,
#        max_len_ltr, ltr_sim_condition, cluster_sim_condition, len_condition,
#        repeatmasker output destination
#
# Exit status: 0 on success; non-zero when a prerequisite is missing, the
# MGEScan run fails, or a required output cannot be copied.

# Print a message on stderr and abort with a failing status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the per-run temp directory on success; on failure keep a copy under
# error-cases/ for inspection. Exits with the accumulated status.
finish() {
  if ((status == 0)); then
    rm -rf -- "${work_dir:?}/${t_dir:?}"
  else
    mkdir -p -- "$work_dir/error-cases"
    cp -pr -- "$work_dir/$t_dir" "$work_dir/error-cases/"
  fi
  exit "$status"
}

[[ -n "${MGESCAN_SRC:-}" ]] || die "\$MGESCAN_SRC is not defined."
(($# >= 5)) || die "Usage: mgescan.sh input input_name hmmer_version output program [outputs...]"

script_program=$(command -v python) || die "python is not available."
script="$MGESCAN_SRC/mgescan/cmd.py"

input_file=$1
input_file_name=$(basename -- "$input_file")
hmmsearch_version=$3
output_file=$4
program=$5 # N is nonLTR, L is LTR and B is both
# Optional output parameters for nonLTR
clade=${6:-}
en=${7:-}
rt=${8:-}
ltr_gff3=${9:-}
nonltr_gff3=${10:-}
both_gff3=${11:-}

# LTR-only parameters. Apart from $repeatmasker they are accepted for
# interface compatibility only: the mgescan command line below does not take
# them.
repeatmasker=""
if [[ "$program" == "L" ]]; then
  sw_rm=${12:-}
  scaffold=${13:-}
  min_dist=${14:-}
  max_dist=${15:-}
  min_len_ltr=${16:-}
  max_len_ltr=${17:-}
  ltr_sim_condition=${18:-}
  cluster_sim_condition=${19:-}
  len_condition=${20:-}
  repeatmasker=${21:-}
fi

# With exactly 12 arguments the last one is the MPI process count; only a
# positive integer enables MPI.
mpi_args=()
if (($# == 12)); then
  nmpi=${12}
  if [[ "$nmpi" =~ ^[0-9]+$ ]] && ((10#$nmpi >= 1)); then
    mpi_args=("--mpi=$nmpi")
  fi
fi

# Prerequisite checks come before any directory is created so that a missing
# tool does not leave an orphaned temp directory behind.
# transeq is part of EMBOSS.
transeq --version 2>/dev/null || die "EMBOSS is not available."

# The requested HMMER major version must match the installed hmmsearch banner.
hmmer_help=$(hmmsearch -h 2>/dev/null)
case "$hmmsearch_version" in
  2 | 3) hmmer_banner=$(grep "HMMER $hmmsearch_version" <<<"$hmmer_help") ;;
  *) hmmer_banner="" ;;
esac
[[ -n "$hmmer_banner" ]] || die "HMMER is not available."
printf '%s selected.\n' "$hmmer_banner"

# Move to the working directory (the directory holding cmd.py).
work_dir=$(dirname -- "$script")
cd -- "$work_dir" || die "Cannot change directory to $work_dir."

# Create a private directory for this run's input and output.
mkdir -p input || die "Cannot create $work_dir/input."
t_dir=$(mktemp -p input -d) || die "Cannot create a temporary directory in $work_dir/input."
input_dir="$work_dir/$t_dir/seq"   # full path
output_dir="$work_dir/$t_dir/data" # full path
status=0
mkdir -p -- "$input_dir" "$output_dir" || {
  status=1
  finish
}

# Stage the input: unpack it when tar can list it, otherwise symlink it.
# The original note says pre_process.pl copies the genome into ./data/genome,
# which is why archives are extracted rather than passed through.
if tar tf "$input_file" &>/dev/null; then
  tar xzf "$input_file" -C "$input_dir" 2>/dev/null \
    || tar xf "$input_file" -C "$input_dir" 2>/dev/null \
    || {
      status=1
      finish
    }
else
  ln -s -- "$input_file" "$input_dir/$input_file_name" || {
    status=1
    finish
  }
fi

case "$program" in
  L) program_name="ltr" ;;
  N) program_name="nonltr" ;;
  *) program_name="both" ;;
esac

# Run MGEScan and remember its exit status: the cleanup at the end depends on
# it (the original tested $? of an unrelated 'if' block instead).
"$script_program" "$script" "$program_name" "$input_dir/" \
  "--output=$output_dir/" "${mpi_args[@]}" || status=$?

# Copy LTR results.
if [[ "$program" != "N" ]]; then
  cp -- "$output_dir/ltr/ltr.out" "$output_file" || status=1
  if [[ "$ltr_gff3" != "None" ]]; then
    cp -- "$output_dir/ltr/ltr.gff3" "$ltr_gff3" || status=1
  fi

  if [[ -n "$repeatmasker" && "$repeatmasker" != "None" ]]; then
    # RepeatMasker writes e.g. chr2L.fa.cat.gz chr2L.fa.masked chr2L.fa.out
    # chr2L.fa.out.pos chr2L.fa.tbl; only the .out file is exported.
    cp -- "$output_dir/repeatmasker/${input_file_name}.out" "$repeatmasker" || status=1
  fi
fi

# Copy nonLTR results.
if [[ "$program" != "L" ]]; then
  # The archive lives inside the per-run directory, so it is removed together
  # with it. (The original assigned a mktemp name to the special variable
  # RANDOM, which only reseeds the generator and leaked the temp file.)
  compressed_file="$output_dir/info.tar.gz"
  tar -czPf "$compressed_file" "$output_dir/info" || status=1

  # These three copies are deliberately best-effort, as in the original.
  cp -- "$compressed_file" "$clade" 2>/dev/null
  cp -- "$output_dir/info/validation/en" "$en" 2>/dev/null
  cp -- "$output_dir/info/validation/rt" "$rt" 2>/dev/null

  if [[ "$nonltr_gff3" != "None" ]]; then
    cp -- "$output_dir/info/nonltr.gff3" "$nonltr_gff3" || status=1
    # nonltr.gff3 looks like:
    ##gff-version 3
    #chr2L.fa MGEScan_nonLTR mobile_genetic_element 19670384 19676921 . . . ID=chr2L.fa_19670384
    #chr2L.fa MGEScan_nonLTR mobile_genetic_element 17689430 17695994 . . . ID=chr2L.fa_17689430
    #chr2L.fa MGEScan_nonLTR mobile_genetic_element 11897186 11903717 . . . ID=chr2L.fa_11897186
    #chr2L.fa MGEScan_nonLTR mobile_genetic_element 49574 56174 . . . ID=chr2L.fa_49574
  fi
fi

# Both programs: append the LTR and nonLTR annotations to one GFF3 file.
if [[ "$program" == "B" ]]; then
  cat -- "$output_dir/ltr/ltr.gff3" "$output_dir/info/nonltr.gff3" >>"$both_gff3" || status=1
fi

# Delete the temp directory on success, keep a copy on failure.
finish
--- a/mgescan.xml Mon Feb 15 01:50:27 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ -<?xml version="1.0"?> - -<tool name="MGEScan" id="mgescan" version="0.0.2"> - <description> - MGEScan - </description> - <requirements> - <requirement type="package" version="3.0">mgescan</requirement> - </requirements> - <version_command>mgescan --version</version_command> - <command interpreter="bash"> - mgescan.sh $input '$input.name' 3 $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3 $both_gff3 $mpi_yn.nmpi - <!-- mgescan.sh $input $input.name $hmmver $output $program $clade $qvalue_en $qvalue_rt $ltr_gff3 $nonltr_gff3 --> - </command> - <inputs> - <param format="fasta,tabular,data" name="input" type="data" label="Input FASTA file(s)"/> - <!--param name="hmmver" type="select" label="Hmmsearch version"> - <option selected="selected" value="3">3</option> - <option value="2">2</option> - </param--> - <param name="program" type="select" label="MGEScan"> - <option selected="selected" value="B">Both</option> - <option value="L">LTR</option> - <option value="N">nonLTR</option> - </param> - <conditional name="mpi_yn"> - <param name="mpi_select" type="select" label="Enable MPI"> - <option value="no_mpi">No</option> - <option value="yes_mpi">Yes</option> - </param> - <when value="yes_mpi"> - <param name="nmpi" format="txt" type="text" value="1" label="Number of MPI Processes"/> - </when> - <when value="no_mpi"> - <param name="nmpi" type="hidden" value="0"/> - </when> - </conditional> - </inputs> - <outputs> - <data format="ltr.out" name="output" label="LTR Results (ltr.out)"> - <filter>program != "N"</filter> - </data> - <data format="fasta" name="clade" label="clade file (FASTA)"> - <filter>program != "L"</filter> - </data> - <data format="qfile" name="qvalue_en" label="qvalue_en"> - <filter>program != "L"</filter> - </data> - <data format="qfile" name="qvalue_rt" label="qvalue_rt"> - <filter>program != "L"</filter> - </data> - <data 
format="gff3" name="ltr_gff3" label="GFF3 for LTR"> - <filter>program != "N"</filter> - </data> - <data format="gff3" name="nonltr_gff3" label="GFF3 for nonLTR"> - <filter>program != "L"</filter> - </data> - <data format="gff3" name="both_gff3" label="GFF3 for LTR and nonLTR"> - <filter>program == "B"</filter> - </data> - - </outputs> - <help> -How to Run MGEScan -=================== - -* Select an input genome dataset from the select box, and choose a program. Running both LTR and nonLTR is the default. -* Click the 'Execute' button. -* MPI will be enabled depending on your system support. - -If you would like more options for running the LTR or nonLTR program, use the separate tools in the left panel. - -For example, in LTR > MGEScan-LTR, preprocessing by repeatmasker and setting other variables are available, e.g. distance (bp) between LTRs. - -Output -============ - -A. MGEScan_LTR: - -Upon completion, MGEScan-LTR generates a file "ltr.out". This output file has information -about clusters and coordinates of LTR retrotransposons identified. Each cluster of LTR -retrotransposons starts with the head line of "[cluster_number]---------", followed by -the information of LTR retrotransposons in the cluster. The columns for LTR -retrotransposons are as follows. - - 1. LTR_id: unique id of LTRs identified. It consists of two components, sequence file name and id in the file. For example, chr1_2 is the second LTR retrotransposon in the chr1 file. - 2. start position of 5’ LTR. - 3. end position of 5’ LTR. - 4. start position of 3’ LTR. - 5. end position of 3’ LTR. - 6. strand: + or -. - 7. length of 5’ LTR. - 8. length of 3’ LTR. - 9. length of the LTR retrotransposon. - 10. TSD on the left side of the LTR retrotransposons. - 11. TSD on the right side of the LTR retrotransposons. - 12. di(tri)nucleotide on the left side of 5’LTR - 13. di(tri)nucleotide on the right side of 5’LTR - 14. di(tri)nucleotide on the left side of 3’LTR - 15. di(tri)nucleotide on the right side of 3’LTR - -B. 
MGEScan_nonLTR: - Upon completion, MGEScan-nonLTR generates the directory, "info" in the data directory you - specified. In this "info" directory, two sub-directories ("full" and "validation") are - generated. - - * The "full" directory is for storing sequences of elements. Each subdirectory in "full" - is named after a clade. In each clade directory, the DNA sequences of nonLTRs identified - are listed. Each sequence is in fasta format. The header contains the position - information of TEs identified: [genome_file_name]_[start position in the sequence] - - For example, >chr1_333 means that this element starts at 333bp in the "chr1" file. - - * The "validation" directory is for storing Q values. - In the files "en" and "rt", the first column corresponds to the element name and the last column to the Q value. - -License -============ -Copyright 2015. -You may redistribute this software under the terms of the GNU General Public License. - -</help> -</tool>
--- a/tool_dependencies.xml Mon Feb 15 01:50:27 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="trf" version="4.0"> - <repository changeset_revision="a2e1d1f25e35" name="tandem_repeats_finder" owner="urgi-team" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="mgescan" version="3"> - <install version="1.0"> - <actions_group> - <!-- - <actions architecture="x86_64" os="linux"> - <action type="download_by_url"> - https://github.com/MGEScan/mgescan/archive/1.0.tar.gz - </action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - </actions> - --> - <actions> - <action type="download_by_url"> - https://github.com/MGEScan/mgescan/archive/1.0.tar.gz - </action> - <action type="set_environment_for_install"> - <repository changeset_revision="a2e1d1f25e35" name="tandem_repeats_finder" owner="urgi-team" toolshed="https://toolshed.g2.bx.psu.edu"> - <package name="trf" version="4.0" /> - </repository> - </action> - <action type="shell_command">python setup.py install</action> - <action type="move_file"> - <source>mgescan</source> - <destination>$INSTALL_DIR/mgescan</destination> - </action> - </actions> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/mgescan</environment_variable> - <environment_variable action="set_to" name="MGESCAN_SRC">$INSTALL_DIR</environment_variable> - </action> - </actions_group> - </install> - <readme> -Program: mgescan (Tools for identifying LTR & nonLTR) -Version: 3.0 - -Usage: - mgescan both <genome_dir> [--output=<data_dir>] [--mpi=<num>] - mgescan ltr <genome_dir> [--output=<data_dir>] [--mpi=<num>] - mgescan nonltr <genome_dir> [--output=<data_dir>] [--mpi=<num>] - mgescan (-h | --help) - mgescan --version - - </readme> - </package> 
-</tool_dependency>