#!/bin/bash
#
# Wrapper that launches $MGESCAN_SRC/mgescan/cmd.py on one input and copies
# the results to the output paths given on the command line.
#
# Example invocation (LTR mode):
#   mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
#
# Environment:
#   MGESCAN_SRC  directory that contains mgescan/cmd.py (required)

# Abort early when the installation directory is unknown. The diagnostic goes
# to stderr and the exit status is non-zero so that callers can detect the
# failure (a bare `exit` after `echo` would report success).
if [[ -z "$MGESCAN_SRC" ]]; then
  echo "\$MGESCAN_SRC is not defined." >&2
  exit 1
fi
# Interpreter used to launch cmd.py (empty when no `python` is on PATH) and
# the entry-point script inside the MGEScan installation.
script_program=$(command -v python)
script=$MGESCAN_SRC/mgescan/cmd.py

#######################################
# Map the positional arguments onto the global variables used by the rest of
# this script.
# Globals (written):
#   input_file, input_file_name, hmmsearch_version, output_file, program,
#   clade, en, rt, ltr_gff3, nonltr_gff3, both_gff3 and, when program is "L",
#   sw_rm, scaffold, min_dist, max_dist, min_len_ltr, max_len_ltr,
#   ltr_sim_condition, cluster_sim_condition, len_condition, repeatmasker
# Arguments:
#   $1        input file path
#   $2        ignored (the name is derived from $1 instead)
#   $3        HMMER major version to use
#   $4        main output file
#   $5        program: N is nonLTR, L is LTR and B is both
#   $6-$8     optional nonLTR outputs (clade, en, rt)
#   $9-${11}  GFF3 outputs (LTR, nonLTR, both)
#   ${12}-${21}  LTR-only parameters, read only when $5 is "L"
#######################################
parse_args() {
  input_file=$1
  # Quoted so that paths containing whitespace are not split into several
  # basename operands.
  input_file_name=$(basename -- "$input_file")
  hmmsearch_version=$3
  output_file=$4
  program=$5

  clade=$6
  en=$7
  rt=$8
  ltr_gff3=$9
  nonltr_gff3=${10}
  both_gff3=${11}

  if [[ "$program" == "L" ]]; then
    sw_rm=${12}
    scaffold=${13}
    min_dist=${14}
    max_dist=${15}
    min_len_ltr=${16}
    max_len_ltr=${17}
    ltr_sim_condition=${18}
    cluster_sim_condition=${19}
    len_condition=${20}
    repeatmasker=${21}
  fi
}

parse_args "$@"
#######################################
# Enable MPI when the script was called with exactly 12 arguments and the
# 12th one is an integer >= 1.
# Globals (written):
#   nmpi         the raw 12th argument (only when there are 12 arguments)
#   mpi_enabled  "--mpi=<n>" when MPI is requested; otherwise left untouched
# Arguments:
#   the script's positional parameters
#######################################
configure_mpi() {
  if (( $# == 12 )); then
    nmpi=${12}
    # Validate before the numeric comparison: a value such as "None" would
    # otherwise make `[` print "integer expression expected".
    if [[ "$nmpi" =~ ^[+-]?[0-9]+$ ]] && [ "$nmpi" -ge 1 ] 2> /dev/null; then
      mpi_enabled="--mpi=$nmpi"
    fi
  fi
}

configure_mpi "$@"
# EMBOSS check: `transeq` has to be reachable through the caller's PATH (this
# script does not modify PATH). Only stderr of the probe is discarded.
# On failure the diagnostic goes to stderr and the exit status is non-zero;
# a bare `exit` after `echo` would report success to the caller.
if ! transeq --version 2> /dev/null; then
  echo "EMBOSS is not available." >&2
  exit 1
fi
# Move to the directory that holds cmd.py; everything below uses paths
# relative to it. Stop if that fails, otherwise the per-run directories would
# be created in whatever directory the caller happened to be in.
work_dir=$(dirname -- "$script")
if ! cd -- "$work_dir"; then
  echo "Cannot change to working directory: $work_dir" >&2
  exit 1
fi

# Create a unique per-run directory below ./input with one sub-directory for
# the staged input (seq) and one for the results (data).
mkdir -p input
if ! t_dir=$(mktemp -p input -d); then # path relative to $work_dir
  echo "Cannot create a temporary directory under $work_dir/input" >&2
  exit 1
fi
input_dir="$work_dir/$t_dir/seq"
output_dir="$work_dir/$t_dir/data"
mkdir -p -- "$input_dir"
mkdir -p -- "$output_dir"
#######################################
# Place the input sequences in $input_dir.
# If `tar` can list $input_file it is treated as an archive and unpacked
# (gzip first, then letting tar decide); otherwise the file is symlinked
# under its base name.
# Globals (read):
#   input_file, input_file_name, input_dir
# Returns:
#   0 on success, non-zero when unpacking or linking failed
#######################################
stage_input() {
  if tar tf "$input_file" &> /dev/null; then
    # The archive members are extracted straight into the sequence directory.
    if ! tar xzf "$input_file" -C "$input_dir" 2> /dev/null \
      && ! tar xf "$input_file" -C "$input_dir" 2> /dev/null; then
      # Both attempts failed: say so instead of continuing silently.
      echo "Cannot extract $input_file into $input_dir" >&2
      return 1
    fi
  else
    /bin/ln -s "$input_file" "$input_dir/$input_file_name"
  fi
}

# The result is not checked here; the script carries on as it always has.
stage_input
# HMMER check: the banner printed by `hmmsearch -h` must match the major
# version requested in $hmmsearch_version ("2" or "3").
# hmmsearch is run once and its own stderr is discarded (previously the
# redirection was attached to grep, so a missing hmmsearch still printed an
# error, and the program was started twice).
hmmer_help=$(hmmsearch -h 2> /dev/null)
VERSION2=$(grep "HMMER 2" <<< "$hmmer_help")
VERSION3=$(grep "HMMER 3" <<< "$hmmer_help")

if [[ "$hmmsearch_version" == "2" && -n "$VERSION2" ]]; then
  echo "$VERSION2 selected."
elif [[ "$hmmsearch_version" == "3" && -n "$VERSION3" ]]; then
  echo "$VERSION3 selected."
else
  # Also reached when the requested version is neither 2 nor 3, or when the
  # installed HMMER is a different major version than requested.
  echo "HMMER is not available." >&2
  exit 1
fi
#######################################
# Translate the one-letter program code into the cmd.py sub-command.
# Arguments:
#   $1 - program code: "L", "N", or anything else
# Outputs:
#   "ltr" for L, "nonltr" for N, "both" for every other value
#######################################
resolve_program_name() {
  case "$1" in
    L) echo "ltr" ;;
    N) echo "nonltr" ;;
    *) echo "both" ;;
  esac
}

program_name=$(resolve_program_name "$program")

# Run cmd.py. The command is built as an array so that paths containing
# whitespace stay intact. $script_program is omitted when empty, in which
# case $script is executed directly, and --mpi is added only when requested.
# Note: $hmmsearch_version and the LTR tuning parameters are not forwarded.
run_cmd=()
if [[ -n "$script_program" ]]; then
  run_cmd+=("$script_program")
fi
run_cmd+=("$script" "$program_name" "$input_dir/" "--output=$output_dir/")
if [[ -n "$mpi_enabled" ]]; then
  run_cmd+=("$mpi_enabled")
fi

"${run_cmd[@]}"
run_status=$?
if (( run_status != 0 )); then
  # Report the failure; the script still continues to the copy steps.
  echo "$script $program_name exited with status $run_status" >&2
fi
#######################################
# Copy the LTR results to the destinations given on the command line.
# Does nothing when the program is "N" (nonLTR only).
# Globals (read):
#   program, output_dir, output_file, ltr_gff3, repeatmasker, input_file_name
# Returns:
#   status of the last copy that was attempted
#######################################
collect_ltr_outputs() {
  [[ "$program" != "N" ]] || return 0

  /bin/cp "$output_dir/ltr/ltr.out" "$output_file"

  # Optional outputs are skipped when the destination is empty or the literal
  # "None" (the same rule for both, so an empty $ltr_gff3 no longer triggers
  # a copy without a destination).
  if [[ -n "$ltr_gff3" && "$ltr_gff3" != "None" ]]; then
    /bin/cp "$output_dir/ltr/ltr.gff3" "$ltr_gff3"
  fi

  if [[ -n "$repeatmasker" && "$repeatmasker" != "None" ]]; then
    # Only <input name>.out is copied from the repeatmasker directory
    # (e.g. chr2L.fa.out next to chr2L.fa.masked, chr2L.fa.tbl, ...).
    /bin/cp "$output_dir/repeatmasker/${input_file_name}.out" "$repeatmasker"
  fi
}

collect_ltr_outputs
#######################################
# Copy one result file unless the destination is empty or the literal "None".
# Arguments:
#   $1 - source file
#   $2 - destination path
#######################################
copy_if_requested() {
  local src=$1 dest=$2
  [[ -n "$dest" && "$dest" != "None" ]] || return 0
  /bin/cp "$src" "$dest"
}

#######################################
# Copy the nonLTR results to the destinations given on the command line.
# Does nothing when the program is "L" (LTR only).
#   $clade        gets a tar.gz of $output_dir/info (absolute member names)
#   $en, $rt      get info/validation/en and info/validation/rt
#   $nonltr_gff3  gets info/nonltr.gff3, e.g.
#       ##gff-version 3
#       chr2L.fa MGEScan_nonLTR mobile_genetic_element 19670384 19676921 . . . ID=chr2L.fa_19670384
# Globals (read):    program, output_dir, clade, en, rt, nonltr_gff3
# Globals (written): compressed_file
# Returns:
#   1 when $output_dir/info is missing, otherwise the status of the last copy
#######################################
collect_nonltr_outputs() {
  [[ "$program" != "L" ]] || return 0

  if [[ ! -d "$output_dir/info" ]]; then
    echo "nonLTR result directory not found: $output_dir/info" >&2
    return 1
  fi

  # A fixed name is sufficient because $output_dir is unique per run. The
  # previous code assigned to RANDOM, which in bash only seeds the random
  # number generator, and left a stray mktemp file behind.
  compressed_file="$output_dir/nonltr_info.tar.gz"
  /bin/tar czfP "$compressed_file" "$output_dir/info"

  # These three are best effort: errors are deliberately not shown.
  copy_if_requested "$compressed_file" "$clade" 2> /dev/null
  copy_if_requested "$output_dir/info/validation/en" "$en" 2> /dev/null
  copy_if_requested "$output_dir/info/validation/rt" "$rt" 2> /dev/null

  copy_if_requested "$output_dir/info/nonltr.gff3" "$nonltr_gff3"
}

collect_nonltr_outputs
#######################################
# For program "B", append the LTR and nonLTR GFF3 results to $both_gff3.
# Globals (read):
#   program, output_dir, both_gff3
# Returns:
#   0 when nothing had to be done or both files were appended; otherwise the
#   status of the last `cat` that failed (a failure of the first `cat` is no
#   longer hidden by a successful second one)
#######################################
merge_gff3_tracks() {
  local status=0
  [[ "$program" == "B" ]] || return 0
  /bin/cat "$output_dir/ltr/ltr.gff3" >> "$both_gff3" || status=$?
  /bin/cat "$output_dir/info/nonltr.gff3" >> "$both_gff3" || status=$?
  return "$status"
}

#######################################
# Dispose of the per-run directory $work_dir/$t_dir.
# Globals (read):
#   work_dir, t_dir
# Arguments:
#   $1 - status of the preceding step: 0 removes the directory, anything else
#        copies it into $work_dir/error-cases/ for inspection
# Returns:
#   1 when work_dir or t_dir is empty, otherwise the status of rm/cp
#######################################
cleanup_run_dir() {
  local status=$1
  local run_dir="$work_dir/$t_dir"

  # Never let an empty variable turn this into `rm -rf /` or `cp -pr / ...`.
  if [[ -z "$work_dir" || -z "$t_dir" ]]; then
    echo "Temporary directory is not set; nothing to clean up." >&2
    return 1
  fi

  if (( status == 0 )); then
    rm -rf -- "$run_dir"
  else
    # Make sure the destination exists so the run directory is always copied
    # into it.
    mkdir -p -- "$work_dir/error-cases" \
      && cp -pr -- "$run_dir" "$work_dir/error-cases/"
  fi
}

# Only the GFF3 merge status decides between removal and preservation.
merge_gff3_tracks
cleanup_run_dir "$?"