annotate mgescan.sh @ 15:1234e527defa draft default tip

Uploaded
author hyungrolee
date Tue, 16 Feb 2016 16:05:53 -0500
parents 1a143426370c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
#!/bin/bash
#
# mgescan.sh - wrapper that runs $MGESCAN_SRC/mgescan/cmd.py on one input and
# copies the results to the output paths supplied by the caller.
#
# Example invocation (LTR mode):
#   mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
#
# Required environment:
#   MGESCAN_SRC   directory that contains mgescan/cmd.py
#
# Positional arguments:
#   $1         input file (a sequence file, or a tar archive of them)
#   $2         input name (not used; the basename of $1 is used instead)
#   $3         HMMER major version expected on this host (2 or 3)
#   $4         main output file (receives ltr.out unless $5 is N)
#   $5         program: N is nonLTR, L is LTR, anything else means both
#   $6 $7 $8   nonLTR outputs: clade archive, en, rt
#   $9         LTR GFF3 output
#   ${10}      nonLTR GFF3 output
#   ${11}      combined GFF3 output (program B)
#   ${12}-${21} LTR parameters, read only when $5 is L; when exactly 12
#              arguments are given, ${12} is also read as the MPI process count
#
# The literal string "None" is passed for outputs that are not wanted.

# Nothing can run without the source tree, so fail early and with a non-zero
# status (a bare `exit` here would return the status of `echo`, i.e. success).
if [[ -z "${MGESCAN_SRC:-}" ]]; then
  echo "\$MGESCAN_SRC is not defined." >&2
  exit 1
fi

# Interpreter and entry point used to launch MGEScan.
# script_program is left empty when no `python` is found on PATH.
script_program=$(command -v python)
script=$MGESCAN_SRC/mgescan/cmd.py

#######################################
# Copy the positional arguments into the global variables used by the rest of
# the script.
# Globals written:
#   input_file, input_file_name, hmmsearch_version, output_file, program,
#   clade, en, rt, ltr_gff3, nonltr_gff3, both_gff3,
#   sw_rm, scaffold, min_dist, max_dist, min_len_ltr, max_len_ltr,
#   ltr_sim_condition, cluster_sim_condition, len_condition, repeatmasker,
#   nmpi, mpi_enabled
# Arguments:
#   the script's own "$@"
#######################################
parse_args() {
  input_file=$1
  #input_file_name=$2
  input_file_name=$(basename -- "$input_file")
  hmmsearch_version=$3
  output_file=$4
  program=$5 # N is nonLTR, L is LTR and B is both

  # Optional output parameters for nonLTR
  clade=$6
  en=$7
  rt=$8
  ltr_gff3=$9
  nonltr_gff3=${10}
  both_gff3=${11}

  # Start from a known state so that values exported by the calling
  # environment cannot leak into this run.
  sw_rm="" scaffold="" min_dist="" max_dist=""
  min_len_ltr="" max_len_ltr=""
  ltr_sim_condition="" cluster_sim_condition="" len_condition=""
  repeatmasker=""
  mpi_enabled=""

  #### for ltr between $12 and $21
  if [[ "$program" == "L" ]]; then
    sw_rm=${12}
    scaffold=${13}
    min_dist=${14}
    max_dist=${15}
    min_len_ltr=${16}
    max_len_ltr=${17}
    ltr_sim_condition=${18}
    cluster_sim_condition=${19}
    len_condition=${20}
    repeatmasker=${21}
  fi

  # With exactly 12 arguments the last one is read as the number of MPI
  # processes. NOTE(review): for program L this is the same slot as sw_rm -
  # confirm that callers never pass exactly 12 arguments in LTR mode.
  if (( $# == 12 )); then
    nmpi=${12}
    # Accept only a positive decimal integer; 10# keeps values such as "08"
    # from being parsed as (invalid) octal.
    if [[ "$nmpi" =~ ^[0-9]+$ ]] && (( 10#$nmpi >= 1 )); then
      mpi_enabled="--mpi=$nmpi"
    fi
  fi
}

parse_args "$@"

# /nfs/nfs4/home/lee212/mgescan/galaxy-dist/tools/mgescan/find_ltr.sh /nfs/nfs4/home/lee212/mgescan/galaxy-dist/database/files/000/dataset_1.dat /nfs/nfs4/home/lee212/mgescan/galaxy-dist/database/files/000/dataset_3.dat

#set path for transeq
#export PATH=$user_dir/mgescan/EMBOSS/bin:/usr/bin:$PATH

# EMBOSS is detected through its `transeq` program. `command -v` is used
# rather than comparing the output of `which` with "", because the exit status
# is the only reliable signal across systems.
if ! command -v transeq > /dev/null 2>&1; then
  echo "EMBOSS is not available." >&2
  exit 1
fi

#move to the working directory
# The directory that holds cmd.py becomes the current directory for the rest
# of the script.
work_dir=$(dirname -- "$script")
if ! cd -- "$work_dir"; then
  echo "Cannot change to working directory: $work_dir" >&2
  exit 1
fi
# Re-read the location after the cd so that work_dir is absolute even when
# $script was given as a relative path; the paths built below are documented
# as full paths and would otherwise be resolved against the new directory.
work_dir=$PWD

#create directory for input and output
# Each run gets its own scratch directory under ./input, with a seq/
# sub-directory for the staged input and a data/ sub-directory for results.
if ! mkdir -p input; then
  echo "Cannot create directory: $work_dir/input" >&2
  exit 1
fi
if ! t_dir=$(mktemp -d -p input); then #relative path
  echo "Cannot create a scratch directory under: $work_dir/input" >&2
  exit 1
fi
input_dir="$work_dir/$t_dir/seq" # full path
output_dir="$work_dir/$t_dir/data"
if ! mkdir -p -- "$input_dir" "$output_dir"; then
  echo "Cannot create scratch sub-directories under: $work_dir/$t_dir" >&2
  exit 1
fi

#make a copy of input
#/bin/cp $input_file $input_dir/$input_file_name

# Stage the input under $input_dir: a tar archive (compressed or not) is
# unpacked there, anything else is symlinked under its own basename.
# NOTE(review): the current directory has already been changed at this point,
# so $input_file is presumably expected to be an absolute path - confirm
# against the caller.

# Abort the run, removing the scratch directory created for it.
stage_input_failed() {
  echo "$1" >&2
  if [[ -n "${work_dir:-}" && -n "${t_dir:-}" ]]; then
    rm -rf -- "$work_dir/$t_dir"
  fi
  exit 1
}

# Check tar.gz
if tar -tf "$input_file" > /dev/null 2>&1; then
  # It seems pre_process.pl creates ./data/genome directory and makes a copy of a genome file.
  # Due to this reason, extracts compressed inputs to the input directory.
  # tar detects the compression by itself when reading, so one `tar -xf`
  # covers both the gzip-compressed and the plain case.
  tar -xf "$input_file" -C "$input_dir" 2> /dev/null \
    || stage_input_failed "Cannot extract input archive: $input_file"
else
  /bin/ln -s -- "$input_file" "$input_dir/$input_file_name" \
    || stage_input_failed "Cannot link input file: $input_file"
fi

# Verify that the installed hmmsearch matches the HMMER major version asked
# for on the command line. The help text is captured once; stderr is silenced
# on hmmsearch itself so that a missing binary does not print
# "command not found".
hmmer_help=$(hmmsearch -h 2> /dev/null)
VERSION2=$(grep "HMMER 2" <<< "$hmmer_help")
VERSION3=$(grep "HMMER 3" <<< "$hmmer_help")
if [[ "2" == "$hmmsearch_version" && -n "$VERSION2" ]]; then
  echo "$VERSION2 selected."
elif [[ "3" == "$hmmsearch_version" && -n "$VERSION3" ]]; then
  echo "$VERSION3 selected."
else
  # Reached when hmmsearch is missing, and also when the installed version
  # differs from the requested one.
  echo "HMMER is not available." >&2
  # The per-run scratch directory already exists; do not leave it behind.
  if [[ -n "${work_dir:-}" && -n "${t_dir:-}" ]]; then
    rm -rf -- "$work_dir/$t_dir"
  fi
  exit 1
fi

#######################################
# Map the one-letter program code to the sub-command name given to cmd.py.
# Arguments:
#   $1 - program code: L, N, or anything else
# Outputs:
#   "ltr" for L, "nonltr" for N, "both" for every other value (including an
#   empty one)
#######################################
resolve_program_name() {
  case "$1" in
    L) echo "ltr" ;;
    N) echo "nonltr" ;;
    *) echo "both" ;;
  esac
}

program_name=$(resolve_program_name "$program")

# Run MGEScan on the staged input, copy its result files to the destinations
# supplied by the caller, then dispose of the per-run scratch directory.
# The script exits with the status of the MGEScan run.

# True when an output destination was actually requested: the literal string
# "None" (or an empty value) stands for "not wanted".
is_requested() {
  [[ -n "$1" && "$1" != "None" ]]
}

#run
# Only the sub-command, the input directory, the output directory and the
# optional --mpi flag are passed on; the HMMER version and the LTR tuning
# parameters are not forwarded to cmd.py.
run_cmd=()
if [[ -n "$script_program" ]]; then
  run_cmd+=("$script_program")
fi # with no interpreter found, the script is invoked directly
run_cmd+=("$script" "$program_name" "$input_dir/" "--output=$output_dir/")
if [[ -n "$mpi_enabled" ]]; then
  run_cmd+=("$mpi_enabled")
fi
"${run_cmd[@]}"
# Remember the outcome now: every later command overwrites $?.
run_status=$?

if (( run_status == 0 )); then
  #make a copy of output
  if [[ "$program" != "N" ]]; then
    /bin/cp -- "$output_dir/ltr/ltr.out" "$output_file"
    if is_requested "$ltr_gff3"; then
      /bin/cp -- "$output_dir/ltr/ltr.gff3" "$ltr_gff3"
    fi

    if is_requested "$repeatmasker"; then
      # chr2L.fa.cat.gz chr2L.fa.masked chr2L.fa.out chr2L.fa.out.pos chr2L.fa.tbl
      /bin/cp -- "$output_dir/repeatmasker/${input_file_name}.out" "$repeatmasker"
    fi
  fi

  if [[ "$program" != "L" ]]; then
    # The scratch directory is already unique to this run, so a fixed archive
    # name is enough. -P stores the member names with their absolute paths.
    compressed_file="$output_dir/nonltr_info.tar.gz"
    /bin/tar -czPf "$compressed_file" "$output_dir/info"

    # These three copies are best-effort: a missing source is not reported.
    if is_requested "$clade"; then
      /bin/cp -- "$compressed_file" "$clade" 2> /dev/null
    fi
    if is_requested "$en"; then
      /bin/cp -- "$output_dir/info/validation/en" "$en" 2> /dev/null
    fi
    if is_requested "$rt"; then
      /bin/cp -- "$output_dir/info/validation/rt" "$rt" 2> /dev/null
    fi

    if is_requested "$nonltr_gff3"; then
      /bin/cp -- "$output_dir/info/nonltr.gff3" "$nonltr_gff3"
      # nonltr.gff3
      ##gff-version 3
      #chr2L.fa MGEScan_nonLTR mobile_genetic_element 19670384 19676921 . . . ID=chr2L.fa_19670384
      #chr2L.fa MGEScan_nonLTR mobile_genetic_element 17689430 17695994 . . . ID=chr2L.fa_17689430
      #chr2L.fa MGEScan_nonLTR mobile_genetic_element 11897186 11903717 . . . ID=chr2L.fa_11897186
      #chr2L.fa MGEScan_nonLTR mobile_genetic_element 49574 56174 . . . ID=chr2L.fa_49574
    fi
  fi

  # Both LTR, nonLTR executed: the combined GFF3 is the LTR records followed
  # by the nonLTR records, appended to whatever the destination already holds.
  if [[ "$program" == "B" ]] && is_requested "$both_gff3"; then
    /bin/cat -- "$output_dir/ltr/ltr.gff3" "$output_dir/info/nonltr.gff3" >> "$both_gff3"
  fi
fi

# delete temp directory
# Guarded on both components: with an empty t_dir the path would collapse to
# $work_dir itself and the removal would take the source tree with it.
if [[ -n "${work_dir:-}" && -n "${t_dir:-}" ]]; then
  if (( run_status == 0 )); then
    rm -rf -- "$work_dir/$t_dir"
  else
    # Keep the scratch data of a failed run for inspection.
    mkdir -p -- "$work_dir/error-cases" \
      && cp -pr -- "$work_dir/$t_dir" "$work_dir/error-cases/"
  fi
fi

exit "$run_status"