Mercurial > repos > hyungrolee > mgescan
comparison mgescan.sh @ 7:b14f55bb20be draft
Uploaded
author | hyungrolee |
---|---|
date | Mon, 15 Feb 2016 03:33:48 -0500 |
parents | |
children | 1a143426370c |
comparison
equal
deleted
inserted
replaced
6:f65687bfdc0a | 7:b14f55bb20be |
---|---|
#!/bin/bash
#
# mgescan.sh - wrapper around $MGESCAN_SRC/mgescan/cmd.py.
#
# Stages one input in a private work directory, runs MGEScan (LTR, nonLTR or
# both) on it and copies the results to the output paths given as arguments.
#
# Usage:
#   mgescan.sh INPUT INPUT_NAME HMMER_VERSION OUTPUT PROGRAM \
#              CLADE EN RT LTR_GFF3 NONLTR_GFF3 BOTH_GFF3 [NMPI | LTR options]
#
# Example (LTR mode, as issued by the tool definition):
#   mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None \
#              None $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr \
#              $max_len_ltr $ltr_sim_condition $cluster_sim_condition \
#              $len_condition $repeatmasker
#
# Arguments:
#   $1   input: a sequence file, or a tar / tar.gz archive of sequence files.
#        Pass an absolute path: the script changes directory before reading it.
#   $2   ignored; the input name is taken from the basename of $1.
#   $3   HMMER major version to require: 2 or 3.
#   $4   main output; receives ltr/ltr.out unless PROGRAM is N.
#   $5   PROGRAM: N = nonLTR, L = LTR, anything else = both.
#   $6   clade: receives a tar.gz of the nonLTR info directory.
#   $7   en:    receives info/validation/en.
#   $8   rt:    receives info/validation/rt.
#   $9   LTR gff3 output.
#   $10  nonLTR gff3 output.
#   $11  combined gff3 output; the LTR and nonLTR gff3 are appended to it
#        when PROGRAM is B.
#   $12  when exactly 12 arguments are given: number of MPI processes,
#        forwarded as --mpi=N if it is an integer >= 1.
#   $12-$21 when PROGRAM is L: sw_rm, scaffold, min_dist, max_dist,
#        min_len_ltr, max_len_ltr, ltr_sim_condition, cluster_sim_condition,
#        len_condition, repeatmasker. Only $21 (RepeatMasker output path) is
#        used here; the others are accepted but not forwarded to cmd.py.
#   An output argument equal to "None" (or empty) is skipped.
#
# Environment:
#   MGESCAN_SRC  root of the MGEScan source tree (required).
#
# Exit status:
#   0 on success; non-zero if a prerequisite is missing, the input cannot be
#   staged, MGEScan fails, or a requested result file cannot be copied.

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# True when an output argument names a real destination.
is_requested() {
  [[ -n "$1" && "$1" != "None" ]]
}

# Copy a result that the caller asked for; a failure marks the run as failed.
copy_required() {
  if ! /bin/cp -- "$1" "$2" && ((status == 0)); then
    status=1
  fi
}

# Best-effort copy: these results may legitimately be absent, so errors are
# silenced and do not affect the exit status.
copy_optional() {
  if is_requested "$2"; then
    /bin/cp -- "$1" "$2" 2>/dev/null
  fi
}

# Remove the private work directory.
discard_workspace() {
  rm -rf -- "${work_dir:?}/${t_dir:?}"
}

if [[ -z "$MGESCAN_SRC" ]]; then
  die "\$MGESCAN_SRC is not defined."
fi

if (($# < 4)); then
  die "Usage: ${0##*/} INPUT INPUT_NAME HMMER_VERSION OUTPUT PROGRAM [...]"
fi

input_file=$1
input_file_name=$(basename -- "$input_file")
hmmsearch_version=$3
output_file=$4
program=$5 # N is nonLTR, L is LTR and B is both
# Optional output parameters for nonLTR
clade=$6
en=$7
rt=$8
ltr_gff3=$9
nonltr_gff3=${10}
both_gff3=${11}

# The LTR options occupy $12..$21; only the RepeatMasker output is used here.
repeatmasker=
if [[ "$program" == "L" ]]; then
  repeatmasker=${21}
fi

# With exactly 12 arguments the last one is the MPI process count.
mpi_args=()
if (($# == 12)); then
  nmpi=${12}
  if [[ "$nmpi" =~ ^[0-9]+$ ]] && ((10#$nmpi >= 1)); then
    mpi_args=("--mpi=$nmpi")
  fi
fi

case "$program" in
  L) program_name="ltr" ;;
  N) program_name="nonltr" ;;
  *) program_name="both" ;;
esac

# --- Preflight checks (done before anything is created on disk) -------------

script_program=$(command -v python) || die "python is not available."

# transeq is part of EMBOSS; only its exit status matters here.
if ! transeq --version 2>/dev/null; then
  die "EMBOSS is not available."
fi

# The installed hmmsearch must announce the requested major version.
case "$hmmsearch_version" in
  2 | 3)
    hmmer_banner=$(hmmsearch -h 2>/dev/null | grep "HMMER $hmmsearch_version")
    ;;
  *)
    hmmer_banner=
    ;;
esac
if [[ -z "$hmmer_banner" ]]; then
  die "HMMER is not available."
fi
printf '%s selected.\n' "$hmmer_banner"

# --- Work directory ---------------------------------------------------------

# Resolve to an absolute path so that later paths stay valid after the cd.
work_dir=$(CDPATH='' cd -- "$MGESCAN_SRC/mgescan" && pwd) \
  || die "Cannot access $MGESCAN_SRC/mgescan."
script="$work_dir/cmd.py"
cd -- "$work_dir" || die "Cannot change directory to $work_dir."

mkdir -p input || die "Cannot create $work_dir/input."
t_dir=$(mktemp -p input -d) \
  || die "Cannot create a work directory under $work_dir/input."
input_dir="$work_dir/$t_dir/seq"   # full path
output_dir="$work_dir/$t_dir/data" # full path
if ! mkdir -p -- "$input_dir" "$output_dir"; then
  discard_workspace
  die "Cannot create $input_dir and $output_dir."
fi

# --- Stage the input --------------------------------------------------------

if tar tf "$input_file" &>/dev/null; then
  # Archive: unpack it, trying gzip first and plain tar second.
  if ! tar xzf "$input_file" -C "$input_dir" 2>/dev/null \
    && ! tar xf "$input_file" -C "$input_dir" 2>/dev/null; then
    discard_workspace
    die "Cannot extract $input_file."
  fi
else
  # Single file: link it in rather than copying it.
  if ! /bin/ln -s -- "$input_file" "$input_dir/$input_file_name"; then
    discard_workspace
    die "Cannot link $input_file into $input_dir."
  fi
fi

# --- Run --------------------------------------------------------------------

"$script_program" "$script" "$program_name" "$input_dir/" \
  "--output=$output_dir/" "${mpi_args[@]}"
# Keep the run's own status: it decides below whether the work directory is
# removed or preserved, and becomes the exit status of this script.
status=$?

# --- Collect the results ----------------------------------------------------

if [[ "$program" != "N" ]]; then
  copy_required "$output_dir/ltr/ltr.out" "$output_file"
  if is_requested "$ltr_gff3"; then
    copy_required "$output_dir/ltr/ltr.gff3" "$ltr_gff3"
  fi
  if is_requested "$repeatmasker"; then
    # RepeatMasker writes <name>.cat.gz, .masked, .out, .out.pos and .tbl;
    # only the .out table is returned.
    copy_required "$output_dir/repeatmasker/${input_file_name}.out" \
      "$repeatmasker"
  fi
fi

if [[ "$program" != "L" ]]; then
  # The work directory is already unique, so a fixed archive name is safe.
  # -P keeps the absolute member names.
  compressed_file="$output_dir/nonltr_info.tar.gz"
  /bin/tar -czPf "$compressed_file" "$output_dir/info"
  copy_optional "$compressed_file" "$clade"
  copy_optional "$output_dir/info/validation/en" "$en"
  copy_optional "$output_dir/info/validation/rt" "$rt"
  if is_requested "$nonltr_gff3"; then
    copy_required "$output_dir/info/nonltr.gff3" "$nonltr_gff3"
  fi
fi

if [[ "$program" == "B" ]]; then
  # LTR records first, then nonLTR records, appended to the combined file.
  if ! /bin/cat "$output_dir/ltr/ltr.gff3" "$output_dir/info/nonltr.gff3" \
    >>"$both_gff3" && ((status == 0)); then
    status=1
  fi
fi

# --- Clean up ---------------------------------------------------------------

if ((status == 0)); then
  discard_workspace
else
  # Keep a copy of the failed run for inspection.
  mkdir -p -- "$work_dir/error-cases"
  cp -pr -- "$work_dir/$t_dir" "$work_dir/error-cases/"
fi

exit "$status"