annotate mgescan.sh @ 1:b7ea9a0e2714 draft

Uploaded
author hyungrolee
date Sat, 14 Jun 2014 19:06:49 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
b7ea9a0e2714 Uploaded
hyungrolee
parents:
diff changeset
#!/bin/bash
#
# mgescan.sh - wrapper that runs retrotminer.py on a single input sequence
# file inside a throw-away working directory and then copies the selected
# result files to the destinations given on the command line.
#
# Example invocation (taken from the original header comment):
#   mgescan.sh $input $input.name 3 $output L None None None $ltr_gff3 None \
#     $sw_rm "$scaffold" $min_dist $max_dist $min_len_ltr $max_len_ltr \
#     $ltr_sim_condition $cluster_sim_condition $len_condition $repeatmasker
#
# Positional arguments:
#   $1   input file to analyse
#   $2   file name the input is stored under inside the working directory
#   $3   hmmsearch version; "2" prepends the bundled HMMER2.0 to PATH
#   $4   destination for ltr.out
#   $5   program selector: N is nonLTR, L is LTR and B is both
#   $6   destination for the nonLTR "info/full" result (clade)
#   $7   destination for the nonLTR validation "en" file
#   $8   destination for the nonLTR validation "rt" file
#   $9   destination for ltr.gff3, or the literal "None" to skip it
#   $10  destination for nonltr.gff3, or the literal "None" to skip it
#   $11-$19  LTR tuning options (sw_rm, scaffold, min_dist, max_dist,
#            min_len_ltr, max_len_ltr, ltr_sim_condition,
#            cluster_sim_condition, len_condition). They are accepted for
#            interface compatibility but are currently NOT forwarded to
#            retrotminer.py.
#   $20  destination for the RepeatMasker .out file, or "None" to skip it
#
# Exit status: that of the final clean-up command, as in earlier revisions.

# Installation prefix; every tool path below is derived from it.
user_dir=/u/lee212

# Activate the virtualenv first so that `python` resolves inside it.
source "$user_dir/virtualenv/retrotminer/bin/activate"
script_program=$(command -v python)
if [[ -z "$script_program" ]]; then
  echo "mgescan.sh: no python interpreter found in PATH" >&2
  exit 1
fi
script=$user_dir/github/retrotminer/retrotminer/retrotminer.py

input_file=$1
input_file_name=$2
hmmsearch_version=$3
output_file=$4
program=$5 # N is nonLTR, L is LTR and B is both
# Optional output parameters for nonLTR
clade=$6
en=$7
rt=$8
ltr_gff3=$9
nonltr_gff3=${10}
# LTR options, $11 to $20 (parsed but not passed on, see header)
sw_rm=${11}
scaffold=${12}
min_dist=${13}
max_dist=${14}
min_len_ltr=${15}
max_len_ltr=${16}
ltr_sim_condition=${17}
cluster_sim_condition=${18}
len_condition=${19}
repeatmasker=${20}

# Load the user's shell environment.
source "$user_dir/.bashrc"
source "$user_dir/.bash_profile"

# Set PATH for transeq (EMBOSS).
export PATH="$user_dir/retrotminer/EMBOSS/bin:/usr/bin:$PATH"

# Move to the directory that holds retrotminer.py. Abort if that fails:
# everything below creates and deletes paths relative to it.
work_dir=$(dirname "$script")
cd "$work_dir" || exit 1

# Create a private directory tree for this run's input and output.
mkdir -p input
# mktemp prints a path relative to work_dir. An empty result would later turn
# the clean-up into "rm -rf $work_dir/", so stop right here on failure.
t_dir=$(mktemp -p input -d) || exit 1
input_dir="$work_dir/$t_dir/seq" # full path
output_dir="$work_dir/$t_dir/data"
mkdir -p "$input_dir"
mkdir -p "$output_dir"

# Accumulates the first-class failures of this run (analysis and copies) so
# that the clean-up step at the bottom can decide what to do.
status=0

# Make a copy of the input under the requested name.
/bin/cp -- "$input_file" "$input_dir/$input_file_name" || status=$?

if [[ "$hmmsearch_version" == "2" ]]; then
  export PATH="$user_dir/retrotminer/HMMER2.0/bin:$PATH"
else
  export PATH="/usr/bin:$PATH"
fi

# Map the selector onto the retrotminer.py sub-command.
# NOTE(review): "B" (both) is mapped to "nonltr" here although the LTR outputs
# are still copied below; confirm that this is what retrotminer.py expects.
if [[ "$program" == "L" ]]; then
  program_name="ltr"
else
  program_name="nonltr"
fi

# Run the analysis and remember its exit status.
"$script_program" "$script" "$program_name" "$input_dir/" \
  --output="$output_dir/" || status=$?

# Copy the LTR results (program L or B).
if [[ "$program" != "N" ]]; then
  /bin/cp -- "$output_dir/ltr/ltr.out" "$output_file" || status=$?
  if [[ "$ltr_gff3" != "None" ]]; then
    /bin/cp -- "$output_dir/ltr/ltr.gff3" "$ltr_gff3" || status=$?
  fi

  if [[ "$repeatmasker" != "None" ]]; then
    # RepeatMasker writes e.g. chr2L.fa.cat.gz chr2L.fa.masked chr2L.fa.out
    # chr2L.fa.out.pos chr2L.fa.tbl; only the .out file is exported.
    /bin/cp -- "$output_dir/repeatmasker/${input_file_name}.out" \
      "$repeatmasker" || status=$?
  fi
fi

# Copy the nonLTR results (program N or B).
if [[ "$program" != "L" ]]; then
  # The glob is intentionally left unquoted so that it expands.
  /bin/cp -- "$output_dir"/info/full/*/* "$clade" || status=$?
  /bin/cp -- "$output_dir/info/validation/en" "$en" || status=$?
  /bin/cp -- "$output_dir/info/validation/rt" "$rt" || status=$?
  if [[ "$nonltr_gff3" != "None" ]]; then
    # nonltr.gff3 looks like:
    #   ##gff-version 3
    #   chr2L.fa MGEScan_nonLTR mobile_genetic_element 19670384 19676921 . . . ID=chr2L.fa_19670384
    /bin/cp -- "$output_dir/info/nonltr.gff3" "$nonltr_gff3" || status=$?
  fi
fi

# Delete the temp directory after a clean run; otherwise keep a copy of it
# under error-cases/ for inspection.
if ((status == 0)); then
  rm -rf -- "${work_dir:?}/${t_dir:?}"
else
  mkdir -p -- "$work_dir/error-cases"
  cp -pr -- "$work_dir/$t_dir" "$work_dir/error-cases/"
fi