annotate tools/human_genome_variation/sift_variants_wrapper.sh @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env bash
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 input_file=$1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 output_file=$2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 org=$3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 db_loc=$4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 chrom_col=$5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 pos_col=$6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 base=$7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 allele_col=$8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 strand_col=$9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 comment_col=${10}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 output_opts=${11}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 working_dir=$PWD
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 sift_input="$working_dir/sift_input.txt"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 sift_output="$working_dir/sift_output.txt"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 ## make sure input file column selections are mutually exclusive ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 ERROR=0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 declare -a col_use
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 function check_col () {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 local col=$1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 local use=$2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 local int=$3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 if [ -n "${col//[0-9]}" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 if [ $int -eq 1 ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 echo "ERROR: invalid value for $use column: $col" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 ERROR=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 return
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 local cur=${col_use[$col]}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 if [ -n "$cur" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 echo "ERROR: $use column is the same as $cur column" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 col_use[$col]="${cur},$use"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 ERROR=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 col_use[$col]=$use
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 check_col $chrom_col 'chromosome' 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 check_col $pos_col 'position' 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 check_col $allele_col 'allele' 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 check_col $strand_col 'strand' 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 check_col $comment_col 'comment' 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 if [ $ERROR -ne 0 ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 ## get/check the db directory from the argument org,db_loc ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 db_dir=$( awk '$1 == org { print $2 }' org=$org $db_loc )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 if [ -z "$db_dir" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 echo "Can't find dbkey \"$org\" in loc file \"$db_loc\"" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 if [ ! -d "$db_dir" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 echo "Can't access SIFT database directory \"$db_dir\"" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 ## create input file for SIFT_exome_nssnvs.pl ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 if [ ! -r "$input_file" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 echo "Can't read input file \"$input_file\"" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 if [ $base -eq 0 ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 beg_col="$pos_col"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 end_col="$pos_col + 1"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 pos_adj='$2 = $2 - 1'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 beg_col="$pos_col - 1"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 end_col="$pos_col"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 pos_adj=''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 strand_cvt=''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 if [ \( "$strand_col" = "+" \) ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 strand='"1"'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 elif [ \( "$strand_col" = "-" \) ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 strand='"-1"'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 strand="\$$strand_col"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 strand_cvt='if ('"${strand}"' == "+") {'"${strand}"' = "1"} else if ('"${strand}"' == "-") {'"${strand}"' = "-1"}'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 print_row='print $'"${chrom_col}"', $'"${beg_col}"', $'"${end_col}"', '"${strand}"', $'"${allele_col}"''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 if [ "$comment_col" != "-" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 print_row=''"${print_row}"', $'"${comment_col}"''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 awk '
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 BEGIN {FS="\t";OFS=","}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 $'"${chrom_col}"' ~ /^[cC][hH][rR]/ {$'"${chrom_col}"' = substr($'"${chrom_col}"',4)}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 '"${strand_cvt}"'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 '"${print_row}"'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 ' "$input_file" > "$sift_input"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 ## run SIFT_exome_nssnvs.pl command line program ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 if [ "$output_opts" = "None" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 output_opts=""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 output_opts=$( echo "$output_opts" | sed -e 's/,/ 1 -/g' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 output_opts="-$output_opts 1"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 SIFT_exome_nssnvs.pl -i "$sift_input" -d "$db_dir" -o "$working_dir" $output_opts &> "$sift_output"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 if [ $? -ne 0 ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 echo "failed: SIFT_exome_nssnvs.pl -i \"$sift_input\" -d \"$db_dir\" -o \"$working_dir\" $output_opts"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 ## locate the SIFT_exome_nssnvs.pl output file ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 sift_pid=$( sed -n -e 's/^.*Your job id is \([0-9][0-9]*\) and is currently running.*$/\1/p' "$sift_output" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 if [ -z "$sift_pid" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 echo "Can't find SIFT pid in \"$sift_output\"" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 sift_outdir="$working_dir/$sift_pid"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 if [ ! -d "$sift_outdir" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 echo "Can't access SIFT output directory \"$sift_outdir\"" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 sift_outfile="$sift_outdir/${sift_pid}_predictions.tsv"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 if [ ! -r "$sift_outfile" ]; then
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 echo "Can't access SIFT output file \"$sift_outfile\"" 1>&2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 exit 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 fi
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 ## create galaxy output file ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 awk '
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 BEGIN {FS="\t";OFS="\t"}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 NR == 1 {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 $12 = "Num seqs at position"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 $1 = "Chrom\tPosition\tStrand\tAllele"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 print
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 NR != 1 {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 $1 = "chr" $1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 gsub(/,/, "\t", $1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 print
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 ' "$sift_outfile" | awk '
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 BEGIN {FS="\t";OFS="\t"}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 NR == 1 {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 print "#" $0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 NR != 1 {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 if ($3 == "1") {$3 = "+"} else if ($3 == "-1") {$3 = "-"}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 '"${pos_adj}"'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 print
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 ' > "$output_file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 ## cleanup ##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 ################################################################################
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 rm -rf "$sift_outdir" "$sift_input" "$sift_output"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184