Mercurial > repos > xuebing > sharplabtool
comparison tools/human_genome_variation/sift_variants_wrapper.sh @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env bash | |
2 | |
3 input_file=$1 | |
4 output_file=$2 | |
5 org=$3 | |
6 db_loc=$4 | |
7 chrom_col=$5 | |
8 pos_col=$6 | |
9 base=$7 | |
10 allele_col=$8 | |
11 strand_col=$9 | |
12 comment_col=${10} | |
13 output_opts=${11} | |
14 | |
15 working_dir=$PWD | |
16 sift_input="$working_dir/sift_input.txt" | |
17 sift_output="$working_dir/sift_output.txt" | |
18 | |
19 ################################################################################ | |
20 ## make sure input file column selections are mutually exclusive ## | |
21 ################################################################################ | |
22 ERROR=0 | |
23 declare -a col_use | |
24 | |
25 function check_col () { | |
26 local col=$1 | |
27 local use=$2 | |
28 local int=$3 | |
29 | |
30 if [ -n "${col//[0-9]}" ]; then | |
31 if [ $int -eq 1 ]; then | |
32 echo "ERROR: invalid value for $use column: $col" 1>&2 | |
33 ERROR=1 | |
34 fi | |
35 return | |
36 fi | |
37 | |
38 local cur=${col_use[$col]} | |
39 if [ -n "$cur" ]; then | |
40 echo "ERROR: $use column is the same as $cur column" 1>&2 | |
41 col_use[$col]="${cur},$use" | |
42 ERROR=1 | |
43 else | |
44 col_use[$col]=$use | |
45 fi | |
46 } | |
47 | |
48 check_col $chrom_col 'chromosome' 1 | |
49 check_col $pos_col 'position' 1 | |
50 check_col $allele_col 'allele' 1 | |
51 check_col $strand_col 'strand' 0 | |
52 check_col $comment_col 'comment' 0 | |
53 | |
54 if [ $ERROR -ne 0 ]; then | |
55 exit 1 | |
56 fi | |
57 | |
58 ################################################################################ | |
59 ## get/check the db directory from the argument org,db_loc ## | |
60 ################################################################################ | |
61 db_dir=$( awk '$1 == org { print $2 }' org=$org $db_loc ) | |
62 | |
63 if [ -z "$db_dir" ]; then | |
64 echo "Can't find dbkey \"$org\" in loc file \"$db_loc\"" 1>&2 | |
65 exit 1 | |
66 fi | |
67 | |
68 if [ ! -d "$db_dir" ]; then | |
69 echo "Can't access SIFT database directory \"$db_dir\"" 1>&2 | |
70 exit 1 | |
71 fi | |
72 | |
73 ################################################################################ | |
74 ## create input file for SIFT_exome_nssnvs.pl ## | |
75 ################################################################################ | |
76 if [ ! -r "$input_file" ]; then | |
77 echo "Can't read input file \"$input_file\"" 1>&2 | |
78 exit 1 | |
79 fi | |
80 | |
81 if [ $base -eq 0 ]; then | |
82 beg_col="$pos_col" | |
83 end_col="$pos_col + 1" | |
84 pos_adj='$2 = $2 - 1' | |
85 else | |
86 beg_col="$pos_col - 1" | |
87 end_col="$pos_col" | |
88 pos_adj='' | |
89 fi | |
90 | |
91 strand_cvt='' | |
92 if [ \( "$strand_col" = "+" \) ]; then | |
93 strand='"1"' | |
94 elif [ \( "$strand_col" = "-" \) ]; then | |
95 strand='"-1"' | |
96 else | |
97 strand="\$$strand_col" | |
98 strand_cvt='if ('"${strand}"' == "+") {'"${strand}"' = "1"} else if ('"${strand}"' == "-") {'"${strand}"' = "-1"}' | |
99 fi | |
100 | |
101 print_row='print $'"${chrom_col}"', $'"${beg_col}"', $'"${end_col}"', '"${strand}"', $'"${allele_col}"'' | |
102 if [ "$comment_col" != "-" ]; then | |
103 print_row=''"${print_row}"', $'"${comment_col}"'' | |
104 fi | |
105 | |
106 awk ' | |
107 BEGIN {FS="\t";OFS=","} | |
108 $'"${chrom_col}"' ~ /^[cC][hH][rR]/ {$'"${chrom_col}"' = substr($'"${chrom_col}"',4)} | |
109 { | |
110 '"${strand_cvt}"' | |
111 '"${print_row}"' | |
112 } | |
113 ' "$input_file" > "$sift_input" | |
114 | |
115 ################################################################################ | |
116 ## run SIFT_exome_nssnvs.pl command line program ## | |
117 ################################################################################ | |
118 if [ "$output_opts" = "None" ]; then | |
119 output_opts="" | |
120 else | |
121 output_opts=$( echo "$output_opts" | sed -e 's/,/ 1 -/g' ) | |
122 output_opts="-$output_opts 1" | |
123 fi | |
124 | |
125 SIFT_exome_nssnvs.pl -i "$sift_input" -d "$db_dir" -o "$working_dir" $output_opts &> "$sift_output" | |
126 if [ $? -ne 0 ]; then | |
127 echo "failed: SIFT_exome_nssnvs.pl -i \"$sift_input\" -d \"$db_dir\" -o \"$working_dir\" $output_opts" | |
128 exit 1 | |
129 fi | |
130 | |
131 ################################################################################ | |
132 ## locate the SIFT_exome_nssnvs.pl output file ## | |
133 ################################################################################ | |
134 sift_pid=$( sed -n -e 's/^.*Your job id is \([0-9][0-9]*\) and is currently running.*$/\1/p' "$sift_output" ) | |
135 | |
136 if [ -z "$sift_pid" ]; then | |
137 echo "Can't find SIFT pid in \"$sift_output\"" 1>&2 | |
138 exit 1 | |
139 fi | |
140 | |
141 sift_outdir="$working_dir/$sift_pid" | |
142 if [ ! -d "$sift_outdir" ]; then | |
143 echo "Can't access SIFT output directory \"$sift_outdir\"" 1>&2 | |
144 exit 1 | |
145 fi | |
146 | |
147 sift_outfile="$sift_outdir/${sift_pid}_predictions.tsv" | |
148 if [ ! -r "$sift_outfile" ]; then | |
149 echo "Can't access SIFT output file \"$sift_outfile\"" 1>&2 | |
150 exit 1 | |
151 fi | |
152 | |
153 ################################################################################ | |
154 ## create galaxy output file ## | |
155 ################################################################################ | |
156 awk ' | |
157 BEGIN {FS="\t";OFS="\t"} | |
158 NR == 1 { | |
159 $12 = "Num seqs at position" | |
160 $1 = "Chrom\tPosition\tStrand\tAllele" | |
161 print | |
162 } | |
163 NR != 1 { | |
164 $1 = "chr" $1 | |
165 gsub(/,/, "\t", $1) | |
166 print | |
167 } | |
168 ' "$sift_outfile" | awk ' | |
169 BEGIN {FS="\t";OFS="\t"} | |
170 NR == 1 { | |
171 print "#" $0 | |
172 } | |
173 NR != 1 { | |
174 if ($3 == "1") {$3 = "+"} else if ($3 == "-1") {$3 = "-"} | |
175 '"${pos_adj}"' | |
176 print | |
177 } | |
178 ' > "$output_file" | |
179 | |
180 ################################################################################ | |
181 ## cleanup ## | |
182 ################################################################################ | |
183 rm -rf "$sift_outdir" "$sift_input" "$sift_output" | |
184 |