comparison tools/annovar/annovar.sh @ 8:d6af2a78617f draft

added support for databases upto 4 march 2016
author saskia-hiltemann
date Fri, 04 Mar 2016 11:32:50 -0500
parents 69e2067a120d
children f7ff063c738e
comparison
equal deleted inserted replaced
7:69e2067a120d 8:d6af2a78617f
2 2
3 test="N" 3 test="N"
4 dofilter="N" 4 dofilter="N"
5 5
6 ######################### 6 #########################
7 # DEFINE SOME 7 # DEFINE SOME
8 # FUNCTIONS 8 # FUNCTIONS
9 ######################### 9 #########################
10 10
11 function usage(){ 11 function usage(){
12 echo "usage: $0 todo" 12 echo "usage: $0 todo"
13 } 13 }
14 14
15 function runfilter(){ 15 function runfilter(){
16 ifile=$1 16 ifile=$1
17 columnname=$2 17 columnname=$2
18 threshold=$3 18 threshold=$3
19 19
20 if [[ $threshold == "-1" ]] 20 if [[ $threshold == "-1" ]]
21 then 21 then
22 echo "not filtering" 22 echo "not filtering"
23 return 23 return
24 fi 24 fi
25 25
26 echo "filtering: $columnname, $threshold" 26 echo "filtering: $columnname, $threshold"
27 cat $ifile 27 cat $ifile
28 28
29 #get column number corresponding to column header 29 #get column number corresponding to column header
30 column=`awk 'BEGIN{ 30 column=`awk 'BEGIN{
31 FS="\t"; 31 FS="\t";
32 col=-1 32 col=-1
33 }{ 33 }{
34 if(FNR==1){ 34 if(FNR==1){
35 for(i=1;i<=NF;i++){ 35 for(i=1;i<=NF;i++){
36 if($i == "'"${columnname}"'") 36 if($i == "'"${columnname}"'")
37 col=i 37 col=i
38 } 38 }
39 print col 39 print col
40 } 40 }
41 }' $ifile ` 41 }' $ifile `
42 42
43 if [ $column == -1 ] 43 if [ $column == -1 ]
44 then 44 then
45 echo "no such column, exiting" 45 echo "no such column, exiting"
46 return 46 return
47 fi 47 fi
48 48
49 #perform filtering using the threshold 49 #perform filtering using the threshold
50 awk 'BEGIN{ 50 awk 'BEGIN{
51 FS="\t"; 51 FS="\t";
52 OFS="\t"; 52 OFS="\t";
53 }{ 53 }{
54 if(FNR==1) 54 if(FNR==1)
55 print $0; 55 print $0;
56 if(FNR>1){ 56 if(FNR>1){
57 if( $"'"${column}"'" == "" ) # empty column, then print 57 if( $"'"${column}"'" == "" ) # empty column, then print
58 print $0 58 print $0
59 else if ("'"${threshold}"'" == "text"){} #if set to text dont check threshold 59 else if ("'"${threshold}"'" == "text"){} #if set to text dont check threshold
60 60
61 else if ($"'"${column}"'" < "'"${threshold}"'") #else do check it 61 else if ($"'"${column}"'" < "'"${threshold}"'") #else do check it
62 print $0 62 print $0
63 } 63 }
64 }' $ifile > tmpfile 64 }' $ifile > tmpfile
65 65
66 mv tmpfile $ifile 66 mv tmpfile $ifile
67 } 67 }
68 68
69 # arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols 69 # arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols
70 function joinresults(){ 70 function joinresults(){
71 ofile=$1 71 ofile=$1
72 rfile=$2 72 rfile=$2
73 colchr=$3 73 colchr=$3
74 colstart=$4 74 colstart=$4
75 colend=$5 75 colend=$5
76 colref=$6 76 colref=$6
77 colobs=$7 77 colobs=$7
78 addcols=$8 #e.g. "B.col1,B.col2" 78 addcols=$8 #e.g. "B.col1,B.col2"
79 79
80 test="N" 80 test="N"
81 81
82 # echo "joining result with original file" 82 # echo "joining result with original file"
83 if [ $test == "Y" ] 83 if [ $test == "Y" ]
84 then 84 then
85 echo "ofile: $ofile" 85 echo "ofile: $ofile"
86 head $ofile 86 head $ofile
87 echo "rfile: $rfile" 87 echo "rfile: $rfile"
88 head $rfile 88 head $rfile
89 fi 89 fi
90 numlines=`wc $rfile | cut -d" " -f2` 90 numlines=`wc $rfile | cut -d" " -f2`
91 91
92 # if empty results file, just add header fields 92 # if empty results file, just add header fields
93 if [[ ! -s $rfile ]] 93 if [[ ! -s $rfile ]]
94 then 94 then
95 dummycol=${addcols:2} 95 dummycol=${addcols:2}
96 outputcol=${dummycol//",B."/" "} 96 outputcol=${dummycol//",B."/" "}
97 numcommas=`echo "$addcols" | grep -o "," | wc -l` 97 numcommas=`echo "$addcols" | grep -o "," | wc -l`
98 98
99 awk 'BEGIN{FS="\t";OFS="\t"}{ 99 awk 'BEGIN{FS="\t";OFS="\t"}{
100 if(FNR==1) 100 if(FNR==1)
101 print $0,"'"$outputcol"'"; 101 print $0,"'"$outputcol"'";
102 else{ 102 else{
103 printf $0 103 printf $0
104 for(i=0;i<="'"$numcommas"'"+1;i++) 104 for(i=0;i<="'"$numcommas"'"+1;i++)
105 printf "\t" 105 printf "\t"
106 printf "\n" 106 printf "\n"
107 } 107 }
108 }END{}' $ofile > tempofile 108 }END{}' $ofile > tempofile
109 109
110 mv tempofile $ofile 110 mv tempofile $ofile
111 return 111 return
112 fi 112 fi
113 113
114 114
115 #get input file column names for cgatools join 115 #get input file column names for cgatools join
116 col_chr_name=`head -1 $rfile | cut -f${colchr}` 116 col_chr_name=`head -1 $rfile | cut -f${colchr}`
117 col_start_name=`head -1 $rfile | cut -f${colstart}` 117 col_start_name=`head -1 $rfile | cut -f${colstart}`
118 col_end_name=`head -1 $rfile | cut -f${colend}` 118 col_end_name=`head -1 $rfile | cut -f${colend}`
119 col_ref_name=`head -1 $rfile | cut -f${colref}` 119 col_ref_name=`head -1 $rfile | cut -f${colref}`
120 col_obs_name=`head -1 $rfile | cut -f${colobs}` 120 col_obs_name=`head -1 $rfile | cut -f${colobs}`
121 121
122 #get annotation file column names for cgatools join 122 #get annotation file column names for cgatools join
123 chr_name=`head -1 $ofile | cut -f${chrcol}` 123 chr_name=`head -1 $ofile | cut -f${chrcol}`
124 start_name=`head -1 $ofile | cut -f${startcol}` 124 start_name=`head -1 $ofile | cut -f${startcol}`
125 end_name=`head -1 $ofile | cut -f${endcol}` 125 end_name=`head -1 $ofile | cut -f${endcol}`
126 ref_name=`head -1 $ofile | cut -f${refcol}` 126 ref_name=`head -1 $ofile | cut -f${refcol}`
127 obs_name=`head -1 $ofile | cut -f${obscol}` 127 obs_name=`head -1 $ofile | cut -f${obscol}`
128 128
129 if [ $test == "Y" ] 129 if [ $test == "Y" ]
130 then 130 then
131 echo "input file" 131 echo "input file"
132 echo "chr col: $col_chr_name ($colchr)" 132 echo "chr col: $col_chr_name ($colchr)"
133 echo "start col: $col_start_name ($colstart)" 133 echo "start col: $col_start_name ($colstart)"
134 echo "end col: $col_end_name ($colend)" 134 echo "end col: $col_end_name ($colend)"
135 echo "ref col: $col_ref_name ($colref)" 135 echo "ref col: $col_ref_name ($colref)"
136 echo "obs col: $col_obs_name ($colobs)" 136 echo "obs col: $col_obs_name ($colobs)"
137 echo "" 137 echo ""
138 echo "annotation file" 138 echo "annotation file"
139 echo "chr col: $chr_name ($chrcol)" 139 echo "chr col: $chr_name ($chrcol)"
140 echo "start col: $start_name ($startcol)" 140 echo "start col: $start_name ($startcol)"
141 echo "end col: $end_name ($endcol)" 141 echo "end col: $end_name ($endcol)"
142 echo "ref col: $ref_name ($refcol)" 142 echo "ref col: $ref_name ($refcol)"
143 echo "obs col: $obs_name ($obscol)" 143 echo "obs col: $obs_name ($obscol)"
144 fi 144 fi
145 145
146 #perform join 146 #perform join
147 cgatools join --beta \ 147 cgatools join --beta \
148 --input $ofile $rfile \ 148 --input $ofile $rfile \
149 --output temporiginal \ 149 --output temporiginal \
150 --match ${chr_name}:${col_chr_name} \ 150 --match ${chr_name}:${col_chr_name} \
151 --match ${start_name}:${col_start_name} \ 151 --match ${start_name}:${col_start_name} \
152 --match ${end_name}:${col_end_name} \ 152 --match ${end_name}:${col_end_name} \
153 --match ${ref_name}:${col_ref_name} \ 153 --match ${ref_name}:${col_ref_name} \
154 --match ${obs_name}:${col_obs_name} \ 154 --match ${obs_name}:${col_obs_name} \
155 --select A.*,$addcols \ 155 --select A.*,$addcols \
156 --always-dump \ 156 --always-dump \
157 --output-mode compact 157 --output-mode compact
158 158
159 #replace originalfile 159 #replace originalfile
160 sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header 160 sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header
161 mv temporiginal originalfile 161 mv temporiginal originalfile
162 162
163 if [ $test == "Y" ] 163 if [ $test == "Y" ]
164 then 164 then
165 echo "joining complete" 165 echo "joining complete"
166 head originalfile 166 head originalfile
167 echo "" 167 echo ""
168 fi 168 fi
169
170 } 169 }
171 170
172 171
173 172
174 173
175 ################################# 174 #################################
176 # 175 #
177 # PARSE PARAMETERS 176 # PARSE PARAMETERS
178 # 177 #
179 ################################# 178 #################################
180 179
181 180
182 set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: spidex: gonl: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage 181 set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: exac03nonpsych: exac03nontcga: dbscsnv11: kaviar_20150923: hrcr1: mitimpact2: mitimpact24: dbnsfp30a: spidex: gonl: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
183 [ $# -eq 0 ] && usage 182 [ $# -eq 0 ] && usage
184 183
185 184
186 185
187 while [ $# -gt 0 ] 186 while [ $# -gt 0 ]
188 do 187 do
189 case "$1" in 188 case "$1" in
190 --inputfile) infile=$2;shift;; # inputfile 189 --inputfile) infile=$2;shift;; # inputfile
191 --buildver) buildvertmp=$2;shift;; # hg18 or hg19 190 --buildver) buildvertmp=$2;shift;; # hg18 or hg19
192 --humandb) humandbtmp=$2;shift;; # location of humandb database 191 --humandb) humandbtmp=$2;shift;; # location of humandb database
193 --varfile) varfile=$2;shift;; # Y or N 192 --varfile) varfile=$2;shift;; # Y or N
194 --VCF) vcf=$2;shift;; #Y or N 193 --VCF) vcf=$2;shift;; #Y or N
195 --chrcol) chrcol=$2;shift;; # which column has chr 194 --chrcol) chrcol=$2;shift;; # which column has chr
196 --startcol) startcol=$2;shift;; # which column has start 195 --startcol) startcol=$2;shift;; # which column has start coord
197 --endcol) endcol=$2;shift;; # which column has end 196 --endcol) endcol=$2;shift;; # which column has end coord
198 --refcol) refcol=$2;shift;; # which column has ref 197 --refcol) refcol=$2;shift;; # which column has ref allele
199 --obscol) obscol=$2;shift;; # which column has alt 198 --obscol) obscol=$2;shift;; # which column has alt allele
200 --vartypecol) vartypecol=$2;shift;; # which column has vartype 199 --vartypecol) vartypecol=$2;shift;; # which column has vartype
201 --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based? 200 --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based?
202 --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene 201 --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene
203 --hgvs) hgvs=$2;shift;; 202 --hgvs) hgvs=$2;shift;;
204 --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")" 203 --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")"
205 --tfbs) tfbs=$2;shift;; # Y or N 204 --tfbs) tfbs=$2;shift;; # Y or N
206 --mce) mce=$2;shift;; # Y or N 205 --mce) mce=$2;shift;; # Y or N
207 --cytoband) cytoband=$2;shift;; # Y or N 206 --cytoband) cytoband=$2;shift;; # Y or N
208 --segdup) segdup=$2;shift;; # Y or N 207 --segdup) segdup=$2;shift;; # Y or N
209 --dgv) dgv=$2;shift;; # Y or N 208 --dgv) dgv=$2;shift;; # Y or N
210 --gwas) gwas=$2;shift;; # Y or N 209 --gwas) gwas=$2;shift;; # Y or N
211 --ver1000g) ver1000g=$2;shift;; # Y or N 210 --ver1000g) ver1000g=$2;shift;; # Y or N
212 --cg46) cg46=$2;shift;; 211 --cg46) cg46=$2;shift;; # Y or N
213 --cg69) cg69=$2;shift;; 212 --cg69) cg69=$2;shift;; # Y or N
214 --impactscores) impactscores=$2;shift;; # Y or N 213 --impactscores) impactscores=$2;shift;; # Y or N
215 --newimpactscores) newimpactscores=$2;shift;; # Y or N 214 --newimpactscores) newimpactscores=$2;shift;; # Y or N
216 --otherinfo) otherinfo=$2;shift;; 215 --otherinfo) otherinfo=$2;shift;; # display additional columns?
217 --scriptsdir) scriptsdirtmp=$2;shift;; # Y or N 216 --scriptsdir) scriptsdirtmp=$2;shift;; # Y or N
218 --esp) esp=$2;shift;; # Y or N 217 --esp) esp=$2;shift;; # Y or N
219 --exac03) exac03=$2;shift;; 218 --exac03) exac03=$2;shift;; # Y or N
220 --gonl) gonl=$2;shift;; 219 --exac03nonpsych) exac03nonpsych=$2;shift;; # Y or N
221 --spidex) spidex=$2;shift;; 220 --exac03nontcga) exac03nontcga=$2;shift;; # Y or N
222 --gerp) gerp=$2;shift;; # Y or N 221 --dbscsnv11) dbscsnv11=$2;shift;;
223 --cosmic61) cosmic61=$2;shift;; # Y or N 222 --kaviar_20150923) kaviar_20150923=$2;shift;;
224 --cosmic63) cosmic63=$2;shift;; # Y or N 223 --hrcr1) hrcr1=$2;shift;;
225 --cosmic64) cosmic64=$2;shift;; # Y or N 224 --mitimpact2) mitimpact2=$2;shift;;
226 --cosmic65) cosmic65=$2;shift;; # Y or N 225 --mitimpact24) mitimpact24=$2;shift;;
227 --cosmic67) cosmic67=$2;shift;; # Y or N 226 --dbnsfp30a) dbnsfp30a=$2;shift;;
228 --cosmic68) cosmic68=$2;shift;; # Y or N 227 --gonl) gonl=$2;shift;; # Y or N
229 --nci60) nci60=$2;shift;; # Y or N 228 --spidex) spidex=$2;shift;; # Y or N
230 --clinvar) clinvar=$2;shift;; # Y or N 229 --gerp) gerp=$2;shift;; # Y or N
231 --filt_dbsnp) filt_dbsnp=$2;shift;; 230 --cosmic61) cosmic61=$2;shift;; # Y or N
232 --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value 231 --cosmic63) cosmic63=$2;shift;; # Y or N
233 --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value 232 --cosmic64) cosmic64=$2;shift;; # Y or N
234 --filt1000GAMR) threshold_1000g_AMR=$2;shift;; #threshold value 233 --cosmic65) cosmic65=$2;shift;; # Y or N
235 --filt1000GASN) threshold_1000g_ASN=$2;shift;; #threshold value 234 --cosmic67) cosmic67=$2;shift;; # Y or N
236 --filt1000GEUR) threshold_1000g_EUR=$2;shift;; #threshold value 235 --cosmic68) cosmic68=$2;shift;; # Y or N
237 --filtESP6500ALL) threshold_ESP6500_ALL=$2;shift;; #threshold value 236 --nci60) nci60=$2;shift;; # Y or N
238 --filtESP6500EA) threshold_ESP6500_EA=$2;shift;; #threshold value 237 --clinvar) clinvar=$2;shift;; # Y or N
239 --filtESP6500AA) threshold_ESP6500_AA=$2;shift;; #threshold value 238 --filt_dbsnp) filt_dbsnp=$2;shift;; # Y or N
240 --filtcg46) threshold_cg46=$2;shift;; 239 --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value
241 --filtcg69) threshold_cg69=$2;shift;; 240 --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value
242 --outall) outfile_all=$2;shift;; # file 241 --filt1000GAMR) threshold_1000g_AMR=$2;shift;; #threshold value
243 --outfilt) outfile_filt=$2;shift;; # file 242 --filt1000GASN) threshold_1000g_ASN=$2;shift;; #threshold value
244 --outinvalid) outfile_invalid=$2;shift;; #file 243 --filt1000GEUR) threshold_1000g_EUR=$2;shift;; #threshold value
245 --dorunannovar) dorunannovar=$2;shift;; #Y or N 244 --filtESP6500ALL) threshold_ESP6500_ALL=$2;shift;; #threshold value
246 -h) shift;; 245 --filtESP6500EA) threshold_ESP6500_EA=$2;shift;; #threshold value
247 --) shift;break;; 246 --filtESP6500AA) threshold_ESP6500_AA=$2;shift;; #threshold value
248 -*) usage;; 247 --filtcg46) threshold_cg46=$2;shift;;
249 *) break;; 248 --filtcg69) threshold_cg69=$2;shift;;
249 --outall) outfile_all=$2;shift;; # file
250 --outfilt) outfile_filt=$2;shift;; # file
251 --outinvalid) outfile_invalid=$2;shift;; #file
252 --dorunannovar) dorunannovar=$2;shift;; #Y or N
253 -h) shift;;
254 --) shift;break;;
255 -*) usage;;
256 *) break;;
250 esac 257 esac
251 shift 258 shift
252 done 259 done
253 260
254 #sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma 261 #sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma
257 scriptsdir=${scriptsdirtmp%,*} 264 scriptsdir=${scriptsdirtmp%,*}
258 265
259 266
260 if [ $test == "Y" ] 267 if [ $test == "Y" ]
261 then 268 then
262 echo "dorunannovar: $dorunannovar" 269 echo "dorunannovar: $dorunannovar"
263 echo "infile: $infile" 270 echo "infile: $infile"
264 echo "buildver: $buildver" 271 echo "buildver: $buildver"
265 echo "annovardb: $humandb" 272 echo "annovardb: $humandb"
266 echo "verdbnsp: $verdbsnp" 273 echo "verdbnsp: $verdbsnp"
267 echo "geneanno: $geneanno" 274 echo "geneanno: $geneanno"
268 echo "tfbs: $tfbs" 275 echo "tfbs: $tfbs"
269 echo "mce: $mce" 276 echo "mce: $mce"
270 echo "cytoband: $cytoband" 277 echo "cytoband: $cytoband"
271 echo "segdup: $segdup" 278 echo "segdup: $segdup"
272 echo "dgv: $dgv" 279 echo "dgv: $dgv"
273 echo "gwas: $gwas" 280 echo "gwas: $gwas"
274 echo "g1000: ${g1000}" 281 echo "g1000: ${g1000}"
275 echo "cg46: ${cg46}" 282 echo "cg46: ${cg46}"
276 echo "cg69: ${cg69}" 283 echo "cg69: ${cg69}"
277 echo "impactscores: $impactscores" 284 echo "impactscores: $impactscores"
278 echo "impactscores: $newimpactscores" 285 echo "impactscores: $newimpactscores"
279 echo "esp: $esp" 286 echo "esp: $esp"
280 echo "gerp: $gerp" 287 echo "gerp: $gerp"
281 echo "cosmic: $cosmic" 288 echo "cosmic: $cosmic"
282 echo "outfile: $outfile_all" 289 echo "outfile: $outfile_all"
283 echo "outinvalid: $outfile_invalid" 290 echo "outinvalid: $outfile_invalid"
284 echo "outfiltered: $outfile_filt" 291 echo "outfiltered: $outfile_filt"
285 echo "varfile: $varfile" 292 echo "varfile: $varfile"
286 echo "vcf" $vcf 293 echo "vcf" $vcf
287 echo "chrcol: $chrcol" 294 echo "chrcol: $chrcol"
288 echo "startcol: $startcol" 295 echo "startcol: $startcol"
289 echo "endcol: $endcol" 296 echo "endcol: $endcol"
290 echo "refcol: $refcol" 297 echo "refcol: $refcol"
291 echo "obscol: $obscol" 298 echo "obscol: $obscol"
292 echo "convertcoords: $convertcoords" 299 echo "convertcoords: $convertcoords"
293 echo "vartypecol: $vartypecol" 300 echo "vartypecol: $vartypecol"
294 echo "dofilter: $dofilter" 301 echo "dofilter: $dofilter"
295 echo "threshold_1000g_ALL : $threshold_1000g_ALL" 302 echo "threshold_1000g_ALL : $threshold_1000g_ALL"
296 echo "threshold_1000g_AFR : $threshold_1000g_AFR" 303 echo "threshold_1000g_AFR : $threshold_1000g_AFR"
297 echo "threshold_1000g_AMR : $threshold_1000g_AMR" 304 echo "threshold_1000g_AMR : $threshold_1000g_AMR"
298 echo "threshold_1000g_ASN : $threshold_1000g_ASN" 305 echo "threshold_1000g_ASN : $threshold_1000g_ASN"
299 echo "threshold_1000g_EUR : $threshold_1000g_EUR" 306 echo "threshold_1000g_EUR : $threshold_1000g_EUR"
300 echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL" 307 echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL"
301 echo "threshold_ESP6500_EA : $threshold_ESP6500_EA" 308 echo "threshold_ESP6500_EA : $threshold_ESP6500_EA"
302 echo "threshold_ESP6500_AA : $threshold_ESP6500_AA" 309 echo "threshold_ESP6500_AA : $threshold_ESP6500_AA"
303
304 fi 310 fi
305 311
306 312
307 313
308 ############################################ 314 ############################################
309 # 315 #
310 # Annotate Variants 316 # Annotate Variants
311 # 317 #
312 ############################################ 318 ############################################
313 319
314 #parse geneanno param 320 # parse geneanno param
315 refgene="N" 321 refgene="N"
316 knowngene="N" 322 knowngene="N"
317 ensgene="N" 323 ensgene="N"
318 324
319 if [[ $geneanno =~ "refSeq" ]] 325 if [[ $geneanno =~ "refSeq" ]]
320 then 326 then
321 refgene="Y" 327 refgene="Y"
322 fi 328 fi
323 if [[ $geneanno =~ "knowngene" ]] 329 if [[ $geneanno =~ "knowngene" ]]
324 then 330 then
325 knowngene="Y" 331 knowngene="Y"
326 fi 332 fi
327 if [[ $geneanno =~ "ensgene" ]] 333 if [[ $geneanno =~ "ensgene" ]]
328 then 334 then
329 ensgene="Y" 335 ensgene="Y"
330 fi 336 fi
331 if [ $hgvs == "N" ] 337 if [ $hgvs == "N" ]
332 then 338 then
333 hgvs="" 339 hgvs=""
334 fi 340 fi
335 341
336 #parse verdbsnp/1000g/esp strings 342 #parse verdbsnp/1000g/esp strings
337 dbsnpstr=${verdbsnp//,/ } 343 dbsnpstr=${verdbsnp//,/ }
338 filt_dbsnpstr=${filt_dbsnp//,/ } 344 filt_dbsnpstr=${filt_dbsnp//,/ }
339 g1000str=${ver1000g//,/ } 345 g1000str=${ver1000g//,/ }
340 espstr=${esp//,/ } 346 espstr=${esp//,/ }
341 347
342 if [ $test == "Y" ] 348 if [ $test == "Y" ]
343 then 349 then
344 echo "annotate dbsnp: $dbsnpstr" 350 echo "annotate dbsnp: $dbsnpstr"
345 echo "annotate esp: $espstr" 351 echo "annotate esp: $espstr"
346 echo "filter dbsnp: $filt_dbsnpstr" 352 echo "filter dbsnp: $filt_dbsnpstr"
347 fi 353 fi
348 354
349 mutationtaster="N" 355 mutationtaster="N"
350 avsift="N" 356 avsift="N"
351 lrt="N" 357 lrt="N"
354 ljbsift="N" 360 ljbsift="N"
355 361
356 #parse old impactscores param (obsolete) 362 #parse old impactscores param (obsolete)
357 if [[ $impactscores =~ "mutationtaster" ]] 363 if [[ $impactscores =~ "mutationtaster" ]]
358 then 364 then
359 mutationtaster="Y" 365 mutationtaster="Y"
360 fi 366 fi
361 if [[ $impactscores =~ "sift" ]] 367 if [[ $impactscores =~ "sift" ]]
362 then 368 then
363 avsift="Y" 369 avsift="Y"
364 fi 370 fi
365 if [[ $impactscores =~ "lrt" ]] 371 if [[ $impactscores =~ "lrt" ]]
366 then 372 then
367 lrt="Y" 373 lrt="Y"
368 fi 374 fi
369 if [[ $impactscores =~ "ljbsift" ]] 375 if [[ $impactscores =~ "ljbsift" ]]
370 then 376 then
371 ljbsift="Y" 377 ljbsift="Y"
372 fi 378 fi
373 if [[ $impactscores =~ "ljb2sift" ]] 379 if [[ $impactscores =~ "ljb2sift" ]]
374 then 380 then
375 ljb2sift="Y" 381 ljb2sift="Y"
376 fi 382 fi
377 if [[ $impactscores =~ "pp2" ]] 383 if [[ $impactscores =~ "pp2" ]]
378 then 384 then
379 polyphen2="Y" 385 polyphen2="Y"
380 fi 386 fi
381 if [[ $impactscores =~ "phylop" ]] 387 if [[ $impactscores =~ "phylop" ]]
382 then 388 then
383 phylop="Y" 389 phylop="Y"
384 fi 390 fi
385 391
386 if [[ $varfile == "Y" ]] 392 if [[ $varfile == "Y" ]]
387 then 393 then
388 convertcoords="Y" 394 convertcoords="Y"
389 fi 395 fi
390 396
391 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores 397 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
392 398
393 ljb2_sift="N" 399 ljb2_sift="N"
403 409
404 # parse ljb2 newimpactscores param 410 # parse ljb2 newimpactscores param
405 # ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy 411 # ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy
406 if [[ $newimpactscores =~ "ljb2_sift" ]] 412 if [[ $newimpactscores =~ "ljb2_sift" ]]
407 then 413 then
408 ljb2_sift="Y" 414 ljb2_sift="Y"
409 fi 415 fi
410 if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]] 416 if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]]
411 then 417 then
412 ljb2_pp2hdiv="Y" 418 ljb2_pp2hdiv="Y"
413 fi 419 fi
414 if [[ $newimpactscores =~ "ljb2_pp2hvar" ]] 420 if [[ $newimpactscores =~ "ljb2_pp2hvar" ]]
415 then 421 then
416 ljb2_pp2hvar="Y" 422 ljb2_pp2hvar="Y"
417 fi 423 fi
418 if [[ $newimpactscores =~ "ljb2_lrt" ]] 424 if [[ $newimpactscores =~ "ljb2_lrt" ]]
419 then 425 then
420 ljb2_lrt="Y" 426 ljb2_lrt="Y"
421 fi 427 fi
422 if [[ $newimpactscores =~ "ljb2_mt" ]] 428 if [[ $newimpactscores =~ "ljb2_mt" ]]
423 then 429 then
424 ljb2_mt="Y" 430 ljb2_mt="Y"
425 fi 431 fi
426 if [[ $newimpactscores =~ "ljb2_ma" ]] 432 if [[ $newimpactscores =~ "ljb2_ma" ]]
427 then 433 then
428 ljb2_ma="Y" 434 ljb2_ma="Y"
429 fi 435 fi
430 if [[ $newimpactscores =~ "ljb2_fathmm" ]] 436 if [[ $newimpactscores =~ "ljb2_fathmm" ]]
431 then 437 then
432 ljb2_fathmm="Y" 438 ljb2_fathmm="Y"
433 fi 439 fi
434 if [[ $newimpactscores =~ "ljb2_gerp" ]] 440 if [[ $newimpactscores =~ "ljb2_gerp" ]]
435 then 441 then
436 ljb2_gerp="Y" 442 ljb2_gerp="Y"
437 fi 443 fi
438 if [[ $newimpactscores =~ "ljb2_phylop" ]] 444 if [[ $newimpactscores =~ "ljb2_phylop" ]]
439 then 445 then
440 ljb2_phylop="Y" 446 ljb2_phylop="Y"
441 fi 447 fi
442 if [[ $newimpactscores =~ "ljb2_siphy" ]] 448 if [[ $newimpactscores =~ "ljb2_siphy" ]]
443 then 449 then
444 ljb2_siphy="Y" 450 ljb2_siphy="Y"
445 fi 451 fi
446 452
447 if [ $otherinfo == "N" ] 453 if [ $otherinfo == "N" ]
448 then 454 then
449 otherinfo="" 455 otherinfo=""
450 fi 456 fi
451 457
452 458
453 #column header names we will be adding 459 #column header names we will be adding
454 # ESP 6500 460 # ESP 6500
474 #run annovar or filter only? 480 #run annovar or filter only?
475 if [ $dorunannovar == "Y" ] 481 if [ $dorunannovar == "Y" ]
476 then 482 then
477 483
478 484
479 #################################### 485 ####################################
480 # 486 #
481 # PREPARE INPUT FILE 487 # PREPARE INPUT FILE
482 # 488 #
483 #################################### 489 ####################################
484 490
485 echo "converting input file" 491 echo "converting input file"
486 vcfheader="" 492 vcfheader=""
487 if [ $vcf == "Y" ] #if CG varfile, convert 493 if [ $vcf == "Y" ] #if CG varfile, convert
488 then 494 then
489 # convert vcf to annovarinput 495 # convert vcf to annovarinput
490 $scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1 496 $scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1
491 497
492 #construct header line from vcf file 498 #construct header line from vcf file
493 cat $infile | grep "#CHROM" > additionalcols 499 cat $infile | grep "#CHROM" > additionalcols
494 sed -i 's/#//g' additionalcols 500 sed -i 's/#//g' additionalcols
495 vcfheader="\t`cat additionalcols`" 501 vcfheader="\t`cat additionalcols`"
496 echo "vcfheader:$vcfheader" 502 echo "vcfheader:$vcfheader"
497 echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile 503 echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile
498 cat annovarinput >> originalfile 504 cat annovarinput >> originalfile
499 505
500 chrcol=1 506 chrcol=1
501 startcol=2 507 startcol=2
502 endcol=3 508 endcol=3
503 refcol=4 509 refcol=4
504 obscol=5 510 obscol=5
505 511
506 512
507 elif [ $varfile == "Y" ] #if CG varfile, convert 513 elif [ $varfile == "Y" ] #if CG varfile, convert
508 then 514 then
509 # convert varfile 515 # convert varfile
510 $scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1 516 $scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1
511 echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile 517 echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile
512 cat annovarinput | cut -f1-6,8 >> originalfile 518 cat annovarinput | cut -f1-6,8 >> originalfile
513 cat annovarinput | cut -f1-5 >> annovarinput2 519 cat annovarinput | cut -f1-5 >> annovarinput2
514 mv annovarinput2 annovarinput 520 mv annovarinput2 annovarinput
515 521
516 chrcol=1 522 chrcol=1
517 startcol=2 523 startcol=2
518 endcol=3 524 endcol=3
519 refcol=4 525 refcol=4
520 obscol=5 526 obscol=5
521 527
522 elif [ $convertcoords == "Y" ] # if CG-coordinates, convert 528 elif [ $convertcoords == "Y" ] # if CG-coordinates, convert
523 then 529 then
524 #echo "rearranging columns and converting coordinates" 530 #echo "rearranging columns and converting coordinates"
525 awk 'BEGIN{ 531 awk 'BEGIN{
526 FS="\t"; 532 FS="\t";
527 OFS="\t"; 533 OFS="\t";
528 }{ 534 }{
529 if(FNR>1) { 535 if(FNR>1) {
530 gsub(/chr/,"",$"'"${chrcol}"'") 536 gsub(/chr/,"",$"'"${chrcol}"'")
531 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; 537 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
532 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" }; 538 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
533 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" }; 539 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
534 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; 540 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };
535 541
536 printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'"); 542 printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
537 } 543 }
538 } 544 }
539 END{ 545 END{
540 }' $infile > annovarinput 546 }' $infile > annovarinput
541 547
542 #remove any "chr" prefixes 548 #remove any "chr" prefixes
543 #sed -i '2,$s/chr//g' annovarinput 549 #sed -i '2,$s/chr//g' annovarinput
544 550
545 awk 'BEGIN{ 551 awk 'BEGIN{
546 FS="\t"; 552 FS="\t";
547 OFS="\t"; 553 OFS="\t";
548 }{ 554 }{
549 555
550 if(FNR>=1) { 556 if(FNR>=1) {
551 gsub(/chr/,"",$"'"${chrcol}"'") 557 gsub(/chr/,"",$"'"${chrcol}"'")
552 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; 558 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
553 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" }; 559 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
554 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" }; 560 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
555 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; 561 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };
556 562
557 print $0 563 print $0
558 } 564 }
559 } 565 }
560 END{ 566 END{
561 }' $infile > originalfile 567 }' $infile > originalfile
562 568
563 #remove any "chr" prefixes 569 #remove any "chr" prefixes
564 #sed -i '2,$s/chr//g' originalfile 570 #sed -i '2,$s/chr//g' originalfile
565 sed -i 's/omosome/chromosome/g' originalfile 571 sed -i 's/omosome/chromosome/g' originalfile
566 572
567 573
568 else #only rearrange columns if already 1-based coordinates 574 else #only rearrange columns if already 1-based coordinates
569 echo "rearranging columns " 575 echo "rearranging columns "
570 awk 'BEGIN{ 576 awk 'BEGIN{
571 FS="\t"; 577 FS="\t";
572 OFS="\t"; 578 OFS="\t";
573 }{ 579 }{
574 if(FNR>1) { 580 if(FNR>1) {
575 printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'"); 581 printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
576 } 582 }
577 } 583 }
578 END{ 584 END{
579 }' $infile > annovarinput 585 }' $infile > annovarinput
580 586
581 #remove any "chr" prefixes 587 #remove any "chr" prefixes
582 sed -i '2,$s/chr//g' annovarinput 588 sed -i '2,$s/chr//g' annovarinput
583 sed '2,$s/chr//g' $infile > originalfile 589 sed '2,$s/chr//g' $infile > originalfile
584 sed -i 's/omosome/chromosome/g' originalfile 590 sed -i 's/omosome/chromosome/g' originalfile
585 fi 591 fi
586 592
587 echo "...finished conversion" 593 echo "...finished conversion"
588 594
589 595
590 596
591 597 ####################################
592 #################################### 598 #
593 # 599 # RUN ANNOVAR COMMANDS
594 # RUN ANNOVAR COMMANDS 600 #
595 # 601 ####################################
596 #################################### 602
597 603
598 604
599 605 ###### gene-based annotation #######
600 ###### gene-based annotation ####### 606
601 607 # RefSeq Gene
602 # RefSeq Gene 608 if [ $refgene == "Y" ]
603 if [ $refgene == "Y" ] 609 then
604 then 610 echo -e "\nrefSeq gene"
605 echo -e "\nrefSeq gene" 611 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1
606 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1 612
607 613 annovarout=annovarinput.variant_function
608 annovarout=annovarinput.variant_function 614 sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
609 sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 615 joinresults originalfile $annovarout 3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene
610 joinresults originalfile $annovarout 3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene 616
611 617 annovarout=annovarinput.exonic_variant_function
612 annovarout=annovarinput.exonic_variant_function 618 sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
613 sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 619 joinresults originalfile $annovarout 4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange
614 joinresults originalfile $annovarout 4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange 620 fi
615 fi 621
616 622
617 623 # UCSC KnownGene
618 # UCSC KnownGene 624 if [ $knowngene == "Y" ]
619 if [ $knowngene == "Y" ] 625 then
620 then 626 echo -e "\nUCSC known gene"
621 echo -e "\nUCSC known gene" 627 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1
622 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1 628
623 629 annovarout=annovarinput.variant_function
624 annovarout=annovarinput.variant_function 630 sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
625 sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 631 joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene
626 joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene 632
627 633 annovarout=annovarinput.exonic_variant_function
628 annovarout=annovarinput.exonic_variant_function 634 sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
629 sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 635 joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange
630 joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange 636 fi
631 fi 637
632 638
633 639 # Emsembl Gene
634 # Emsembl Gene 640 if [ $ensgene == "Y" ]
635 if [ $ensgene == "Y" ] 641 then
636 then 642 echo -e "\nEnsembl gene"
637 echo -e "\nEnsembl gene" 643 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1
638 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1 644
639 645 annovarout=annovarinput.variant_function
640 annovarout=annovarinput.variant_function 646 sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
641 sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 647 joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene
642 joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene 648
643 649 annovarout=annovarinput.exonic_variant_function
644 annovarout=annovarinput.exonic_variant_function 650 sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
645 sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 651 joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange
646 joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange 652 fi
647 fi 653
648 654
649 655
650 656 ###### region-based annotation #######
651 ###### region-based annotation ####### 657
652 658
653 659 # Transcription Factor Binding Sites Annotation
654 # Transcription Factor Binding Sites Annotation 660 if [ $mce == "Y" ]
655 if [ $mce == "Y" ] 661 then
656 then 662 echo -e "\nMost Conserved Elements"
657 echo -e "\nMost Conserved Elements" 663
658 664 if [ $buildver == "hg18" ]
659 if [ $buildver == "hg18" ] 665 then
660 then 666 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1
661 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1 667 annovarout=annovarinput.${buildver}_phastConsElements44way
662 annovarout=annovarinput.${buildver}_phastConsElements44way 668 sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
663 sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 669 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way
664 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way 670
665 671 else #hg19
666 else #hg19 672 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1
667 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1 673 annovarout=annovarinput.${buildver}_phastConsElements46way
668 annovarout=annovarinput.${buildver}_phastConsElements46way 674 sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
669 sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 675 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way
670 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way 676 fi
671 fi 677
672 678 fi
673 fi 679
674 680
675 681
676 682 # Transcription Factor Binding Sites Annotation
677 # Transcription Factor Binding Sites Annotation 683 if [ $tfbs == "Y" ]
678 if [ $tfbs == "Y" ] 684 then
679 then 685 echo -e "\nTranscription Factor Binding Site Annotation"
680 echo -e "\nTranscription Factor Binding Site Annotation" 686 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1
681 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1 687
682 688 # arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns
683 # arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns 689 annovarout=annovarinput.${buildver}_tfbsConsSites
684 annovarout=annovarinput.${buildver}_tfbsConsSites 690 sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
685 sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 691 joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS
686 joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS 692 fi
687 fi 693
688 694
689 695
690 696 # Identify cytogenetic band for genetic variants
691 # Identify cytogenetic band for genetic variants 697 if [ $cytoband == "Y" ]
692 if [ $cytoband == "Y" ] 698 then
693 then 699 echo -e "\nCytogenic band Annotation"
694 echo -e "\nCytogenic band Annotation" 700 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1
695 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1 701
696 702 annovarout=annovarinput.${buildver}_cytoBand
697 annovarout=annovarinput.${buildver}_cytoBand 703 sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
698 sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 704 joinresults originalfile $annovarout 3 4 5 6 7 B.Band
699 joinresults originalfile $annovarout 3 4 5 6 7 B.Band 705 fi
700 fi 706
701 707
702 708 # Identify variants located in segmental duplications
703 # Identify variants located in segmental duplications 709 if [ $segdup == "Y" ]
704 if [ $segdup == "Y" ] 710 then
705 then 711 echo -e "\nSegmental Duplications Annotation"
706 echo -e "\nSegmental Duplications Annotation" 712 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1
707 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1 713
708 714 annovarout=annovarinput.${buildver}_genomicSuperDups
709 annovarout=annovarinput.${buildver}_genomicSuperDups 715 sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
710 sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 716 joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup
711 joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup 717 fi
712 fi 718
713 719
714 720
715 721 # Identify previously reported structural variants in DGV
716 # Identify previously reported structural variants in DGV 722 if [ $dgv == "Y" ]
717 if [ $dgv == "Y" ] 723 then
718 then 724 echo -e "\nDGV Annotation"
719 echo -e "\nDGV Annotation" 725 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1
720 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1 726
721 727 annovarout=annovarinput.${buildver}_dgvMerged
722 annovarout=annovarinput.${buildver}_dgvMerged 728 sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
723 sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 729 joinresults originalfile $annovarout 3 4 5 6 7 B.DGV
724 joinresults originalfile $annovarout 3 4 5 6 7 B.DGV 730 fi
725 fi 731
726 732
727 733 # Identify variants reported in previously published GWAS studies
728 # Identify variants reported in previously published GWAS studies 734 if [ $gwas == "Y" ]
729 if [ $gwas == "Y" ] 735 then
730 then 736 echo -e "\nGWAS Annotation"
731 echo -e "\nGWAS Annotation" 737 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1
732 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1 738
733 739 annovarout=annovarinput.${buildver}_gwasCatalog
734 annovarout=annovarinput.${buildver}_gwasCatalog 740 sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
735 sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 741 joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS
736 joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS 742 fi
737 fi 743
738 744
739 745
740 746
741 747 ###### filter-based annotation #######
742 ###### filter-based annotation ####### 748
743 749 #dbSNP
744 #dbSNP 750 for version in $dbsnpstr
745 for version in $dbsnpstr 751 do
746 do 752 if [ $version == "None" ]
747 if [ $version == "None" ] 753 then
748 then 754 break
749 break 755 fi
750 fi 756 echo -e "\ndbSNP region Annotation, version: $version"
751 echo -e "\ndbSNP region Annotation, version: $version" 757 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1
752 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1 758
753 759 columnname=${version}
754 columnname=${version} 760 if [[ $columnname == snp* ]]
755 if [[ $columnname == snp* ]] 761 then
756 then 762 columnname="db${version}"
757 columnname="db${version}" 763 fi
758 fi 764
759 765 annovarout=annovarinput.${buildver}_${version}_dropped
760 annovarout=annovarinput.${buildver}_${version}_dropped 766 sed -i '1i\db\t'${columnname}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
761 sed -i '1i\db\t'${columnname}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 767 joinresults originalfile $annovarout 3 4 5 6 7 B.${columnname}
762 joinresults originalfile $annovarout 3 4 5 6 7 B.${columnname} 768
763 769 done
764 770
765 done 771
766 772
767 773 #1000 Genomes
768 774
769 #1000 Genomes 775 if [ $ver1000g != "None" ]
770 776 then
771 if [ $ver1000g != "None" ] 777
772 then 778 for version in $g1000str
773 779 do
774 for version in $g1000str 780 #column headers
775 do 781 g1000_colheader_ALL="${version}_ALL"
776 #column headers 782 g1000_colheader_AFR="${version}_AFR"
777 g1000_colheader_ALL="${version}_ALL" 783 g1000_colheader_AMR="${version}_AMR"
778 g1000_colheader_AFR="${version}_AFR" 784 g1000_colheader_ASN="${version}_ASN"
779 g1000_colheader_AMR="${version}_AMR" 785 g1000_colheader_EUR="${version}_EUR"
780 g1000_colheader_ASN="${version}_ASN" 786 g1000_colheader_EAS="${version}_EAS"
781 g1000_colheader_EUR="${version}_EUR" 787 g1000_colheader_SAS="${version}_SAS"
782 g1000_colheader_EAS="${version}_EAS" 788 g1000_colheader_CEU="${version}_CEU"
783 g1000_colheader_SAS="${version}_SAS" 789 g1000_colheader_YRI="${version}_YRI"
784 g1000_colheader_CEU="${version}_CEU" 790 g1000_colheader_JPTCHB="${version}_JPTCHB"
785 g1000_colheader_YRI="${version}_YRI" 791
786 g1000_colheader_JPTCHB="${version}_JPTCHB" 792 doALL="N"
787 793 doAMR="N"
788 doALL="N" 794 doAFR="N"
789 doAMR="N" 795 doASN="N"
790 doAFR="N" 796 doEAS="N"
791 doASN="N" 797 doSAS="N"
792 doEAS="N" 798 doEUR="N"
793 doSAS="N" 799 doCEU="N"
794 doEUR="N" 800 doYRI="N"
795 doCEU="N" 801 doJPTCHB="N"
796 doYRI="N" 802
797 doJPTCHB="N" 803
798 804 if [ $version == "1000g2012apr" ]
799 805 then
800 if [ $version == "1000g2012apr" ] 806 fileID="2012_04"
801 then 807 doALL="Y"
802 fileID="2012_04" 808 if [ $buildver == "hg19" ]
803 doALL="Y" 809 then
804 if [ $buildver == "hg19" ] 810 doAMR="Y"
805 then 811 doAFR="Y"
806 doAMR="Y" 812 doASN="Y"
807 doAFR="Y" 813 doEUR="Y"
808 doASN="Y" 814 fi
809 doEUR="Y" 815 elif [ $version == "1000g2014oct" ]
810 fi 816 then
811 elif [ $version == "1000g2014oct" ] 817 fileID="2014_10"
812 then 818 doALL="Y"
813 fileID="2014_10" 819 doAMR="Y"
814 doALL="Y" 820 doAFR="Y"
815 doAMR="Y" 821 doEUR="Y"
816 doAFR="Y" 822 doEAS="Y"
817 doEUR="Y" 823 if [ $buildver == "hg19" ]
818 doEAS="Y" 824 then
819 if [ $buildver == "hg19" ] 825 doSAS="Y"
820 then 826 fi
821 doSAS="Y" 827
822 fi 828 elif [[ $version == "1000g2015aug" ]]
823 829 then
824 elif [[ $version == "1000g2015aug" ]] 830 fileID="2015_08"
825 then 831 doALL="Y"
826 fileID="2015_08" 832 doAMR="Y"
827 doALL="Y" 833 doAFR="Y"
828 doAMR="Y" 834 doEUR="Y"
829 doAFR="Y" 835 doEAS="Y"
830 doEUR="Y" 836 doSAS="Y"
831 doEAS="Y" 837
832 doSAS="Y" 838 elif [[ $version == "1000g2012feb" ]]
833 839 then
834 elif [[ $version == "1000g2012feb" ]] 840 fileID="2012_02"
835 then 841 doALL="Y"
836 fileID="2012_02" 842 elif [[ $version == "1000g2010nov" ]]
837 doALL="Y" 843 then
838 elif [[ $version == "1000g2010nov" ]] 844 fileID="2010_11"
839 then 845 doALL="Y"
840 fileID="2010_11" 846 elif [[ $version == "1000g2010jul" ]]
841 doALL="Y" 847 then
842 elif [[ $version == "1000g2010jul" ]] 848 fileID="2010_07"
843 then 849 doALL="N"
844 fileID="2010_07" 850 doCEU="Y"
845 doALL="N" 851 doYRI="Y"
846 doCEU="Y" 852 doJPTCHB="Y"
847 doYRI="Y" 853 else
848 doJPTCHB="Y" 854 echo "unrecognized 1000g version, skipping"
849 else 855 fi
850 echo "unrecognized 1000g version, skipping" 856
851 fi 857 #ALL
852 858 if [ $doALL == "Y" ]
853 #ALL 859 then
854 if [ $doALL == "Y" ] 860 echo -e "\n1000Genomes ALL"
855 then 861 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1
856 echo -e "\n1000Genomes ALL" 862
857 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1 863 annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped
858 864 sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
859 annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped 865 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL
860 sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 866 fi
861 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL 867
862 fi 868 # AFR
863 869 if [ $doAFR == "Y" ]
864 # AFR 870 then
865 if [ $doAFR == "Y" ] 871 echo -e "\n1000Genomes AFR"
866 then 872 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1
867 echo -e "\n1000Genomes AFR" 873
868 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1 874 annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped
869 875 sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
870 annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped 876 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR
871 sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 877 fi
872 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR 878
873 fi 879
874 880 # AMR
875 881 if [ $doAMR == "Y" ]
876 # AMR 882 then
877 if [ $doAMR == "Y" ] 883 echo -e "\n1000Genomes AMR"
878 then 884 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1
879 echo -e "\n1000Genomes AMR" 885
880 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1 886 annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped
881 887 sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
882 annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped 888 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR
883 sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 889 fi
884 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR 890
885 fi 891 # ASN
886 892 if [ $doASN == "Y" ]
887 # ASN 893 then
888 if [ $doASN == "Y" ] 894 echo -e "\n1000Genomes ASN"
889 then 895 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1
890 echo -e "\n1000Genomes ASN" 896
891 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1 897 annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped
892 898 sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
893 annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped 899 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN
894 sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 900 fi
895 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN 901
896 fi 902 # EAS
897 903 if [ $doEAS == "Y" ]
898 # EAS 904 then
899 if [ $doEAS == "Y" ] 905 echo -e "\n1000Genomes EAS"
900 then 906 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1
901 echo -e "\n1000Genomes EAS" 907
902 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1 908 annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped
903 909 sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
904 annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped 910 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS
905 sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 911 fi
906 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS 912
907 fi 913 # SAS
908 914 if [ $doSAS == "Y" ]
909 # SAS 915 then
910 if [ $doSAS == "Y" ] 916 echo -e "\n1000Genomes SAS"
911 then 917 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1
912 echo -e "\n1000Genomes SAS" 918
913 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1 919 annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped
914 920 sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
915 annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped 921 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS
916 sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 922 fi
917 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS 923
918 fi 924 # EUR
919 925 if [ $doEUR == "Y" ]
920 # EUR 926 then
921 if [ $doEUR == "Y" ] 927 echo -e "\n1000Genomes EUR"
922 then 928 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1
923 echo -e "\n1000Genomes EUR" 929
924 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1 930 annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped
925 931 sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
926 annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped 932 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR
927 sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 933 fi
928 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR 934
929 fi 935 # CEU
930 936 if [ $doCEU == "Y" ]
931 # CEU 937 then
932 if [ $doCEU == "Y" ] 938 echo -e "\n1000Genomes CEU"
933 then 939 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1
934 echo -e "\n1000Genomes CEU" 940
935 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1 941 annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped
936 942 sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
937 annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped 943 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU
938 sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 944 fi
939 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU 945
940 fi 946 # YRI
941 947 if [ $doYRI == "Y" ]
942 # YRI 948 then
943 if [ $doYRI == "Y" ] 949 echo -e "\n1000Genomes YRI"
944 then 950 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1
945 echo -e "\n1000Genomes YRI" 951
946 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1 952 annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped
947 953 sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
948 annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped 954 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI
949 sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 955
950 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI 956
951 957 fi
952 958
953 fi 959 #JPTCHB
954 960 if [ $doJPTCHB == "Y" ]
955 #JPTCHB 961 then
956 if [ $doJPTCHB == "Y" ] 962 echo -e "\n1000Genomes JPTCHB"
957 then 963 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1
958 echo -e "\n1000Genomes JPTCHB" 964
959 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1 965 annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped
960 966 sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
961 annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped 967 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB
962 sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 968 fi
963 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB 969
964 fi 970 done
965 971 fi
966 done 972
967 fi 973
968 974
969 975
970 976 #### IMPACT SCORE ANNOTATIONS
971 977
972 #### IMPACT SCORE ANNOTATIONS 978
973 979 if [ $ljb2_sift == "Y" ]
974 980 then
975 if [ $ljb2_sift == "Y" ] 981 echo -e "\nLJB2 SIFT Annotation"
976 then 982 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1
977 echo -e "\nLJB2 SIFT Annotation" 983
978 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1 984 annovarout=annovarinput.${buildver}_ljb2_sift_dropped
979 985 sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
980 annovarout=annovarinput.${buildver}_ljb2_sift_dropped 986 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT
981 sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 987 fi
982 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT 988
983 fi 989 if [ $ljb2_pp2hdiv == "Y" ]
984 990 then
985 if [ $ljb2_pp2hdiv == "Y" ] 991 echo -e "\nLJB2 pp2hdiv Annotation"
986 then 992 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1
987 echo -e "\nLJB2 pp2hdiv Annotation" 993
988 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1 994 annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped
989 995 sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
990 annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped 996 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV
991 sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 997 fi
992 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV 998
993 fi 999 if [ $ljb2_pp2hvar == "Y" ]
994 1000 then
995 if [ $ljb2_pp2hvar == "Y" ] 1001 echo -e "\nLJB2 pp2hvar Annotation"
996 then 1002 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1
997 echo -e "\nLJB2 pp2hvar Annotation" 1003
998 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1 1004 annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped
999 1005
1000 annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped 1006 head $annovarout
1001 1007 sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1002 head $annovarout 1008 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR
1003 sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1009 fi
1004 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR 1010
1005 fi 1011 if [ $ljb2_lrt == "Y" ]
1006 1012 then
1007 if [ $ljb2_lrt == "Y" ] 1013 echo -e "\nLJB2 LRT Annotation"
1008 then 1014 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1
1009 echo -e "\nLJB2 LRT Annotation" 1015
1010 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1 1016 annovarout=annovarinput.${buildver}_ljb2_lrt_dropped
1011 1017 sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1012 annovarout=annovarinput.${buildver}_ljb2_lrt_dropped 1018 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT
1013 sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1019 fi
1014 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT 1020
1015 fi 1021 if [ $ljb2_mt == "Y" ]
1016 1022 then
1017 if [ $ljb2_mt == "Y" ] 1023 echo -e "\nLJB2 mutationtaster Annotation"
1018 then 1024 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1
1019 echo -e "\nLJB2 mutationtaster Annotation" 1025
1020 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1 1026 annovarout=annovarinput.${buildver}_ljb2_mt_dropped
1021 1027 sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1022 annovarout=annovarinput.${buildver}_ljb2_mt_dropped 1028 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster
1023 sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1029 fi
1024 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster 1030
1025 fi 1031 if [ $ljb2_ma == "Y" ]
1026 1032 then
1027 if [ $ljb2_ma == "Y" ] 1033 echo -e "\nLJB2 mutationassessor Annotation"
1028 then 1034 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1
1029 echo -e "\nLJB2 mutationassessor Annotation" 1035
1030 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1 1036 annovarout=annovarinput.${buildver}_ljb2_ma_dropped
1031 1037 sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1032 annovarout=annovarinput.${buildver}_ljb2_ma_dropped 1038 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor
1033 sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1039 fi
1034 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor 1040
1035 fi 1041 if [ $ljb2_fathmm == "Y" ]
1036 1042 then
1037 if [ $ljb2_fathmm == "Y" ] 1043 echo -e "\nLJB2 FATHMM Annotation"
1038 then 1044 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1
1039 echo -e "\nLJB2 FATHMM Annotation" 1045
1040 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1 1046 annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped
1041 1047 sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1042 annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped 1048 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM
1043 sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1049 fi
1044 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM 1050
1045 fi 1051 if [ $ljb2_gerp == "Y" ]
1046 1052 then
1047 if [ $ljb2_gerp == "Y" ] 1053 echo -e "\nLJB2 GERP++ Annotation"
1048 then 1054 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1
1049 echo -e "\nLJB2 GERP++ Annotation" 1055
1050 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1 1056 annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped
1051 1057 sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1052 annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped 1058 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++
1053 sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1059 fi
1054 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++ 1060
1055 fi 1061 if [ $ljb2_phylop == "Y" ]
1056 1062 then
1057 if [ $ljb2_phylop == "Y" ] 1063 echo -e "\nLJB2 PhyloP Annotation"
1058 then 1064 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1
1059 echo -e "\nLJB2 PhyloP Annotation" 1065
1060 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1 1066 annovarout=annovarinput.${buildver}_ljb2_phylop_dropped
1061 1067 sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1062 annovarout=annovarinput.${buildver}_ljb2_phylop_dropped 1068 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP
1063 sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1069 fi
1064 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP 1070
1065 fi 1071 if [ $ljb2_siphy == "Y" ]
1066 1072 then
1067 if [ $ljb2_siphy == "Y" ] 1073 echo -e "\nLJB2 SiPhy Annotation"
1068 then 1074 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1
1069 echo -e "\nLJB2 SiPhy Annotation" 1075
1070 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1 1076 annovarout=annovarinput.${buildver}_ljb2_siphy_dropped
1071 1077 sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1072 annovarout=annovarinput.${buildver}_ljb2_siphy_dropped 1078 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy
1073 sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1079 fi
1074 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy 1080
1075 fi 1081
1076 1082
1077 1083 ### OLD IMPACT SCORE ANNOTATIONS
1078 1084
1079 ### OLD IMPACT SCORE ANNOTATIONS 1085 # SIFT
1080 1086 if [ $avsift == "Y" ]
1081 # SIFT 1087 then
1082 if [ $avsift == "Y" ] 1088 echo -e "\nSIFT Annotation"
1083 then 1089 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1
1084 echo -e "\nSIFT Annotation" 1090
1085 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1 1091 annovarout=annovarinput.${buildver}_avsift_dropped
1086 1092 sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1087 annovarout=annovarinput.${buildver}_avsift_dropped 1093 joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT
1088 sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1094 fi
1089 joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT 1095
1090 fi 1096 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
1091 1097 # SIFT2
1092 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores 1098 if [ $ljbsift == "Y" ]
1093 # SIFT2 1099 then
1094 if [ $ljbsift == "Y" ] 1100 echo -e "\nLJB SIFT Annotation"
1095 then 1101 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1
1096 echo -e "\nLJB SIFT Annotation" 1102
1097 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1 1103 annovarout=annovarinput.${buildver}_ljb_sift_dropped
1098 1104 sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1099 annovarout=annovarinput.${buildver}_ljb_sift_dropped 1105 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT
1100 sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1106 fi
1101 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT 1107
1102 fi 1108
1103 1109 # PolyPhen2
1104 1110 if [ $polyphen2 == "Y" ]
1105 # PolyPhen2 1111 then
1106 if [ $polyphen2 == "Y" ] 1112 echo -e "\nPolyPhen Annotation"
1107 then 1113 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1
1108 echo -e "\nPolyPhen Annotation" 1114
1109 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1 1115 annovarout=annovarinput.${buildver}_ljb_pp2_dropped
1110 1116 sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1111 annovarout=annovarinput.${buildver}_ljb_pp2_dropped 1117 joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2
1112 sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1118 fi
1113 joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2 1119
1114 fi 1120
1115 1121 # MutationTaster
1116 1122 if [ $mutationtaster == "Y" ]
1117 # MutationTaster 1123 then
1118 if [ $mutationtaster == "Y" ] 1124 echo -e "\nMutationTaster Annotation"
1119 then 1125 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1
1120 echo -e "\nMutationTaster Annotation" 1126
1121 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1 1127 annovarout=annovarinput.${buildver}_ljb_mt_dropped
1122 1128 sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1123 annovarout=annovarinput.${buildver}_ljb_mt_dropped 1129 joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster
1124 sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1130 fi
1125 joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster 1131
1126 fi 1132
1127 1133 # LRT
1128 1134 if [ $lrt == "Y" ]
1129 # LRT 1135 then
1130 if [ $lrt == "Y" ] 1136 echo -e "\nLRT Annotation"
1131 then 1137 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1
1132 echo -e "\nLRT Annotation" 1138
1133 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1 1139 annovarout=annovarinput.${buildver}_ljb_lrt_dropped
1134 1140 sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1135 annovarout=annovarinput.${buildver}_ljb_lrt_dropped 1141 joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore
1136 sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1142 fi
1137 joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore 1143
1138 fi 1144 # PhyloP
1139 1145 if [ $phylop == "Y" ]
1140 # PhyloP 1146 then
1141 if [ $phylop == "Y" ] 1147 echo -e "\nPhyloP Annotation"
1142 then 1148 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1
1143 echo -e "\nPhyloP Annotation" 1149
1144 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1 1150 annovarout=annovarinput.${buildver}_ljb_phylop_dropped
1145 1151 sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1146 annovarout=annovarinput.${buildver}_ljb_phylop_dropped 1152 joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP
1147 sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1153 fi
1148 joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP 1154
1149 fi 1155
1150 1156 ### ESP Exome Variant Server
1151 1157 if [ $esp != "None" ]
1152 ### ESP Exome Variant Server 1158 then
1153 if [ $esp != "None" ] 1159 echo -e "\nESP Annotation"
1154 then 1160 for version in $espstr
1155 echo -e "\nESP Annotation" 1161 do
1156 for version in $espstr 1162 echo "version: $version"
1157 do 1163 # 6500si ALL
1158 echo "version: $version" 1164 if [ $version == "esp6500si_all" ]
1159 # 6500si ALL 1165 then
1160 if [ $version == "esp6500si_all" ] 1166 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1
1161 then 1167
1162 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1 1168 annovarout=annovarinput.${buildver}_esp6500si_all_dropped
1163 1169 sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1164 annovarout=annovarinput.${buildver}_esp6500si_all_dropped 1170 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL
1165 sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1171 fi
1166 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL 1172
1167 fi 1173
1168 1174 # 6500si European American
1169 1175 if [ $version == "esp6500si_ea" ]
1170 # 6500si European American 1176 then
1171 if [ $version == "esp6500si_ea" ] 1177 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1
1172 then 1178
1173 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1 1179 annovarout=annovarinput.${buildver}_esp6500si_ea_dropped
1174 1180 sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
1175 annovarout=annovarinput.${buildver}_esp6500si_ea_dropped 1181 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA
1176 sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 1182 fi
1177 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA 1183
1178 fi 1184 # 6500si African Americans
1179 1185 if [ $version == "esp6500si_aa" ]
1180 # 6500si African Americans 1186 then
1181 if [ $version == "esp6500si_aa" ] 1187 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1
1182 then 1188
1183 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1 1189 annovarout=annovarinput.${buildver}_esp6500si_aa_dropped
1184 1190 sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
1185 annovarout=annovarinput.${buildver}_esp6500si_aa_dropped 1191 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA
1186 sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 1192 fi
1187 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA 1193
1188 fi 1194
1189 1195 # 6500 ALL
1190 1196 if [ $version == "esp6500_all" ]
1191 # 6500 ALL 1197 then
1192 if [ $version == "esp6500_all" ] 1198 ls
1193 then 1199 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1
1194 ls 1200
1195 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1 1201 annovarout=annovarinput.${buildver}_esp6500_all_dropped
1196 1202 sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
1197 annovarout=annovarinput.${buildver}_esp6500_all_dropped 1203 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL
1198 sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 1204 fi
1199 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL 1205
1200 fi 1206
1201 1207 # 6500 European American
1202 1208 if [ $version == "esp6500_ea" ]
1203 # 6500 European American 1209 then
1204 if [ $version == "esp6500_ea" ] 1210 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1
1205 then 1211 annovarout=annovarinput.${buildver}_esp6500_ea_dropped
1206 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1 1212 sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1207 annovarout=annovarinput.${buildver}_esp6500_ea_dropped 1213 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA
1208 sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1214 fi
1209 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA 1215
1210 fi 1216 # 6500 African Americans
1211 1217 if [ $version == "esp6500_aa" ]
1212 # 6500 African Americans 1218 then
1213 if [ $version == "esp6500_aa" ] 1219 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1
1214 then 1220
1215 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1 1221 annovarout=annovarinput.${buildver}_esp6500_aa_dropped
1216 1222 sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1217 annovarout=annovarinput.${buildver}_esp6500_aa_dropped 1223 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA
1218 sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1224 fi
1219 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA 1225
1220 fi 1226
1221 1227 # 5400 ALL
1222 1228 if [ $version == "esp5400_all" ]
1223 # 5400 ALL 1229 then
1224 if [ $version == "esp5400_all" ] 1230 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1
1225 then 1231
1226 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1 1232 annovarout=annovarinput.${buildver}_esp5400_all_dropped
1227 1233 sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1228 annovarout=annovarinput.${buildver}_esp5400_all_dropped 1234 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL
1229 sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1235 fi
1230 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL 1236
1231 fi 1237
1232 1238 # 5400 European American
1233 1239 if [ $version == "esp5400_ea" ]
1234 # 5400 European American 1240 then
1235 if [ $version == "esp5400_ea" ] 1241 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1
1236 then 1242
1237 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1 1243 annovarout=annovarinput.${buildver}_esp5400_ea_dropped
1238 1244 sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1239 annovarout=annovarinput.${buildver}_esp5400_ea_dropped 1245 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA
1240 sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1246 fi
1241 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA 1247
1242 fi 1248 # 5400 African Americans
1243 1249 if [ $version == "esp5400_aa" ]
1244 # 5400 African Americans 1250 then
1245 if [ $version == "esp5400_aa" ] 1251 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1
1246 then 1252
1247 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1 1253 annovarout=annovarinput.${buildver}_esp5400_aa_dropped
1248 1254 sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1249 annovarout=annovarinput.${buildver}_esp5400_aa_dropped 1255 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA
1250 sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1256 fi
1251 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA 1257
1252 fi 1258 done
1253 1259 fi
1254 done 1260
1255 fi 1261 #ExAC-03 database
1256 1262 if [ $exac03 == "Y" ]
1257 1263 then
1258 #ExAC-03 database 1264 echo -e "\nExAC03 Annotation"
1259 if [ $exac03 == "Y" ] 1265 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03 annovarinput $humandb 2>&1
1260 then 1266
1261 echo -e "\nExAC03 Annotation" 1267 #annovarout=annovarinput.${buildver}_exac03_dropped
1262 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03 annovarinput $humandb 2>&1 1268
1263 1269 # split allelefrequency column into several columns, one per population
1264 #annovarout=annovarinput.${buildver}_exac03_dropped 1270 awk 'BEGIN{FS="\t"
1265 1271 OFS="\t"
1266 # split allelefrequency column into several columns, one per population 1272 }{
1267 awk 'BEGIN{FS="\t" 1273 gsub(",","\t",$2)
1268 OFS="\t" 1274 print $0
1269 }{ 1275 }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout
1270 gsub(",","\t",$2) 1276
1271 print $0 1277 sed -i '1i\db\tExAC_ALL\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1272 }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout 1278 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_ALL,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS
1273 1279 fi
1274 sed -i '1i\db\tExAC_Freq\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1280
1275 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_Freq,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS 1281 #ExAC-03 database
1276 fi 1282 if [ $exac03nonpsych == "Y" ]
1277 1283 then
1284 echo -e "\nExAC03 non-psych Annotation"
1285 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03nonpsych annovarinput $humandb 2>&1
1286
1287 #annovarout=annovarinput.${buildver}_exac03_dropped
1288
1289 # split allelefrequency column into several columns, one per population
1290 awk 'BEGIN{FS="\t"
1291 OFS="\t"
1292 }{
1293 gsub(",","\t",$2)
1294 print $0
1295 }END{}' annovarinput.${buildver}_exac03nonpsych_dropped > $annovarout
1296
1297 sed -i '1i\db\tExAC_non-phsych_ALL\tExAC_non-phsych_AFR\tExAC_non-phsych_AMR\tExAC_non-phsych_EAS\tExAC_non-phsych_FIN\tExAC_non-phsych_NFE\tExAC_non-phsych_OTH\tExAC_non-phsych_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1298 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_non-phsych_ALL,B.ExAC_non-phsych_AFR,B.ExAC_non-phsych_AMR,B.ExAC_non-phsych_EAS,B.ExAC_non-phsych_FIN,B.ExAC_non-phsych_NFE,B.ExAC_non-phsych_OTH,B.ExAC_non-phsych_SAS
1299 fi
1300
1301 #ExAC-03 database
1302 if [ $exac03nontcga == "Y" ]
1303 then
1304 echo -e "\nExAC03 non-tcga Annotation"
1305 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03nontcga annovarinput $humandb 2>&1
1306
1307 #annovarout=annovarinput.${buildver}_exac03_dropped
1308
1309 # split allelefrequency column into several columns, one per population
1310 awk 'BEGIN{FS="\t"
1311 OFS="\t"
1312 }{
1313 gsub(",","\t",$2)
1314 print $0
1315 }END{}' annovarinput.${buildver}_exac03nontcga_dropped > $annovarout
1316
1317 sed -i '1i\db\tExAC_non-TCGA_ALL\tExAC_non-TCGA_AFR\tExAC_non-TCGA_AMR\tExAC_non-TCGA_EAS\tExAC_non-TCGA_FIN\tExAC_non-TCGA_NFE\tExAC_non-TCGA_OTH\tExAC_non-TCGA_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1318 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_non-TCGA_ALL,B.ExAC_non-TCGA_AFR,B.ExAC_non-TCGA_AMR,B.ExAC_non-TCGA_EAS,B.ExAC_non-TCGA_FIN,B.ExAC_non-TCGA_NFE,B.ExAC_non-TCGA_OTH,B.ExAC_non-TCGA_SAS
1319 fi
1320
1321 #dbscSNV 1.1
1322 if [ $dbscsnv11 == "Y" ]
1323 then
1324 echo -e "\ndbscSNV11 Annotation"
1325 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype dbscsnv11 annovarinput $humandb 2>&1
1326
1327 #annovarout="annovarinput.${buildver}_dbscsnv11_dropped"
1328 awk 'BEGIN{FS="\t"
1329 OFS="\t"
1330 }{
1331 gsub(",","\t",$2)
1332 print $0
1333 }END{}' annovarinput.${buildver}_dbscsnv11_dropped > $annovarout
1334
1335 sed -i '1i\db\tdbscSNV11_ADA_SCORE\tdbscSNV11_RF_SCORE\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1336 joinresults originalfile $annovarout 4 5 6 7 8 B.dbscSNV11_ADA_SCORE,B.dbscSNV11_RF_SCORE
1337 fi
1338
1339
1340 #kaviar_20150923
1341 if [ $kaviar_20150923 == "Y" ]
1342 then
1343 echo -e "\nkaviar_20150923 Annotation"
1344 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype kaviar_20150923 annovarinput $humandb 2>&1
1345
1346 #annovarout="annovarinput.${buildver}_kaviar_20150923_dropped"
1347 awk 'BEGIN{FS="\t"
1348 OFS="\t"
1349 }{
1350 gsub(",","\t",$2)
1351 print $0
1352 }END{}' annovarinput.${buildver}_kaviar_20150923_dropped > $annovarout
1353
1354 sed -i '1i\db\tKaviar_AF\tKaviar_AC\tKaviar_AN\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1355 joinresults originalfile $annovarout 5 6 7 8 9 B.Kaviar_AF,B.Kaviar_AC,B.Kaviar_AN
1356 fi
1357
1358 #hrcr1
1359 if [ $hrcr1 == "Y" ]
1360 then
1361 echo -e "\nhrcr1 Annotation"
1362 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype hrcr1 annovarinput $humandb 2>&1
1363
1364 #annovarout="annovarinput.${buildver}_dbscsnv11_dropped"
1365 awk 'BEGIN{FS="\t"
1366 OFS="\t"
1367 }{
1368 gsub(",","\t",$2)
1369 print $0
1370 }END{}' annovarinput.${buildver}_hrcr1_dropped > $annovarout
1371 sed -i '1i\db\tHRC_AF\tHRC_AC\tHRC_AN\tHRC_non1000G_AF\tHRC_non1000G_AC\tHRC_non1000g_AN\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1372 joinresults originalfile $annovarout 8 9 10 11 12 B.HRC_AF,B.HRC_AC,B.HRC_AN,B.HRC_non1000g_AF,B.HRC_non1000g_AC,B.HRC_non1000g_AN
1373 fi
1374
1375 #dbnsfp30a
1376 if [ $dbnsfp30a == "Y" ]
1377 then
1378 echo -e "\ndbnsfp30a Annotation"
1379 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype dbnsfp30a annovarinput $humandb 2>&1
1380
1381 #annovarout="annovarinput.${buildver}_dbnsfp30a_dropped"
1382 awk 'BEGIN{FS="\t"
1383 OFS="\t"
1384 }{
1385 gsub(",","\t",$2)
1386 print $0
1387 }END{}' annovarinput.${buildver}_dbnsfp30a_dropped > $annovarout
1388
1389 sed -i '1i\db\tdbNSFP_SIFT_score\tdbNSFP_SIFT_pred\tdbNSFP_Polyphen2_HDIV_score\tdbNSFP_Polyphen2_HDIV_pred\tdbNSFP_Polyphen2_HVAR_score\tdbNSFP_Polyphen2_HVAR_pred\tdbNSFP_LRT_score\tdbNSFP_LRT_pred\tdbNSFP_MutationTaster_score\tdbNSFP_MutationTaster_pred\tdbNSFP_MutationAssessor_score\tdbNSFP_MutationAssessor_pred\tdbNSFP_FATHMM_score\tdbNSFP_FATHMM_pred\tdbNSFP_PROVEAN_score\tdbNSFP_PROVEAN_pred\tdbNSFP_VEST3_score\tdbNSFP_CADD_raw\tdbNSFP_CADD_phredDANN_score\tdbNSFP_fathmm-MKL_coding_score\tdbNSFP_fathmm-MKL_coding_pred\tdbNSFP_MetaSVM_score\tdbNSFP_MetaSVM_pred\tdbNSFP_MetaLR_score\tdbNSFP_MetaLR_pred\tdbNSFP_integrated_fitCons_score\tdbNSFP_integrated_confidence_value\tdbNSFP_GERP_RS\tdbNSFP_phyloP7way_vertebrate\tdbNSFP_phyloP20way_mammalian\tdbNSFP_phastCons7way_vertebrate\tdbNSFP_phastCons20way_mammalian\tdbNSFP_SiPhy_29way_logOdds\tdbNSFP_unknown\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1390 joinresults originalfile $annovarout 36 37 38 39 40 B.dbNSFP_SIFT_score,B.dbNSFP_SIFT_pred,B.dbNSFP_Polyphen2_HDIV_score,B.dbNSFP_Polyphen2_HDIV_pred,B.dbNSFP_Polyphen2_HVAR_score,B.dbNSFP_Polyphen2_HVAR_pred,B.dbNSFP_LRT_score,B.dbNSFP_LRT_pred,B.dbNSFP_MutationTaster_score,B.dbNSFP_MutationTaster_pred,B.dbNSFP_MutationAssessor_score,B.dbNSFP_MutationAssessor_pred,B.dbNSFP_FATHMM_score,B.dbNSFP_FATHMM_pred,B.dbNSFP_PROVEAN_score,B.dbNSFP_PROVEAN_pred,B.dbNSFP_VEST3_score,B.dbNSFP_CADD_raw,B.dbNSFP_CADD_phredDANN_score,B.dbNSFP_fathmm-MKL_coding_score,B.dbNSFP_fathmm-MKL_coding_pred,B.dbNSFP_MetaSVM_score,B.dbNSFP_MetaSVM_pred,B.dbNSFP_MetaLR_score,B.dbNSFP_MetaLR_pred,B.dbNSFP_integrated_fitCons_score,B.dbNSFP_integrated_confidence_value,B.dbNSFP_GERP_RS,B.dbNSFP_phyloP7way_vertebrate,B.dbNSFP_phyloP20way_mammalian,B.dbNSFP_phastCons7way_vertebrate,B.dbNSFP_phastCons20way_mammalian,B.dbNSFP_SiPhy_29way_logOdds
1391
1392 fi
1393
1394 #mitimpact2
1395 if [ $mitimpact2 == "Y" ]
1396 then
1397 echo -e "\nmitimpact2 Annotation"
1398 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype mitimpact2 annovarinput $humandb 2>&1
1399
1400 #annovarout="annovarinput.${buildver}_mitimpact2_dropped"
1401 awk 'BEGIN{FS="\t"
1402 OFS="\t"
1403 }{
1404 gsub(",","\t",$2)
1405 print $0
1406 }END{}' annovarinput.${buildver}_mitimpact2_dropped > $annovarout
1407
1408 sed -i '1i\db\tMITimpact2_Gene_symbol\tMITimpact2_OXPHOS_Complex\tMITimpact2_Ensembl_Gene_ID\tMITimpact2_Ensembl_Protein_ID\tMITimpact2_Uniprot_Name\tMITimpact2_Uniprot_ID\tMITimpact2_NCBI_Gene_ID\tMITimpact2_NCBI_Protein_ID\tMITimpact2_Gene_pos\tMITimpact2_AA_pos\tMITimpact2_AA_sub\tMITimpact2_Codon_sub\tMITimpact2_dbSNP_ID\tMITimpact2_PhyloP_46V\tMITimpact2_PhastCons_46V\tMITimpact2_PhyloP_100V\tMITimpact2_PhastCons_100V\tMITimpact2_SiteVar\tMITimpact2_PolyPhen2_prediction\tMITimpact2_PolyPhen2_score\tMITimpact2_SIFT_prediction\tMITimpact2_SIFT_score\tMITimpact2_FatHmm_prediction\tMITimpact2_FatHmm_score\tMITimpact2_PROVEAN_prediction\tMITimpact2_PROVEAN_score\tMITimpact2_MutAss_prediction\tMITimpact2_MutAss_score\tMITimpact2_EFIN_Swiss_Prot_Score\tMITimpact2_EFIN_Swiss_Prot_Prediction\tMITimpact2_EFIN_HumDiv_Score\tMITimpact2_EFIN_HumDiv_Prediction\tMITimpact2_CADD_score\tMITimpact2_CADD_Phred_score\tMITimpact2_CADD_prediction\tMITimpact2_Carol_prediction\tMITimpact2_Carol_score\tMITimpact2_Condel_score\tMITimpact2_Condel_pred\tMITimpact2_COVEC_WMV\tMITimpact2_COVEC_WMV_prediction\tMITimpact2_PolyPhen2_score_transf\tMITimpact2_PolyPhen2_pred_transf\tMITimpact2_SIFT_score_transf\tMITimpact2_SIFT_pred_transf\tMITimpact2_MutAss_score_transf\tMITimpact2_MutAss_pred_transf\tMITimpact2_Perc_coevo_Sites\tMITimpact2_Mean_MI_score\tMITimpact2_COSMIC_ID\tMITimpact2_Tumor_site\tMITimpact2_Examined_samples\tMITimpact2_Mutation_frequency\tMITimpact2_US\tMITimpact2_Status\tMITimpact2_Associated_disease\tMITimpact2_Presence_in_TD\tMITimpact2_Class_predicted\tMITimpact2_Prob_N\tMITimpact2_Prob_P\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1409 joinresults originalfile $annovarout 62 63 64 65 66 B.MITimpact2_Gene_symbol,B.MITimpact2_OXPHOS_Complex,B.MITimpact2_Ensembl_Gene_ID,B.MITimpact2_Ensembl_Protein_ID,B.MITimpact2_Uniprot_Name,B.MITimpact2_Uniprot_ID,B.MITimpact2_NCBI_Gene_ID,B.MITimpact2_NCBI_Protein_ID,B.MITimpact2_Gene_pos,B.MITimpact2_AA_pos,B.MITimpact2_AA_sub,B.MITimpact2_Codon_sub,B.MITimpact2_dbSNP_ID,B.MITimpact2_PhyloP_46V,B.MITimpact2_PhastCons_46V,B.MITimpact2_PhyloP_100V,B.MITimpact2_PhastCons_100V,B.MITimpact2_SiteVar,B.MITimpact2_PolyPhen2_prediction,B.MITimpact2_PolyPhen2_score,B.MITimpact2_SIFT_prediction,B.MITimpact2_SIFT_score,B.MITimpact2_FatHmm_prediction,B.MITimpact2_FatHmm_score,B.MITimpact2_PROVEAN_prediction,B.MITimpact2_PROVEAN_score,B.MITimpact2_MutAss_prediction,B.MITimpact2_MutAss_score,B.MITimpact2_EFIN_Swiss_Prot_Score,B.MITimpact2_EFIN_Swiss_Prot_Prediction,B.MITimpact2_EFIN_HumDiv_Score,B.MITimpact2_EFIN_HumDiv_Prediction,B.MITimpact2_CADD_score,B.MITimpact2_CADD_Phred_score,B.MITimpact2_CADD_prediction,B.MITimpact2_Carol_prediction,B.MITimpact2_Carol_score,B.MITimpact2_Condel_score,B.MITimpact2_Condel_pred,B.MITimpact2_COVEC_WMV,B.MITimpact2_COVEC_WMV_prediction,B.MITimpact2_PolyPhen2_score_transf,B.MITimpact2_PolyPhen2_pred_transf,B.MITimpact2_SIFT_score_transf,B.MITimpact2_SIFT_pred_transf,B.MITimpact2_MutAss_score_transf,B.MITimpact2_MutAss_pred_transf,B.MITimpact2_Perc_coevo_Sites,B.MITimpact2_Mean_MI_score,B.MITimpact2_COSMIC_ID,B.MITimpact2_Tumor_site,B.MITimpact2_Examined_samples,B.MITimpact2_Mutation_frequency,B.MITimpact2_US,B.MITimpact2_Status,B.MITimpact2_Associated_disease,B.MITimpact2_Presence_in_TD,B.MITimpact2_Class_predicted,B.MITimpact2_Prob_N,B.MITimpact2_Prob_P
1410 fi
1411
1412 #mitimpact24
1413 if [ $mitimpact24 == "Y" ]
1414 then
1415 echo -e "\nmitimpact24 Annotation"
1416 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype mitimpact24 annovarinput $humandb 2>&1
1417
1418 #annovarout="annovarinput.${buildver}_mitimpact24_dropped"
1419 awk 'BEGIN{FS="\t"
1420 OFS="\t"
1421 }{
1422 gsub(",","\t",$24)
1423 print $0
1424 }END{}' annovarinput.${buildver}_mitimpact24_dropped > $annovarout
1425
1426 sed -i '1i\db\tMITimpact24_Gene_symbol\tMITimpact24_OXPHOS_Complex\tMITimpact24_Ensembl_Gene_ID\tMITimpact24_Ensembl_Protein_ID\tMITimpact24_Uniprot_Name\tMITimpact24_Uniprot_ID\tMITimpact24_NCBI_Gene_ID\tMITimpact24_NCBI_Protein_ID\tMITimpact24_Gene_pos\tMITimpact24_AA_pos\tMITimpact24_AA_sub\tMITimpact24_Codon_sub\tMITimpact24_dbSNP_ID\tMITimpact24_PhyloP_46V\tMITimpact24_PhastCons_46V\tMITimpact24_PhyloP_100V\tMITimpact24_PhastCons_100V\tMITimpact24_SiteVar\tMITimpact24_PolyPhen24_prediction\tMITimpact24_PolyPhen24_score\tMITimpact24_SIFT_prediction\tMITimpact24_SIFT_score\tMITimpact24_FatHmm_prediction\tMITimpact24_FatHmm_score\tMITimpact24_PROVEAN_prediction\tMITimpact24_PROVEAN_score\tMITimpact24_MutAss_prediction\tMITimpact24_MutAss_score\tMITimpact24_EFIN_Swiss_Prot_Score\tMITimpact24_EFIN_Swiss_Prot_Prediction\tMITimpact24_EFIN_HumDiv_Score\tMITimpact24_EFIN_HumDiv_Prediction\tMITimpact24_CADD_score\tMITimpact24_CADD_Phred_score\tMITimpact24_CADD_prediction\tMITimpact24_Carol_prediction\tMITimpact24_Carol_score\tMITimpact24_Condel_score\tMITimpact24_Condel_pred\tMITimpact24_COVEC_WMV\tMITimpact24_COVEC_WMV_prediction\tMITimpact24_PolyPhen24_score_transf\tMITimpact24_PolyPhen24_pred_transf\tMITimpact24_SIFT_score_transf\tMITimpact24_SIFT_pred_transf\tMITimpact24_MutAss_score_transf\tMITimpact24_MutAss_pred_transf\tMITimpact24_Perc_coevo_Sites\tMITimpact24_Mean_MI_score\tMITimpact24_COSMIC_ID\tMITimpact24_Tumor_site\tMITimpact24_Examined_samples\tMITimpact24_Mutation_frequency\tMITimpact24_US\tMITimpact24_Status\tMITimpact24_Associated_disease\tMITimpact24_Presence_in_TD\tMITimpact24_Class_predicted\tMITimpact24_Prob_N\tMITimpact24_Prob_P\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1427 joinresults originalfile $annovarout 62 63 64 65 66 B.MITimpact24_Gene_symbol,B.MITimpact24_OXPHOS_Complex,B.MITimpact24_Ensembl_Gene_ID,B.MITimpact24_Ensembl_Protein_ID,B.MITimpact24_Uniprot_Name,B.MITimpact24_Uniprot_ID,B.MITimpact24_NCBI_Gene_ID,B.MITimpact24_NCBI_Protein_ID,B.MITimpact24_Gene_pos,B.MITimpact24_AA_pos,B.MITimpact24_AA_sub,B.MITimpact24_Codon_sub,B.MITimpact24_dbSNP_ID,B.MITimpact24_PhyloP_46V,B.MITimpact24_PhastCons_46V,B.MITimpact24_PhyloP_100V,B.MITimpact24_PhastCons_100V,B.MITimpact24_SiteVar,B.MITimpact24_PolyPhen24_prediction,B.MITimpact24_PolyPhen24_score,B.MITimpact24_SIFT_prediction,B.MITimpact24_SIFT_score,B.MITimpact24_FatHmm_prediction,B.MITimpact24_FatHmm_score,B.MITimpact24_PROVEAN_prediction,B.MITimpact24_PROVEAN_score,B.MITimpact24_MutAss_prediction,B.MITimpact24_MutAss_score,B.MITimpact24_EFIN_Swiss_Prot_Score,B.MITimpact24_EFIN_Swiss_Prot_Prediction,B.MITimpact24_EFIN_HumDiv_Score,B.MITimpact24_EFIN_HumDiv_Prediction,B.MITimpact24_CADD_score,B.MITimpact24_CADD_Phred_score,B.MITimpact24_CADD_prediction,B.MITimpact24_Carol_prediction,B.MITimpact24_Carol_score,B.MITimpact24_Condel_score,B.MITimpact24_Condel_pred,B.MITimpact24_COVEC_WMV,B.MITimpact24_COVEC_WMV_prediction,B.MITimpact24_PolyPhen24_score_transf,B.MITimpact24_PolyPhen24_pred_transf,B.MITimpact24_SIFT_score_transf,B.MITimpact24_SIFT_pred_transf,B.MITimpact24_MutAss_score_transf,B.MITimpact24_MutAss_pred_transf,B.MITimpact24_Perc_coevo_Sites,B.MITimpact24_Mean_MI_score,B.MITimpact24_COSMIC_ID,B.MITimpact24_Tumor_site,B.MITimpact24_Examined_samples,B.MITimpact24_Mutation_frequency,B.MITimpact24_US,B.MITimpact24_Status,B.MITimpact24_Associated_disease,B.MITimpact24_Presence_in_TD,B.MITimpact24_Class_predicted,B.MITimpact24_Prob_N,B.MITimpact24_Prob_P
1428 fi
1429
1430
1278 #GoNL database 1431 #GoNL database
1279 if [ $gonl == "Y" ] 1432 if [ $gonl == "Y" ]
1280 then 1433 then
1281 1434
1282 if [ $buildver == "hg19" ] 1435 if [ $buildver == "hg19" ]
1283 then 1436 then
1284 echo -e "\nGoNL Annotation" 1437 echo -e "\nGoNL Annotation"
1285 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype generic -genericdbfile ${buildver}_gonl.txt annovarinput $humandb 2>&1 1438 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype generic -genericdbfile ${buildver}_gonl.txt annovarinput $humandb 2>&1
1286 1439
1287 ls 1440 ls
1288 annovarout=annovarinput.${buildver}_generic_dropped 1441 annovarout=annovarinput.${buildver}_generic_dropped
1289 1442
1290 head $annovarout 1443 head $annovarout
1291 1444
1292 sed -i '1i\db\tGoNL\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1445 sed -i '1i\db\tGoNL\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1293 joinresults originalfile $annovarout 3 4 5 6 7 B.GoNL 1446 joinresults originalfile $annovarout 3 4 5 6 7 B.GoNL
1294 1447
1295 fi 1448 fi
1296 1449
1297 fi 1450 fi
1298 1451
1299 #SPIDEX database 1452 #SPIDEX database
1300 if [ $spidex == "Y" ] 1453 if [ $spidex == "Y" ]
1301 then 1454 then
1302 1455
1303 if [ $buildver == "hg19" ] 1456 if [ $buildver == "hg19" ]
1304 then 1457 then
1305 echo -e "\nSPIDEX Annotation" 1458 echo -e "\nSPIDEX Annotation"
1306 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype spidex annovarinput $humandb 2>&1 1459 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype spidex annovarinput $humandb 2>&1
1307 1460
1308 # split allelefrequency column into several columns, one per population 1461 # split allelefrequency column into several columns, one per population
1309 awk 'BEGIN{FS="\t" 1462 awk 'BEGIN{FS="\t"
1310 OFS="\t" 1463 OFS="\t"
1311 }{ 1464 }{
1312 gsub(",","\t",$2) 1465 gsub(",","\t",$2)
1313 print $0 1466 print $0
1314 }END{}' annovarinput.${buildver}_spidex_dropped > $annovarout 1467 }END{}' annovarinput.${buildver}_spidex_dropped > $annovarout
1315 1468
1316 #annovarout=annovarinput.${buildver}_spidex_dropped 1469 #annovarout=annovarinput.${buildver}_spidex_dropped
1317 #head $annovarout 1470 #head $annovarout
1318 1471
1319 sed -i '1i\db\tSPIDEX_dpsi_max_tissue\tSPIDEX_dpsi_zscore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1472 sed -i '1i\db\tSPIDEX_dpsi_max_tissue\tSPIDEX_dpsi_zscore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1320 joinresults originalfile $annovarout 4 5 6 7 8 B.SPIDEX_dpsi_max_tissue,B.SPIDEX_dpsi_zscore 1473 joinresults originalfile $annovarout 4 5 6 7 8 B.SPIDEX_dpsi_max_tissue,B.SPIDEX_dpsi_zscore
1321
1322 fi 1474 fi
1323 1475
1324 fi 1476 fi
1325 1477
1326 1478 #GERP++
1327 #GERP++ 1479 if [ $gerp == "Y" ]
1328 if [ $gerp == "Y" ] 1480 then
1329 then 1481 echo -e "\nGERP++ Annotation"
1330 echo -e "\nGERP++ Annotation" 1482 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1
1331 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1 1483
1332 1484 annovarout="annovarinput.${buildver}_gerp++gt2_dropped"
1333 annovarout="annovarinput.${buildver}_gerp++gt2_dropped" 1485 sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1334 sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1486 joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++
1335 joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++ 1487 fi
1336 fi 1488
1337 1489
1338 1490 #COSMIC
1339 #COSMIC 1491 if [[ $cosmic61 == "Y" && $buildver == "hg19" ]]
1340 if [[ $cosmic61 == "Y" && $buildver == "hg19" ]] 1492 then
1341 then 1493 echo -e "\nCOSMIC61 Annotation"
1342 echo -e "\nCOSMIC61 Annotation" 1494 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1
1343 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1 1495
1344 1496 annovarout="annovarinput.${buildver}_cosmic61_dropped"
1345 annovarout="annovarinput.${buildver}_cosmic61_dropped" 1497 sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1346 sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1498 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61
1347 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61 1499
1348 1500 fi
1349 fi 1501
1350 1502 if [[ $cosmic63 == "Y" && $buildver == "hg19" ]]
1351 if [[ $cosmic63 == "Y" && $buildver == "hg19" ]] 1503 then
1352 then 1504 echo -e "\nCOSMIC63 Annotation"
1353 echo -e "\nCOSMIC63 Annotation" 1505 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1
1354 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1 1506
1355 1507 annovarout="annovarinput.${buildver}_cosmic63_dropped"
1356 annovarout="annovarinput.${buildver}_cosmic63_dropped" 1508 sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1357 sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1509 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63
1358 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63 1510
1359 1511 fi
1360 fi 1512
1361 1513 if [[ $cosmic64 == "Y" && $buildver == "hg19" ]]
1362 if [[ $cosmic64 == "Y" && $buildver == "hg19" ]] 1514 then
1363 then 1515 echo -e "\nCOSMIC64 Annotation"
1364 echo -e "\nCOSMIC64 Annotation" 1516 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1
1365 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1 1517
1366 1518 annovarout="annovarinput.${buildver}_cosmic64_dropped"
1367 annovarout="annovarinput.${buildver}_cosmic64_dropped" 1519 sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1368 sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1520 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64
1369 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64 1521
1370 1522 fi
1371 fi 1523
1372 1524 if [[ $cosmic65 == "Y" && $buildver == "hg19" ]]
1373 if [[ $cosmic65 == "Y" && $buildver == "hg19" ]] 1525 then
1374 then 1526 echo -e "\nCOSMIC65 Annotation"
1375 echo -e "\nCOSMIC65 Annotation" 1527 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1
1376 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1 1528
1377 1529 annovarout="annovarinput.${buildver}_cosmic65_dropped"
1378 annovarout="annovarinput.${buildver}_cosmic65_dropped" 1530 sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1379 sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1531 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65
1380 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65 1532
1381 1533 fi
1382 fi 1534
1383 1535 if [[ $cosmic67 == "Y" && $buildver == "hg19" ]]
1384 if [[ $cosmic67 == "Y" && $buildver == "hg19" ]] 1536 then
1385 then 1537 echo -e "\nCOSMIC67 Annotation"
1386 echo -e "\nCOSMIC67 Annotation" 1538 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1
1387 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1 1539
1388 1540 annovarout="annovarinput.${buildver}_cosmic67_dropped"
1389 annovarout="annovarinput.${buildver}_cosmic67_dropped" 1541 sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1390 sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1542 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67
1391 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67 1543
1392 1544 fi
1393 fi 1545
1394 1546 if [[ $cosmic68 == "Y" && $buildver == "hg19" ]]
1395 if [[ $cosmic68 == "Y" && $buildver == "hg19" ]] 1547 then
1396 then 1548 echo -e "\nCOSMIC68 Annotation"
1397 echo -e "\nCOSMIC68 Annotation" 1549 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1
1398 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1 1550
1399 1551 annovarout="annovarinput.${buildver}_cosmic68_dropped"
1400 annovarout="annovarinput.${buildver}_cosmic68_dropped" 1552 sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1401 sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1553 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68
1402 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68 1554
1403 1555 fi
1404 fi 1556
1405 1557 if [[ $cosmic70 == "Y" && $buildver == "hg19" ]]
1406 if [[ $cosmic70 == "Y" && $buildver == "hg19" ]] 1558 then
1407 then 1559 echo -e "\nCOSMIC70 Annotation"
1408 echo -e "\nCOSMIC70 Annotation" 1560 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1
1409 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1 1561
1410 1562 annovarout="annovarinput.${buildver}_cosmic70_dropped"
1411 annovarout="annovarinput.${buildver}_cosmic70_dropped" 1563 sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1412 sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1564 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70
1413 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70 1565
1414 1566 fi
1415 fi 1567
1416 1568 if [[ $clinvar == "Y" && $buildver == "hg19" ]]
1417 if [[ $clinvar == "Y" && $buildver == "hg19" ]] 1569 then
1418 then 1570 echo -e "\nCLINVAR Annotation"
1419 echo -e "\nCLINVAR Annotation" 1571 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1
1420 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1 1572
1421 1573 annovarout="annovarinput.${buildver}_clinvar_20140211_dropped"
1422 annovarout="annovarinput.${buildver}_clinvar_20140211_dropped" 1574 sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1423 sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1575 joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR
1424 joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR 1576
1425 1577 fi
1426 fi 1578
1427 1579 if [[ $nci60 == "Y" && $buildver == "hg19" ]]
1428 if [[ $nci60 == "Y" && $buildver == "hg19" ]] 1580 then
1429 then 1581 echo -e "\nNCI60 Annotation"
1430 echo -e "\nNCI60 Annotation" 1582 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1
1431 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1 1583
1432 1584 annovarout="annovarinput.${buildver}_nci60_dropped"
1433 annovarout="annovarinput.${buildver}_nci60_dropped" 1585 sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1434 sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1586 joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60
1435 joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60 1587
1436 1588 fi
1437 fi 1589
1438 1590 #cg46
1439 #cg46 1591 if [[ $cg46 == "Y" ]]
1440 if [[ $cg46 == "Y" ]] 1592 then
1441 then 1593 echo -e "\nCG 46 genomes Annotation"
1442 echo -e "\nCG 46 genomes Annotation" 1594 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1
1443 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1 1595
1444 1596 annovarout="annovarinput.${buildver}_cg46_dropped"
1445 annovarout="annovarinput.${buildver}_cg46_dropped" 1597 sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1446 sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1598 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}
1447 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader} 1599
1448 1600 fi
1449 fi 1601
1450 1602
1451 1603 #cg69
1452 #cg69 1604 if [[ $cg69 == "Y" ]]
1453 if [[ $cg69 == "Y" ]] 1605 then
1454 then 1606 echo -e "\nCG 69 genomes Annotation"
1455 echo -e "\nCG 69 genomes Annotation" 1607 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1
1456 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1 1608
1457 1609 annovarout="annovarinput.${buildver}_cg69_dropped"
1458 annovarout="annovarinput.${buildver}_cg69_dropped" 1610 sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1459 sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 1611 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}
1460 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader} 1612
1461 1613 fi
1462 fi 1614
1463 1615
1464 1616
1465 1617 if [ $convertcoords == "Y" ]
1466 if [ $convertcoords == "Y" ] 1618 then
1467 then 1619 echo "converting back coordinates"
1468 echo "converting back coordinates" 1620 awk 'BEGIN{
1469 awk 'BEGIN{ 1621 FS="\t";
1470 FS="\t"; 1622 OFS="\t";
1471 OFS="\t"; 1623 }{
1472 }{ 1624 if (FNR==1)
1473 if (FNR==1) 1625 print $0
1474 print $0 1626 if(FNR>1) {
1475 if(FNR>1) { 1627 $"'"${chrcol}"'" = "chr"$"'"${chrcol}"'"
1476 $"'"${chrcol}"'" = "chr"$"'"${chrcol}"'" 1628 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 };
1477 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 }; 1629 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" };
1478 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" }; 1630 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" };
1479 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" }; 1631 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 };
1480 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 }; 1632 print $0
1481 print $0 1633 }
1482 1634 }
1483 } 1635 END{
1484 } 1636 }' originalfile > originalfile_coords
1485 END{ 1637 else
1486 }' originalfile > originalfile_coords 1638 mv originalfile originalfile_coords
1487 else 1639 fi
1488 mv originalfile originalfile_coords 1640
1489 fi 1641 #restore "chr" prefix?
1490 1642
1491 #restore "chr" prefix? 1643 #move to outputfile
1492 1644 if [ ! -s annovarinput.invalid_input ]
1493 #move to outputfile 1645 then
1494 if [ ! -s annovarinput.invalid_input ] 1646 echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input
1495 then 1647 fi
1496 echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input 1648
1497 fi 1649 cp originalfile_coords $outfile_all
1498 1650 cp annovarinput.invalid_input $outfile_invalid 2>&1
1499 cp originalfile_coords $outfile_all 1651
1500 cp annovarinput.invalid_input $outfile_invalid 2>&1 1652 sed -i 's/chrchr/chr/g' $outfile_all
1501 1653 sed -i 's/chrchr/chr/g' $outfile_invalid
1502 sed -i 's/chrchr/chr/g' $outfile_all 1654
1503 sed -i 's/chrchr/chr/g' $outfile_invalid
1504
1505 fi #if $dorunannovar 1655 fi #if $dorunannovar
1506 1656
1507 1657
1508 1658
1509 1659