Mercurial > repos > saskia-hiltemann > annovar
comparison tools/annovar/annovar.sh @ 8:d6af2a78617f draft
added support for databases upto 4 march 2016
author | saskia-hiltemann |
---|---|
date | Fri, 04 Mar 2016 11:32:50 -0500 |
parents | 69e2067a120d |
children | f7ff063c738e |
comparison
equal
deleted
inserted
replaced
7:69e2067a120d | 8:d6af2a78617f |
---|---|
2 | 2 |
3 test="N" | 3 test="N" |
4 dofilter="N" | 4 dofilter="N" |
5 | 5 |
6 ######################### | 6 ######################### |
7 # DEFINE SOME | 7 # DEFINE SOME |
8 # FUNCTIONS | 8 # FUNCTIONS |
9 ######################### | 9 ######################### |
10 | 10 |
11 function usage(){ | 11 function usage(){ |
12 echo "usage: $0 todo" | 12 echo "usage: $0 todo" |
13 } | 13 } |
14 | 14 |
15 function runfilter(){ | 15 function runfilter(){ |
16 ifile=$1 | 16 ifile=$1 |
17 columnname=$2 | 17 columnname=$2 |
18 threshold=$3 | 18 threshold=$3 |
19 | 19 |
20 if [[ $threshold == "-1" ]] | 20 if [[ $threshold == "-1" ]] |
21 then | 21 then |
22 echo "not filtering" | 22 echo "not filtering" |
23 return | 23 return |
24 fi | 24 fi |
25 | 25 |
26 echo "filtering: $columnname, $threshold" | 26 echo "filtering: $columnname, $threshold" |
27 cat $ifile | 27 cat $ifile |
28 | 28 |
29 #get column number corresponding to column header | 29 #get column number corresponding to column header |
30 column=`awk 'BEGIN{ | 30 column=`awk 'BEGIN{ |
31 FS="\t"; | 31 FS="\t"; |
32 col=-1 | 32 col=-1 |
33 }{ | 33 }{ |
34 if(FNR==1){ | 34 if(FNR==1){ |
35 for(i=1;i<=NF;i++){ | 35 for(i=1;i<=NF;i++){ |
36 if($i == "'"${columnname}"'") | 36 if($i == "'"${columnname}"'") |
37 col=i | 37 col=i |
38 } | 38 } |
39 print col | 39 print col |
40 } | 40 } |
41 }' $ifile ` | 41 }' $ifile ` |
42 | 42 |
43 if [ $column == -1 ] | 43 if [ $column == -1 ] |
44 then | 44 then |
45 echo "no such column, exiting" | 45 echo "no such column, exiting" |
46 return | 46 return |
47 fi | 47 fi |
48 | 48 |
49 #perform filtering using the threshold | 49 #perform filtering using the threshold |
50 awk 'BEGIN{ | 50 awk 'BEGIN{ |
51 FS="\t"; | 51 FS="\t"; |
52 OFS="\t"; | 52 OFS="\t"; |
53 }{ | 53 }{ |
54 if(FNR==1) | 54 if(FNR==1) |
55 print $0; | 55 print $0; |
56 if(FNR>1){ | 56 if(FNR>1){ |
57 if( $"'"${column}"'" == "" ) # empty column, then print | 57 if( $"'"${column}"'" == "" ) # empty column, then print |
58 print $0 | 58 print $0 |
59 else if ("'"${threshold}"'" == "text"){} #if set to text dont check threshold | 59 else if ("'"${threshold}"'" == "text"){} #if set to text dont check threshold |
60 | 60 |
61 else if ($"'"${column}"'" < "'"${threshold}"'") #else do check it | 61 else if ($"'"${column}"'" < "'"${threshold}"'") #else do check it |
62 print $0 | 62 print $0 |
63 } | 63 } |
64 }' $ifile > tmpfile | 64 }' $ifile > tmpfile |
65 | 65 |
66 mv tmpfile $ifile | 66 mv tmpfile $ifile |
67 } | 67 } |
68 | 68 |
69 # arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols | 69 # arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols |
70 function joinresults(){ | 70 function joinresults(){ |
71 ofile=$1 | 71 ofile=$1 |
72 rfile=$2 | 72 rfile=$2 |
73 colchr=$3 | 73 colchr=$3 |
74 colstart=$4 | 74 colstart=$4 |
75 colend=$5 | 75 colend=$5 |
76 colref=$6 | 76 colref=$6 |
77 colobs=$7 | 77 colobs=$7 |
78 addcols=$8 #e.g. "B.col1,B.col2" | 78 addcols=$8 #e.g. "B.col1,B.col2" |
79 | 79 |
80 test="N" | 80 test="N" |
81 | 81 |
82 # echo "joining result with original file" | 82 # echo "joining result with original file" |
83 if [ $test == "Y" ] | 83 if [ $test == "Y" ] |
84 then | 84 then |
85 echo "ofile: $ofile" | 85 echo "ofile: $ofile" |
86 head $ofile | 86 head $ofile |
87 echo "rfile: $rfile" | 87 echo "rfile: $rfile" |
88 head $rfile | 88 head $rfile |
89 fi | 89 fi |
90 numlines=`wc $rfile | cut -d" " -f2` | 90 numlines=`wc $rfile | cut -d" " -f2` |
91 | 91 |
92 # if empty results file, just add header fields | 92 # if empty results file, just add header fields |
93 if [[ ! -s $rfile ]] | 93 if [[ ! -s $rfile ]] |
94 then | 94 then |
95 dummycol=${addcols:2} | 95 dummycol=${addcols:2} |
96 outputcol=${dummycol//",B."/" "} | 96 outputcol=${dummycol//",B."/" "} |
97 numcommas=`echo "$addcols" | grep -o "," | wc -l` | 97 numcommas=`echo "$addcols" | grep -o "," | wc -l` |
98 | 98 |
99 awk 'BEGIN{FS="\t";OFS="\t"}{ | 99 awk 'BEGIN{FS="\t";OFS="\t"}{ |
100 if(FNR==1) | 100 if(FNR==1) |
101 print $0,"'"$outputcol"'"; | 101 print $0,"'"$outputcol"'"; |
102 else{ | 102 else{ |
103 printf $0 | 103 printf $0 |
104 for(i=0;i<="'"$numcommas"'"+1;i++) | 104 for(i=0;i<="'"$numcommas"'"+1;i++) |
105 printf "\t" | 105 printf "\t" |
106 printf "\n" | 106 printf "\n" |
107 } | 107 } |
108 }END{}' $ofile > tempofile | 108 }END{}' $ofile > tempofile |
109 | 109 |
110 mv tempofile $ofile | 110 mv tempofile $ofile |
111 return | 111 return |
112 fi | 112 fi |
113 | 113 |
114 | 114 |
115 #get input file column names for cgatools join | 115 #get input file column names for cgatools join |
116 col_chr_name=`head -1 $rfile | cut -f${colchr}` | 116 col_chr_name=`head -1 $rfile | cut -f${colchr}` |
117 col_start_name=`head -1 $rfile | cut -f${colstart}` | 117 col_start_name=`head -1 $rfile | cut -f${colstart}` |
118 col_end_name=`head -1 $rfile | cut -f${colend}` | 118 col_end_name=`head -1 $rfile | cut -f${colend}` |
119 col_ref_name=`head -1 $rfile | cut -f${colref}` | 119 col_ref_name=`head -1 $rfile | cut -f${colref}` |
120 col_obs_name=`head -1 $rfile | cut -f${colobs}` | 120 col_obs_name=`head -1 $rfile | cut -f${colobs}` |
121 | 121 |
122 #get annotation file column names for cgatools join | 122 #get annotation file column names for cgatools join |
123 chr_name=`head -1 $ofile | cut -f${chrcol}` | 123 chr_name=`head -1 $ofile | cut -f${chrcol}` |
124 start_name=`head -1 $ofile | cut -f${startcol}` | 124 start_name=`head -1 $ofile | cut -f${startcol}` |
125 end_name=`head -1 $ofile | cut -f${endcol}` | 125 end_name=`head -1 $ofile | cut -f${endcol}` |
126 ref_name=`head -1 $ofile | cut -f${refcol}` | 126 ref_name=`head -1 $ofile | cut -f${refcol}` |
127 obs_name=`head -1 $ofile | cut -f${obscol}` | 127 obs_name=`head -1 $ofile | cut -f${obscol}` |
128 | 128 |
129 if [ $test == "Y" ] | 129 if [ $test == "Y" ] |
130 then | 130 then |
131 echo "input file" | 131 echo "input file" |
132 echo "chr col: $col_chr_name ($colchr)" | 132 echo "chr col: $col_chr_name ($colchr)" |
133 echo "start col: $col_start_name ($colstart)" | 133 echo "start col: $col_start_name ($colstart)" |
134 echo "end col: $col_end_name ($colend)" | 134 echo "end col: $col_end_name ($colend)" |
135 echo "ref col: $col_ref_name ($colref)" | 135 echo "ref col: $col_ref_name ($colref)" |
136 echo "obs col: $col_obs_name ($colobs)" | 136 echo "obs col: $col_obs_name ($colobs)" |
137 echo "" | 137 echo "" |
138 echo "annotation file" | 138 echo "annotation file" |
139 echo "chr col: $chr_name ($chrcol)" | 139 echo "chr col: $chr_name ($chrcol)" |
140 echo "start col: $start_name ($startcol)" | 140 echo "start col: $start_name ($startcol)" |
141 echo "end col: $end_name ($endcol)" | 141 echo "end col: $end_name ($endcol)" |
142 echo "ref col: $ref_name ($refcol)" | 142 echo "ref col: $ref_name ($refcol)" |
143 echo "obs col: $obs_name ($obscol)" | 143 echo "obs col: $obs_name ($obscol)" |
144 fi | 144 fi |
145 | 145 |
146 #perform join | 146 #perform join |
147 cgatools join --beta \ | 147 cgatools join --beta \ |
148 --input $ofile $rfile \ | 148 --input $ofile $rfile \ |
149 --output temporiginal \ | 149 --output temporiginal \ |
150 --match ${chr_name}:${col_chr_name} \ | 150 --match ${chr_name}:${col_chr_name} \ |
151 --match ${start_name}:${col_start_name} \ | 151 --match ${start_name}:${col_start_name} \ |
152 --match ${end_name}:${col_end_name} \ | 152 --match ${end_name}:${col_end_name} \ |
153 --match ${ref_name}:${col_ref_name} \ | 153 --match ${ref_name}:${col_ref_name} \ |
154 --match ${obs_name}:${col_obs_name} \ | 154 --match ${obs_name}:${col_obs_name} \ |
155 --select A.*,$addcols \ | 155 --select A.*,$addcols \ |
156 --always-dump \ | 156 --always-dump \ |
157 --output-mode compact | 157 --output-mode compact |
158 | 158 |
159 #replace originalfile | 159 #replace originalfile |
160 sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header | 160 sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header |
161 mv temporiginal originalfile | 161 mv temporiginal originalfile |
162 | 162 |
163 if [ $test == "Y" ] | 163 if [ $test == "Y" ] |
164 then | 164 then |
165 echo "joining complete" | 165 echo "joining complete" |
166 head originalfile | 166 head originalfile |
167 echo "" | 167 echo "" |
168 fi | 168 fi |
169 | |
170 } | 169 } |
171 | 170 |
172 | 171 |
173 | 172 |
174 | 173 |
175 ################################# | 174 ################################# |
176 # | 175 # |
177 # PARSE PARAMETERS | 176 # PARSE PARAMETERS |
178 # | 177 # |
179 ################################# | 178 ################################# |
180 | 179 |
181 | 180 |
182 set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: spidex: gonl: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage | 181 set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: exac03nonpsych: exac03nontcga: dbscsnv11: kaviar_20150923: hrcr1: mitimpact2: mitimpact24: dbnsfp30a: spidex: gonl: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage |
183 [ $# -eq 0 ] && usage | 182 [ $# -eq 0 ] && usage |
184 | 183 |
185 | 184 |
186 | 185 |
187 while [ $# -gt 0 ] | 186 while [ $# -gt 0 ] |
188 do | 187 do |
189 case "$1" in | 188 case "$1" in |
190 --inputfile) infile=$2;shift;; # inputfile | 189 --inputfile) infile=$2;shift;; # inputfile |
191 --buildver) buildvertmp=$2;shift;; # hg18 or hg19 | 190 --buildver) buildvertmp=$2;shift;; # hg18 or hg19 |
192 --humandb) humandbtmp=$2;shift;; # location of humandb database | 191 --humandb) humandbtmp=$2;shift;; # location of humandb database |
193 --varfile) varfile=$2;shift;; # Y or N | 192 --varfile) varfile=$2;shift;; # Y or N |
194 --VCF) vcf=$2;shift;; #Y or N | 193 --VCF) vcf=$2;shift;; #Y or N |
195 --chrcol) chrcol=$2;shift;; # which column has chr | 194 --chrcol) chrcol=$2;shift;; # which column has chr |
196 --startcol) startcol=$2;shift;; # which column has start | 195 --startcol) startcol=$2;shift;; # which column has start coord |
197 --endcol) endcol=$2;shift;; # which column has end | 196 --endcol) endcol=$2;shift;; # which column has end coord |
198 --refcol) refcol=$2;shift;; # which column has ref | 197 --refcol) refcol=$2;shift;; # which column has ref allele |
199 --obscol) obscol=$2;shift;; # which column has alt | 198 --obscol) obscol=$2;shift;; # which column has alt allele |
200 --vartypecol) vartypecol=$2;shift;; # which column has vartype | 199 --vartypecol) vartypecol=$2;shift;; # which column has vartype |
201 --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based? | 200 --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based? |
202 --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene | 201 --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene |
203 --hgvs) hgvs=$2;shift;; | 202 --hgvs) hgvs=$2;shift;; |
204 --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")" | 203 --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")" |
205 --tfbs) tfbs=$2;shift;; # Y or N | 204 --tfbs) tfbs=$2;shift;; # Y or N |
206 --mce) mce=$2;shift;; # Y or N | 205 --mce) mce=$2;shift;; # Y or N |
207 --cytoband) cytoband=$2;shift;; # Y or N | 206 --cytoband) cytoband=$2;shift;; # Y or N |
208 --segdup) segdup=$2;shift;; # Y or N | 207 --segdup) segdup=$2;shift;; # Y or N |
209 --dgv) dgv=$2;shift;; # Y or N | 208 --dgv) dgv=$2;shift;; # Y or N |
210 --gwas) gwas=$2;shift;; # Y or N | 209 --gwas) gwas=$2;shift;; # Y or N |
211 --ver1000g) ver1000g=$2;shift;; # Y or N | 210 --ver1000g) ver1000g=$2;shift;; # Y or N |
212 --cg46) cg46=$2;shift;; | 211 --cg46) cg46=$2;shift;; # Y or N |
213 --cg69) cg69=$2;shift;; | 212 --cg69) cg69=$2;shift;; # Y or N |
214 --impactscores) impactscores=$2;shift;; # Y or N | 213 --impactscores) impactscores=$2;shift;; # Y or N |
215 --newimpactscores) newimpactscores=$2;shift;; # Y or N | 214 --newimpactscores) newimpactscores=$2;shift;; # Y or N |
216 --otherinfo) otherinfo=$2;shift;; | 215 --otherinfo) otherinfo=$2;shift;; # display additional columns? |
217 --scriptsdir) scriptsdirtmp=$2;shift;; # Y or N | 216 --scriptsdir) scriptsdirtmp=$2;shift;; # Y or N |
218 --esp) esp=$2;shift;; # Y or N | 217 --esp) esp=$2;shift;; # Y or N |
219 --exac03) exac03=$2;shift;; | 218 --exac03) exac03=$2;shift;; # Y or N |
220 --gonl) gonl=$2;shift;; | 219 --exac03nonpsych) exac03nonpsych=$2;shift;; # Y or N |
221 --spidex) spidex=$2;shift;; | 220 --exac03nontcga) exac03nontcga=$2;shift;; # Y or N |
222 --gerp) gerp=$2;shift;; # Y or N | 221 --dbscsnv11) dbscsnv11=$2;shift;; |
223 --cosmic61) cosmic61=$2;shift;; # Y or N | 222 --kaviar_20150923) kaviar_20150923=$2;shift;; |
224 --cosmic63) cosmic63=$2;shift;; # Y or N | 223 --hrcr1) hrcr1=$2;shift;; |
225 --cosmic64) cosmic64=$2;shift;; # Y or N | 224 --mitimpact2) mitimpact2=$2;shift;; |
226 --cosmic65) cosmic65=$2;shift;; # Y or N | 225 --mitimpact24) mitimpact24=$2;shift;; |
227 --cosmic67) cosmic67=$2;shift;; # Y or N | 226 --dbnsfp30a) dbnsfp30a=$2;shift;; |
228 --cosmic68) cosmic68=$2;shift;; # Y or N | 227 --gonl) gonl=$2;shift;; # Y or N |
229 --nci60) nci60=$2;shift;; # Y or N | 228 --spidex) spidex=$2;shift;; # Y or N |
230 --clinvar) clinvar=$2;shift;; # Y or N | 229 --gerp) gerp=$2;shift;; # Y or N |
231 --filt_dbsnp) filt_dbsnp=$2;shift;; | 230 --cosmic61) cosmic61=$2;shift;; # Y or N |
232 --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value | 231 --cosmic63) cosmic63=$2;shift;; # Y or N |
233 --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value | 232 --cosmic64) cosmic64=$2;shift;; # Y or N |
234 --filt1000GAMR) threshold_1000g_AMR=$2;shift;; #threshold value | 233 --cosmic65) cosmic65=$2;shift;; # Y or N |
235 --filt1000GASN) threshold_1000g_ASN=$2;shift;; #threshold value | 234 --cosmic67) cosmic67=$2;shift;; # Y or N |
236 --filt1000GEUR) threshold_1000g_EUR=$2;shift;; #threshold value | 235 --cosmic68) cosmic68=$2;shift;; # Y or N |
237 --filtESP6500ALL) threshold_ESP6500_ALL=$2;shift;; #threshold value | 236 --nci60) nci60=$2;shift;; # Y or N |
238 --filtESP6500EA) threshold_ESP6500_EA=$2;shift;; #threshold value | 237 --clinvar) clinvar=$2;shift;; # Y or N |
239 --filtESP6500AA) threshold_ESP6500_AA=$2;shift;; #threshold value | 238 --filt_dbsnp) filt_dbsnp=$2;shift;; # Y or N |
240 --filtcg46) threshold_cg46=$2;shift;; | 239 --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value |
241 --filtcg69) threshold_cg69=$2;shift;; | 240 --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value |
242 --outall) outfile_all=$2;shift;; # file | 241 --filt1000GAMR) threshold_1000g_AMR=$2;shift;; #threshold value |
243 --outfilt) outfile_filt=$2;shift;; # file | 242 --filt1000GASN) threshold_1000g_ASN=$2;shift;; #threshold value |
244 --outinvalid) outfile_invalid=$2;shift;; #file | 243 --filt1000GEUR) threshold_1000g_EUR=$2;shift;; #threshold value |
245 --dorunannovar) dorunannovar=$2;shift;; #Y or N | 244 --filtESP6500ALL) threshold_ESP6500_ALL=$2;shift;; #threshold value |
246 -h) shift;; | 245 --filtESP6500EA) threshold_ESP6500_EA=$2;shift;; #threshold value |
247 --) shift;break;; | 246 --filtESP6500AA) threshold_ESP6500_AA=$2;shift;; #threshold value |
248 -*) usage;; | 247 --filtcg46) threshold_cg46=$2;shift;; |
249 *) break;; | 248 --filtcg69) threshold_cg69=$2;shift;; |
249 --outall) outfile_all=$2;shift;; # file | |
250 --outfilt) outfile_filt=$2;shift;; # file | |
251 --outinvalid) outfile_invalid=$2;shift;; #file | |
252 --dorunannovar) dorunannovar=$2;shift;; #Y or N | |
253 -h) shift;; | |
254 --) shift;break;; | |
255 -*) usage;; | |
256 *) break;; | |
250 esac | 257 esac |
251 shift | 258 shift |
252 done | 259 done |
253 | 260 |
254 #sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma | 261 #sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma |
257 scriptsdir=${scriptsdirtmp%,*} | 264 scriptsdir=${scriptsdirtmp%,*} |
258 | 265 |
259 | 266 |
260 if [ $test == "Y" ] | 267 if [ $test == "Y" ] |
261 then | 268 then |
262 echo "dorunannovar: $dorunannovar" | 269 echo "dorunannovar: $dorunannovar" |
263 echo "infile: $infile" | 270 echo "infile: $infile" |
264 echo "buildver: $buildver" | 271 echo "buildver: $buildver" |
265 echo "annovardb: $humandb" | 272 echo "annovardb: $humandb" |
266 echo "verdbnsp: $verdbsnp" | 273 echo "verdbnsp: $verdbsnp" |
267 echo "geneanno: $geneanno" | 274 echo "geneanno: $geneanno" |
268 echo "tfbs: $tfbs" | 275 echo "tfbs: $tfbs" |
269 echo "mce: $mce" | 276 echo "mce: $mce" |
270 echo "cytoband: $cytoband" | 277 echo "cytoband: $cytoband" |
271 echo "segdup: $segdup" | 278 echo "segdup: $segdup" |
272 echo "dgv: $dgv" | 279 echo "dgv: $dgv" |
273 echo "gwas: $gwas" | 280 echo "gwas: $gwas" |
274 echo "g1000: ${g1000}" | 281 echo "g1000: ${g1000}" |
275 echo "cg46: ${cg46}" | 282 echo "cg46: ${cg46}" |
276 echo "cg69: ${cg69}" | 283 echo "cg69: ${cg69}" |
277 echo "impactscores: $impactscores" | 284 echo "impactscores: $impactscores" |
278 echo "impactscores: $newimpactscores" | 285 echo "impactscores: $newimpactscores" |
279 echo "esp: $esp" | 286 echo "esp: $esp" |
280 echo "gerp: $gerp" | 287 echo "gerp: $gerp" |
281 echo "cosmic: $cosmic" | 288 echo "cosmic: $cosmic" |
282 echo "outfile: $outfile_all" | 289 echo "outfile: $outfile_all" |
283 echo "outinvalid: $outfile_invalid" | 290 echo "outinvalid: $outfile_invalid" |
284 echo "outfiltered: $outfile_filt" | 291 echo "outfiltered: $outfile_filt" |
285 echo "varfile: $varfile" | 292 echo "varfile: $varfile" |
286 echo "vcf" $vcf | 293 echo "vcf" $vcf |
287 echo "chrcol: $chrcol" | 294 echo "chrcol: $chrcol" |
288 echo "startcol: $startcol" | 295 echo "startcol: $startcol" |
289 echo "endcol: $endcol" | 296 echo "endcol: $endcol" |
290 echo "refcol: $refcol" | 297 echo "refcol: $refcol" |
291 echo "obscol: $obscol" | 298 echo "obscol: $obscol" |
292 echo "convertcoords: $convertcoords" | 299 echo "convertcoords: $convertcoords" |
293 echo "vartypecol: $vartypecol" | 300 echo "vartypecol: $vartypecol" |
294 echo "dofilter: $dofilter" | 301 echo "dofilter: $dofilter" |
295 echo "threshold_1000g_ALL : $threshold_1000g_ALL" | 302 echo "threshold_1000g_ALL : $threshold_1000g_ALL" |
296 echo "threshold_1000g_AFR : $threshold_1000g_AFR" | 303 echo "threshold_1000g_AFR : $threshold_1000g_AFR" |
297 echo "threshold_1000g_AMR : $threshold_1000g_AMR" | 304 echo "threshold_1000g_AMR : $threshold_1000g_AMR" |
298 echo "threshold_1000g_ASN : $threshold_1000g_ASN" | 305 echo "threshold_1000g_ASN : $threshold_1000g_ASN" |
299 echo "threshold_1000g_EUR : $threshold_1000g_EUR" | 306 echo "threshold_1000g_EUR : $threshold_1000g_EUR" |
300 echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL" | 307 echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL" |
301 echo "threshold_ESP6500_EA : $threshold_ESP6500_EA" | 308 echo "threshold_ESP6500_EA : $threshold_ESP6500_EA" |
302 echo "threshold_ESP6500_AA : $threshold_ESP6500_AA" | 309 echo "threshold_ESP6500_AA : $threshold_ESP6500_AA" |
303 | |
304 fi | 310 fi |
305 | 311 |
306 | 312 |
307 | 313 |
308 ############################################ | 314 ############################################ |
309 # | 315 # |
310 # Annotate Variants | 316 # Annotate Variants |
311 # | 317 # |
312 ############################################ | 318 ############################################ |
313 | 319 |
314 #parse geneanno param | 320 # parse geneanno param |
315 refgene="N" | 321 refgene="N" |
316 knowngene="N" | 322 knowngene="N" |
317 ensgene="N" | 323 ensgene="N" |
318 | 324 |
319 if [[ $geneanno =~ "refSeq" ]] | 325 if [[ $geneanno =~ "refSeq" ]] |
320 then | 326 then |
321 refgene="Y" | 327 refgene="Y" |
322 fi | 328 fi |
323 if [[ $geneanno =~ "knowngene" ]] | 329 if [[ $geneanno =~ "knowngene" ]] |
324 then | 330 then |
325 knowngene="Y" | 331 knowngene="Y" |
326 fi | 332 fi |
327 if [[ $geneanno =~ "ensgene" ]] | 333 if [[ $geneanno =~ "ensgene" ]] |
328 then | 334 then |
329 ensgene="Y" | 335 ensgene="Y" |
330 fi | 336 fi |
331 if [ $hgvs == "N" ] | 337 if [ $hgvs == "N" ] |
332 then | 338 then |
333 hgvs="" | 339 hgvs="" |
334 fi | 340 fi |
335 | 341 |
336 #parse verdbsnp/1000g/esp strings | 342 #parse verdbsnp/1000g/esp strings |
337 dbsnpstr=${verdbsnp//,/ } | 343 dbsnpstr=${verdbsnp//,/ } |
338 filt_dbsnpstr=${filt_dbsnp//,/ } | 344 filt_dbsnpstr=${filt_dbsnp//,/ } |
339 g1000str=${ver1000g//,/ } | 345 g1000str=${ver1000g//,/ } |
340 espstr=${esp//,/ } | 346 espstr=${esp//,/ } |
341 | 347 |
342 if [ $test == "Y" ] | 348 if [ $test == "Y" ] |
343 then | 349 then |
344 echo "annotate dbsnp: $dbsnpstr" | 350 echo "annotate dbsnp: $dbsnpstr" |
345 echo "annotate esp: $espstr" | 351 echo "annotate esp: $espstr" |
346 echo "filter dbsnp: $filt_dbsnpstr" | 352 echo "filter dbsnp: $filt_dbsnpstr" |
347 fi | 353 fi |
348 | 354 |
349 mutationtaster="N" | 355 mutationtaster="N" |
350 avsift="N" | 356 avsift="N" |
351 lrt="N" | 357 lrt="N" |
354 ljbsift="N" | 360 ljbsift="N" |
355 | 361 |
356 #parse old impactscores param (obsolete) | 362 #parse old impactscores param (obsolete) |
357 if [[ $impactscores =~ "mutationtaster" ]] | 363 if [[ $impactscores =~ "mutationtaster" ]] |
358 then | 364 then |
359 mutationtaster="Y" | 365 mutationtaster="Y" |
360 fi | 366 fi |
361 if [[ $impactscores =~ "sift" ]] | 367 if [[ $impactscores =~ "sift" ]] |
362 then | 368 then |
363 avsift="Y" | 369 avsift="Y" |
364 fi | 370 fi |
365 if [[ $impactscores =~ "lrt" ]] | 371 if [[ $impactscores =~ "lrt" ]] |
366 then | 372 then |
367 lrt="Y" | 373 lrt="Y" |
368 fi | 374 fi |
369 if [[ $impactscores =~ "ljbsift" ]] | 375 if [[ $impactscores =~ "ljbsift" ]] |
370 then | 376 then |
371 ljbsift="Y" | 377 ljbsift="Y" |
372 fi | 378 fi |
373 if [[ $impactscores =~ "ljb2sift" ]] | 379 if [[ $impactscores =~ "ljb2sift" ]] |
374 then | 380 then |
375 ljb2sift="Y" | 381 ljb2sift="Y" |
376 fi | 382 fi |
377 if [[ $impactscores =~ "pp2" ]] | 383 if [[ $impactscores =~ "pp2" ]] |
378 then | 384 then |
379 polyphen2="Y" | 385 polyphen2="Y" |
380 fi | 386 fi |
381 if [[ $impactscores =~ "phylop" ]] | 387 if [[ $impactscores =~ "phylop" ]] |
382 then | 388 then |
383 phylop="Y" | 389 phylop="Y" |
384 fi | 390 fi |
385 | 391 |
386 if [[ $varfile == "Y" ]] | 392 if [[ $varfile == "Y" ]] |
387 then | 393 then |
388 convertcoords="Y" | 394 convertcoords="Y" |
389 fi | 395 fi |
390 | 396 |
391 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores | 397 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores |
392 | 398 |
393 ljb2_sift="N" | 399 ljb2_sift="N" |
403 | 409 |
404 # parse ljb2 newimpactscores param | 410 # parse ljb2 newimpactscores param |
405 # ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy | 411 # ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy |
406 if [[ $newimpactscores =~ "ljb2_sift" ]] | 412 if [[ $newimpactscores =~ "ljb2_sift" ]] |
407 then | 413 then |
408 ljb2_sift="Y" | 414 ljb2_sift="Y" |
409 fi | 415 fi |
410 if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]] | 416 if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]] |
411 then | 417 then |
412 ljb2_pp2hdiv="Y" | 418 ljb2_pp2hdiv="Y" |
413 fi | 419 fi |
414 if [[ $newimpactscores =~ "ljb2_pp2hvar" ]] | 420 if [[ $newimpactscores =~ "ljb2_pp2hvar" ]] |
415 then | 421 then |
416 ljb2_pp2hvar="Y" | 422 ljb2_pp2hvar="Y" |
417 fi | 423 fi |
418 if [[ $newimpactscores =~ "ljb2_lrt" ]] | 424 if [[ $newimpactscores =~ "ljb2_lrt" ]] |
419 then | 425 then |
420 ljb2_lrt="Y" | 426 ljb2_lrt="Y" |
421 fi | 427 fi |
422 if [[ $newimpactscores =~ "ljb2_mt" ]] | 428 if [[ $newimpactscores =~ "ljb2_mt" ]] |
423 then | 429 then |
424 ljb2_mt="Y" | 430 ljb2_mt="Y" |
425 fi | 431 fi |
426 if [[ $newimpactscores =~ "ljb2_ma" ]] | 432 if [[ $newimpactscores =~ "ljb2_ma" ]] |
427 then | 433 then |
428 ljb2_ma="Y" | 434 ljb2_ma="Y" |
429 fi | 435 fi |
430 if [[ $newimpactscores =~ "ljb2_fathmm" ]] | 436 if [[ $newimpactscores =~ "ljb2_fathmm" ]] |
431 then | 437 then |
432 ljb2_fathmm="Y" | 438 ljb2_fathmm="Y" |
433 fi | 439 fi |
434 if [[ $newimpactscores =~ "ljb2_gerp" ]] | 440 if [[ $newimpactscores =~ "ljb2_gerp" ]] |
435 then | 441 then |
436 ljb2_gerp="Y" | 442 ljb2_gerp="Y" |
437 fi | 443 fi |
438 if [[ $newimpactscores =~ "ljb2_phylop" ]] | 444 if [[ $newimpactscores =~ "ljb2_phylop" ]] |
439 then | 445 then |
440 ljb2_phylop="Y" | 446 ljb2_phylop="Y" |
441 fi | 447 fi |
442 if [[ $newimpactscores =~ "ljb2_siphy" ]] | 448 if [[ $newimpactscores =~ "ljb2_siphy" ]] |
443 then | 449 then |
444 ljb2_siphy="Y" | 450 ljb2_siphy="Y" |
445 fi | 451 fi |
446 | 452 |
447 if [ $otherinfo == "N" ] | 453 if [ $otherinfo == "N" ] |
448 then | 454 then |
449 otherinfo="" | 455 otherinfo="" |
450 fi | 456 fi |
451 | 457 |
452 | 458 |
453 #column header names we will be adding | 459 #column header names we will be adding |
454 # ESP 6500 | 460 # ESP 6500 |
474 #run annovar or filter only? | 480 #run annovar or filter only? |
475 if [ $dorunannovar == "Y" ] | 481 if [ $dorunannovar == "Y" ] |
476 then | 482 then |
477 | 483 |
478 | 484 |
479 #################################### | 485 #################################### |
480 # | 486 # |
481 # PREPARE INPUT FILE | 487 # PREPARE INPUT FILE |
482 # | 488 # |
483 #################################### | 489 #################################### |
484 | 490 |
485 echo "converting input file" | 491 echo "converting input file" |
486 vcfheader="" | 492 vcfheader="" |
487 if [ $vcf == "Y" ] #if CG varfile, convert | 493 if [ $vcf == "Y" ] #if CG varfile, convert |
488 then | 494 then |
489 # convert vcf to annovarinput | 495 # convert vcf to annovarinput |
490 $scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1 | 496 $scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1 |
491 | 497 |
492 #construct header line from vcf file | 498 #construct header line from vcf file |
493 cat $infile | grep "#CHROM" > additionalcols | 499 cat $infile | grep "#CHROM" > additionalcols |
494 sed -i 's/#//g' additionalcols | 500 sed -i 's/#//g' additionalcols |
495 vcfheader="\t`cat additionalcols`" | 501 vcfheader="\t`cat additionalcols`" |
496 echo "vcfheader:$vcfheader" | 502 echo "vcfheader:$vcfheader" |
497 echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile | 503 echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile |
498 cat annovarinput >> originalfile | 504 cat annovarinput >> originalfile |
499 | 505 |
500 chrcol=1 | 506 chrcol=1 |
501 startcol=2 | 507 startcol=2 |
502 endcol=3 | 508 endcol=3 |
503 refcol=4 | 509 refcol=4 |
504 obscol=5 | 510 obscol=5 |
505 | 511 |
506 | 512 |
507 elif [ $varfile == "Y" ] #if CG varfile, convert | 513 elif [ $varfile == "Y" ] #if CG varfile, convert |
508 then | 514 then |
509 # convert varfile | 515 # convert varfile |
510 $scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1 | 516 $scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1 |
511 echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile | 517 echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile |
512 cat annovarinput | cut -f1-6,8 >> originalfile | 518 cat annovarinput | cut -f1-6,8 >> originalfile |
513 cat annovarinput | cut -f1-5 >> annovarinput2 | 519 cat annovarinput | cut -f1-5 >> annovarinput2 |
514 mv annovarinput2 annovarinput | 520 mv annovarinput2 annovarinput |
515 | 521 |
516 chrcol=1 | 522 chrcol=1 |
517 startcol=2 | 523 startcol=2 |
518 endcol=3 | 524 endcol=3 |
519 refcol=4 | 525 refcol=4 |
520 obscol=5 | 526 obscol=5 |
521 | 527 |
522 elif [ $convertcoords == "Y" ] # if CG-coordinates, convert | 528 elif [ $convertcoords == "Y" ] # if CG-coordinates, convert |
523 then | 529 then |
524 #echo "rearranging columns and converting coordinates" | 530 #echo "rearranging columns and converting coordinates" |
525 awk 'BEGIN{ | 531 awk 'BEGIN{ |
526 FS="\t"; | 532 FS="\t"; |
527 OFS="\t"; | 533 OFS="\t"; |
528 }{ | 534 }{ |
529 if(FNR>1) { | 535 if(FNR>1) { |
530 gsub(/chr/,"",$"'"${chrcol}"'") | 536 gsub(/chr/,"",$"'"${chrcol}"'") |
531 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; | 537 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; |
532 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" }; | 538 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" }; |
533 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" }; | 539 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" }; |
534 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; | 540 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; |
535 | 541 |
536 printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'"); | 542 printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'"); |
537 } | 543 } |
538 } | 544 } |
539 END{ | 545 END{ |
540 }' $infile > annovarinput | 546 }' $infile > annovarinput |
541 | 547 |
542 #remove any "chr" prefixes | 548 #remove any "chr" prefixes |
543 #sed -i '2,$s/chr//g' annovarinput | 549 #sed -i '2,$s/chr//g' annovarinput |
544 | 550 |
545 awk 'BEGIN{ | 551 awk 'BEGIN{ |
546 FS="\t"; | 552 FS="\t"; |
547 OFS="\t"; | 553 OFS="\t"; |
548 }{ | 554 }{ |
549 | 555 |
550 if(FNR>=1) { | 556 if(FNR>=1) { |
551 gsub(/chr/,"",$"'"${chrcol}"'") | 557 gsub(/chr/,"",$"'"${chrcol}"'") |
552 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; | 558 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; |
553 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" }; | 559 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" }; |
554 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" }; | 560 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" }; |
555 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; | 561 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; |
556 | 562 |
557 print $0 | 563 print $0 |
558 } | 564 } |
559 } | 565 } |
560 END{ | 566 END{ |
561 }' $infile > originalfile | 567 }' $infile > originalfile |
562 | 568 |
563 #remove any "chr" prefixes | 569 #remove any "chr" prefixes |
564 #sed -i '2,$s/chr//g' originalfile | 570 #sed -i '2,$s/chr//g' originalfile |
565 sed -i 's/omosome/chromosome/g' originalfile | 571 sed -i 's/omosome/chromosome/g' originalfile |
566 | 572 |
567 | 573 |
568 else #only rearrange columns if already 1-based coordinates | 574 else #only rearrange columns if already 1-based coordinates |
569 echo "rearranging columns " | 575 echo "rearranging columns " |
570 awk 'BEGIN{ | 576 awk 'BEGIN{ |
571 FS="\t"; | 577 FS="\t"; |
572 OFS="\t"; | 578 OFS="\t"; |
573 }{ | 579 }{ |
574 if(FNR>1) { | 580 if(FNR>1) { |
575 printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'"); | 581 printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'"); |
576 } | 582 } |
577 } | 583 } |
578 END{ | 584 END{ |
579 }' $infile > annovarinput | 585 }' $infile > annovarinput |
580 | 586 |
581 #remove any "chr" prefixes | 587 #remove any "chr" prefixes |
582 sed -i '2,$s/chr//g' annovarinput | 588 sed -i '2,$s/chr//g' annovarinput |
583 sed '2,$s/chr//g' $infile > originalfile | 589 sed '2,$s/chr//g' $infile > originalfile |
584 sed -i 's/omosome/chromosome/g' originalfile | 590 sed -i 's/omosome/chromosome/g' originalfile |
585 fi | 591 fi |
586 | 592 |
587 echo "...finished conversion" | 593 echo "...finished conversion" |
588 | 594 |
589 | 595 |
590 | 596 |
591 | 597 #################################### |
592 #################################### | 598 # |
593 # | 599 # RUN ANNOVAR COMMANDS |
594 # RUN ANNOVAR COMMANDS | 600 # |
595 # | 601 #################################### |
596 #################################### | 602 |
597 | 603 |
598 | 604 |
599 | 605 ###### gene-based annotation ####### |
600 ###### gene-based annotation ####### | 606 |
601 | 607 # RefSeq Gene |
602 # RefSeq Gene | 608 if [ $refgene == "Y" ] |
603 if [ $refgene == "Y" ] | 609 then |
604 then | 610 echo -e "\nrefSeq gene" |
605 echo -e "\nrefSeq gene" | 611 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1 |
606 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1 | 612 |
607 | 613 annovarout=annovarinput.variant_function |
608 annovarout=annovarinput.variant_function | 614 sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
609 sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 615 joinresults originalfile $annovarout 3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene |
610 joinresults originalfile $annovarout 3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene | 616 |
611 | 617 annovarout=annovarinput.exonic_variant_function |
612 annovarout=annovarinput.exonic_variant_function | 618 sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
613 sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 619 joinresults originalfile $annovarout 4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange |
614 joinresults originalfile $annovarout 4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange | 620 fi |
615 fi | 621 |
616 | 622 |
617 | 623 # UCSC KnownGene |
618 # UCSC KnownGene | 624 if [ $knowngene == "Y" ] |
619 if [ $knowngene == "Y" ] | 625 then |
620 then | 626 echo -e "\nUCSC known gene" |
621 echo -e "\nUCSC known gene" | 627 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1 |
622 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1 | 628 |
623 | 629 annovarout=annovarinput.variant_function |
624 annovarout=annovarinput.variant_function | 630 sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
625 sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 631 joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene |
626 joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene | 632 |
627 | 633 annovarout=annovarinput.exonic_variant_function |
628 annovarout=annovarinput.exonic_variant_function | 634 sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
629 sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 635 joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange |
630 joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange | 636 fi |
631 fi | 637 |
632 | 638 |
633 | 639 # Emsembl Gene |
634 # Emsembl Gene | 640 if [ $ensgene == "Y" ] |
635 if [ $ensgene == "Y" ] | 641 then |
636 then | 642 echo -e "\nEnsembl gene" |
637 echo -e "\nEnsembl gene" | 643 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1 |
638 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1 | 644 |
639 | 645 annovarout=annovarinput.variant_function |
640 annovarout=annovarinput.variant_function | 646 sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
641 sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 647 joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene |
642 joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene | 648 |
643 | 649 annovarout=annovarinput.exonic_variant_function |
644 annovarout=annovarinput.exonic_variant_function | 650 sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
645 sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 651 joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange |
646 joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange | 652 fi |
647 fi | 653 |
648 | 654 |
649 | 655 |
650 | 656 ###### region-based annotation ####### |
651 ###### region-based annotation ####### | 657 |
652 | 658 |
653 | 659 # Transcription Factor Binding Sites Annotation |
654 # Transcription Factor Binding Sites Annotation | 660 if [ $mce == "Y" ] |
655 if [ $mce == "Y" ] | 661 then |
656 then | 662 echo -e "\nMost Conserved Elements" |
657 echo -e "\nMost Conserved Elements" | 663 |
658 | 664 if [ $buildver == "hg18" ] |
659 if [ $buildver == "hg18" ] | 665 then |
660 then | 666 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1 |
661 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1 | 667 annovarout=annovarinput.${buildver}_phastConsElements44way |
662 annovarout=annovarinput.${buildver}_phastConsElements44way | 668 sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
663 sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 669 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way |
664 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way | 670 |
665 | 671 else #hg19 |
666 else #hg19 | 672 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1 |
667 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1 | 673 annovarout=annovarinput.${buildver}_phastConsElements46way |
668 annovarout=annovarinput.${buildver}_phastConsElements46way | 674 sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
669 sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 675 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way |
670 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way | 676 fi |
671 fi | 677 |
672 | 678 fi |
673 fi | 679 |
674 | 680 |
675 | 681 |
676 | 682 # Transcription Factor Binding Sites Annotation |
677 # Transcription Factor Binding Sites Annotation | 683 if [ $tfbs == "Y" ] |
678 if [ $tfbs == "Y" ] | 684 then |
679 then | 685 echo -e "\nTranscription Factor Binding Site Annotation" |
680 echo -e "\nTranscription Factor Binding Site Annotation" | 686 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1 |
681 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1 | 687 |
682 | 688 # arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns |
683 # arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns | 689 annovarout=annovarinput.${buildver}_tfbsConsSites |
684 annovarout=annovarinput.${buildver}_tfbsConsSites | 690 sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
685 sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 691 joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS |
686 joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS | 692 fi |
687 fi | 693 |
688 | 694 |
689 | 695 |
690 | 696 # Identify cytogenetic band for genetic variants |
691 # Identify cytogenetic band for genetic variants | 697 if [ $cytoband == "Y" ] |
692 if [ $cytoband == "Y" ] | 698 then |
693 then | 699 echo -e "\nCytogenic band Annotation" |
694 echo -e "\nCytogenic band Annotation" | 700 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1 |
695 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1 | 701 |
696 | 702 annovarout=annovarinput.${buildver}_cytoBand |
697 annovarout=annovarinput.${buildver}_cytoBand | 703 sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
698 sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 704 joinresults originalfile $annovarout 3 4 5 6 7 B.Band |
699 joinresults originalfile $annovarout 3 4 5 6 7 B.Band | 705 fi |
700 fi | 706 |
701 | 707 |
702 | 708 # Identify variants located in segmental duplications |
703 # Identify variants located in segmental duplications | 709 if [ $segdup == "Y" ] |
704 if [ $segdup == "Y" ] | 710 then |
705 then | 711 echo -e "\nSegmental Duplications Annotation" |
706 echo -e "\nSegmental Duplications Annotation" | 712 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1 |
707 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1 | 713 |
708 | 714 annovarout=annovarinput.${buildver}_genomicSuperDups |
709 annovarout=annovarinput.${buildver}_genomicSuperDups | 715 sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
710 sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 716 joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup |
711 joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup | 717 fi |
712 fi | 718 |
713 | 719 |
714 | 720 |
715 | 721 # Identify previously reported structural variants in DGV |
716 # Identify previously reported structural variants in DGV | 722 if [ $dgv == "Y" ] |
717 if [ $dgv == "Y" ] | 723 then |
718 then | 724 echo -e "\nDGV Annotation" |
719 echo -e "\nDGV Annotation" | 725 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1 |
720 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1 | 726 |
721 | 727 annovarout=annovarinput.${buildver}_dgvMerged |
722 annovarout=annovarinput.${buildver}_dgvMerged | 728 sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
723 sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 729 joinresults originalfile $annovarout 3 4 5 6 7 B.DGV |
724 joinresults originalfile $annovarout 3 4 5 6 7 B.DGV | 730 fi |
725 fi | 731 |
726 | 732 |
727 | 733 # Identify variants reported in previously published GWAS studies |
728 # Identify variants reported in previously published GWAS studies | 734 if [ $gwas == "Y" ] |
729 if [ $gwas == "Y" ] | 735 then |
730 then | 736 echo -e "\nGWAS Annotation" |
731 echo -e "\nGWAS Annotation" | 737 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1 |
732 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1 | 738 |
733 | 739 annovarout=annovarinput.${buildver}_gwasCatalog |
734 annovarout=annovarinput.${buildver}_gwasCatalog | 740 sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
735 sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 741 joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS |
736 joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS | 742 fi |
737 fi | 743 |
738 | 744 |
739 | 745 |
740 | 746 |
741 | 747 ###### filter-based annotation ####### |
742 ###### filter-based annotation ####### | 748 |
743 | 749 #dbSNP |
744 #dbSNP | 750 for version in $dbsnpstr |
745 for version in $dbsnpstr | 751 do |
746 do | 752 if [ $version == "None" ] |
747 if [ $version == "None" ] | 753 then |
748 then | 754 break |
749 break | 755 fi |
750 fi | 756 echo -e "\ndbSNP region Annotation, version: $version" |
751 echo -e "\ndbSNP region Annotation, version: $version" | 757 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1 |
752 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1 | 758 |
753 | 759 columnname=${version} |
754 columnname=${version} | 760 if [[ $columnname == snp* ]] |
755 if [[ $columnname == snp* ]] | 761 then |
756 then | 762 columnname="db${version}" |
757 columnname="db${version}" | 763 fi |
758 fi | 764 |
759 | 765 annovarout=annovarinput.${buildver}_${version}_dropped |
760 annovarout=annovarinput.${buildver}_${version}_dropped | 766 sed -i '1i\db\t'${columnname}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
761 sed -i '1i\db\t'${columnname}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 767 joinresults originalfile $annovarout 3 4 5 6 7 B.${columnname} |
762 joinresults originalfile $annovarout 3 4 5 6 7 B.${columnname} | 768 |
763 | 769 done |
764 | 770 |
765 done | 771 |
766 | 772 |
767 | 773 #1000 Genomes |
768 | 774 |
769 #1000 Genomes | 775 if [ $ver1000g != "None" ] |
770 | 776 then |
771 if [ $ver1000g != "None" ] | 777 |
772 then | 778 for version in $g1000str |
773 | 779 do |
774 for version in $g1000str | 780 #column headers |
775 do | 781 g1000_colheader_ALL="${version}_ALL" |
776 #column headers | 782 g1000_colheader_AFR="${version}_AFR" |
777 g1000_colheader_ALL="${version}_ALL" | 783 g1000_colheader_AMR="${version}_AMR" |
778 g1000_colheader_AFR="${version}_AFR" | 784 g1000_colheader_ASN="${version}_ASN" |
779 g1000_colheader_AMR="${version}_AMR" | 785 g1000_colheader_EUR="${version}_EUR" |
780 g1000_colheader_ASN="${version}_ASN" | 786 g1000_colheader_EAS="${version}_EAS" |
781 g1000_colheader_EUR="${version}_EUR" | 787 g1000_colheader_SAS="${version}_SAS" |
782 g1000_colheader_EAS="${version}_EAS" | 788 g1000_colheader_CEU="${version}_CEU" |
783 g1000_colheader_SAS="${version}_SAS" | 789 g1000_colheader_YRI="${version}_YRI" |
784 g1000_colheader_CEU="${version}_CEU" | 790 g1000_colheader_JPTCHB="${version}_JPTCHB" |
785 g1000_colheader_YRI="${version}_YRI" | 791 |
786 g1000_colheader_JPTCHB="${version}_JPTCHB" | 792 doALL="N" |
787 | 793 doAMR="N" |
788 doALL="N" | 794 doAFR="N" |
789 doAMR="N" | 795 doASN="N" |
790 doAFR="N" | 796 doEAS="N" |
791 doASN="N" | 797 doSAS="N" |
792 doEAS="N" | 798 doEUR="N" |
793 doSAS="N" | 799 doCEU="N" |
794 doEUR="N" | 800 doYRI="N" |
795 doCEU="N" | 801 doJPTCHB="N" |
796 doYRI="N" | 802 |
797 doJPTCHB="N" | 803 |
798 | 804 if [ $version == "1000g2012apr" ] |
799 | 805 then |
800 if [ $version == "1000g2012apr" ] | 806 fileID="2012_04" |
801 then | 807 doALL="Y" |
802 fileID="2012_04" | 808 if [ $buildver == "hg19" ] |
803 doALL="Y" | 809 then |
804 if [ $buildver == "hg19" ] | 810 doAMR="Y" |
805 then | 811 doAFR="Y" |
806 doAMR="Y" | 812 doASN="Y" |
807 doAFR="Y" | 813 doEUR="Y" |
808 doASN="Y" | 814 fi |
809 doEUR="Y" | 815 elif [ $version == "1000g2014oct" ] |
810 fi | 816 then |
811 elif [ $version == "1000g2014oct" ] | 817 fileID="2014_10" |
812 then | 818 doALL="Y" |
813 fileID="2014_10" | 819 doAMR="Y" |
814 doALL="Y" | 820 doAFR="Y" |
815 doAMR="Y" | 821 doEUR="Y" |
816 doAFR="Y" | 822 doEAS="Y" |
817 doEUR="Y" | 823 if [ $buildver == "hg19" ] |
818 doEAS="Y" | 824 then |
819 if [ $buildver == "hg19" ] | 825 doSAS="Y" |
820 then | 826 fi |
821 doSAS="Y" | 827 |
822 fi | 828 elif [[ $version == "1000g2015aug" ]] |
823 | 829 then |
824 elif [[ $version == "1000g2015aug" ]] | 830 fileID="2015_08" |
825 then | 831 doALL="Y" |
826 fileID="2015_08" | 832 doAMR="Y" |
827 doALL="Y" | 833 doAFR="Y" |
828 doAMR="Y" | 834 doEUR="Y" |
829 doAFR="Y" | 835 doEAS="Y" |
830 doEUR="Y" | 836 doSAS="Y" |
831 doEAS="Y" | 837 |
832 doSAS="Y" | 838 elif [[ $version == "1000g2012feb" ]] |
833 | 839 then |
834 elif [[ $version == "1000g2012feb" ]] | 840 fileID="2012_02" |
835 then | 841 doALL="Y" |
836 fileID="2012_02" | 842 elif [[ $version == "1000g2010nov" ]] |
837 doALL="Y" | 843 then |
838 elif [[ $version == "1000g2010nov" ]] | 844 fileID="2010_11" |
839 then | 845 doALL="Y" |
840 fileID="2010_11" | 846 elif [[ $version == "1000g2010jul" ]] |
841 doALL="Y" | 847 then |
842 elif [[ $version == "1000g2010jul" ]] | 848 fileID="2010_07" |
843 then | 849 doALL="N" |
844 fileID="2010_07" | 850 doCEU="Y" |
845 doALL="N" | 851 doYRI="Y" |
846 doCEU="Y" | 852 doJPTCHB="Y" |
847 doYRI="Y" | 853 else |
848 doJPTCHB="Y" | 854 echo "unrecognized 1000g version, skipping" |
849 else | 855 fi |
850 echo "unrecognized 1000g version, skipping" | 856 |
851 fi | 857 #ALL |
852 | 858 if [ $doALL == "Y" ] |
853 #ALL | 859 then |
854 if [ $doALL == "Y" ] | 860 echo -e "\n1000Genomes ALL" |
855 then | 861 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1 |
856 echo -e "\n1000Genomes ALL" | 862 |
857 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1 | 863 annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped |
858 | 864 sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
859 annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped | 865 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL |
860 sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 866 fi |
861 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL | 867 |
862 fi | 868 # AFR |
863 | 869 if [ $doAFR == "Y" ] |
864 # AFR | 870 then |
865 if [ $doAFR == "Y" ] | 871 echo -e "\n1000Genomes AFR" |
866 then | 872 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1 |
867 echo -e "\n1000Genomes AFR" | 873 |
868 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1 | 874 annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped |
869 | 875 sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
870 annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped | 876 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR |
871 sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 877 fi |
872 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR | 878 |
873 fi | 879 |
874 | 880 # AMR |
875 | 881 if [ $doAMR == "Y" ] |
876 # AMR | 882 then |
877 if [ $doAMR == "Y" ] | 883 echo -e "\n1000Genomes AMR" |
878 then | 884 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1 |
879 echo -e "\n1000Genomes AMR" | 885 |
880 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1 | 886 annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped |
881 | 887 sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
882 annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped | 888 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR |
883 sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 889 fi |
884 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR | 890 |
885 fi | 891 # ASN |
886 | 892 if [ $doASN == "Y" ] |
887 # ASN | 893 then |
888 if [ $doASN == "Y" ] | 894 echo -e "\n1000Genomes ASN" |
889 then | 895 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1 |
890 echo -e "\n1000Genomes ASN" | 896 |
891 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1 | 897 annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped |
892 | 898 sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
893 annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped | 899 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN |
894 sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 900 fi |
895 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN | 901 |
896 fi | 902 # EAS |
897 | 903 if [ $doEAS == "Y" ] |
898 # EAS | 904 then |
899 if [ $doEAS == "Y" ] | 905 echo -e "\n1000Genomes EAS" |
900 then | 906 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1 |
901 echo -e "\n1000Genomes EAS" | 907 |
902 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1 | 908 annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped |
903 | 909 sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
904 annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped | 910 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS |
905 sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 911 fi |
906 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS | 912 |
907 fi | 913 # SAS |
908 | 914 if [ $doSAS == "Y" ] |
909 # SAS | 915 then |
910 if [ $doSAS == "Y" ] | 916 echo -e "\n1000Genomes SAS" |
911 then | 917 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1 |
912 echo -e "\n1000Genomes SAS" | 918 |
913 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1 | 919 annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped |
914 | 920 sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
915 annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped | 921 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS |
916 sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 922 fi |
917 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS | 923 |
918 fi | 924 # EUR |
919 | 925 if [ $doEUR == "Y" ] |
920 # EUR | 926 then |
921 if [ $doEUR == "Y" ] | 927 echo -e "\n1000Genomes EUR" |
922 then | 928 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1 |
923 echo -e "\n1000Genomes EUR" | 929 |
924 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1 | 930 annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped |
925 | 931 sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
926 annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped | 932 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR |
927 sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 933 fi |
928 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR | 934 |
929 fi | 935 # CEU |
930 | 936 if [ $doCEU == "Y" ] |
931 # CEU | 937 then |
932 if [ $doCEU == "Y" ] | 938 echo -e "\n1000Genomes CEU" |
933 then | 939 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1 |
934 echo -e "\n1000Genomes CEU" | 940 |
935 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1 | 941 annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped |
936 | 942 sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
937 annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped | 943 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU |
938 sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 944 fi |
939 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU | 945 |
940 fi | 946 # YRI |
941 | 947 if [ $doYRI == "Y" ] |
942 # YRI | 948 then |
943 if [ $doYRI == "Y" ] | 949 echo -e "\n1000Genomes YRI" |
944 then | 950 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1 |
945 echo -e "\n1000Genomes YRI" | 951 |
946 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1 | 952 annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped |
947 | 953 sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
948 annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped | 954 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI |
949 sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 955 |
950 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI | 956 |
951 | 957 fi |
952 | 958 |
953 fi | 959 #JPTCHB |
954 | 960 if [ $doJPTCHB == "Y" ] |
955 #JPTCHB | 961 then |
956 if [ $doJPTCHB == "Y" ] | 962 echo -e "\n1000Genomes JPTCHB" |
957 then | 963 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1 |
958 echo -e "\n1000Genomes JPTCHB" | 964 |
959 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1 | 965 annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped |
960 | 966 sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
961 annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped | 967 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB |
962 sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 968 fi |
963 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB | 969 |
964 fi | 970 done |
965 | 971 fi |
966 done | 972 |
967 fi | 973 |
968 | 974 |
969 | 975 |
970 | 976 #### IMPACT SCORE ANNOTATIONS |
971 | 977 |
972 #### IMPACT SCORE ANNOTATIONS | 978 |
973 | 979 if [ $ljb2_sift == "Y" ] |
974 | 980 then |
975 if [ $ljb2_sift == "Y" ] | 981 echo -e "\nLJB2 SIFT Annotation" |
976 then | 982 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1 |
977 echo -e "\nLJB2 SIFT Annotation" | 983 |
978 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1 | 984 annovarout=annovarinput.${buildver}_ljb2_sift_dropped |
979 | 985 sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
980 annovarout=annovarinput.${buildver}_ljb2_sift_dropped | 986 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT |
981 sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 987 fi |
982 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT | 988 |
983 fi | 989 if [ $ljb2_pp2hdiv == "Y" ] |
984 | 990 then |
985 if [ $ljb2_pp2hdiv == "Y" ] | 991 echo -e "\nLJB2 pp2hdiv Annotation" |
986 then | 992 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1 |
987 echo -e "\nLJB2 pp2hdiv Annotation" | 993 |
988 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1 | 994 annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped |
989 | 995 sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
990 annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped | 996 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV |
991 sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 997 fi |
992 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV | 998 |
993 fi | 999 if [ $ljb2_pp2hvar == "Y" ] |
994 | 1000 then |
995 if [ $ljb2_pp2hvar == "Y" ] | 1001 echo -e "\nLJB2 pp2hvar Annotation" |
996 then | 1002 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1 |
997 echo -e "\nLJB2 pp2hvar Annotation" | 1003 |
998 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1 | 1004 annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped |
999 | 1005 |
1000 annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped | 1006 head $annovarout |
1001 | 1007 sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1002 head $annovarout | 1008 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR |
1003 sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1009 fi |
1004 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR | 1010 |
1005 fi | 1011 if [ $ljb2_lrt == "Y" ] |
1006 | 1012 then |
1007 if [ $ljb2_lrt == "Y" ] | 1013 echo -e "\nLJB2 LRT Annotation" |
1008 then | 1014 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1 |
1009 echo -e "\nLJB2 LRT Annotation" | 1015 |
1010 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1 | 1016 annovarout=annovarinput.${buildver}_ljb2_lrt_dropped |
1011 | 1017 sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1012 annovarout=annovarinput.${buildver}_ljb2_lrt_dropped | 1018 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT |
1013 sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1019 fi |
1014 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT | 1020 |
1015 fi | 1021 if [ $ljb2_mt == "Y" ] |
1016 | 1022 then |
1017 if [ $ljb2_mt == "Y" ] | 1023 echo -e "\nLJB2 mutationtaster Annotation" |
1018 then | 1024 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1 |
1019 echo -e "\nLJB2 mutationtaster Annotation" | 1025 |
1020 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1 | 1026 annovarout=annovarinput.${buildver}_ljb2_mt_dropped |
1021 | 1027 sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1022 annovarout=annovarinput.${buildver}_ljb2_mt_dropped | 1028 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster |
1023 sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1029 fi |
1024 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster | 1030 |
1025 fi | 1031 if [ $ljb2_ma == "Y" ] |
1026 | 1032 then |
1027 if [ $ljb2_ma == "Y" ] | 1033 echo -e "\nLJB2 mutationassessor Annotation" |
1028 then | 1034 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1 |
1029 echo -e "\nLJB2 mutationassessor Annotation" | 1035 |
1030 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1 | 1036 annovarout=annovarinput.${buildver}_ljb2_ma_dropped |
1031 | 1037 sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1032 annovarout=annovarinput.${buildver}_ljb2_ma_dropped | 1038 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor |
1033 sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1039 fi |
1034 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor | 1040 |
1035 fi | 1041 if [ $ljb2_fathmm == "Y" ] |
1036 | 1042 then |
1037 if [ $ljb2_fathmm == "Y" ] | 1043 echo -e "\nLJB2 FATHMM Annotation" |
1038 then | 1044 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1 |
1039 echo -e "\nLJB2 FATHMM Annotation" | 1045 |
1040 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1 | 1046 annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped |
1041 | 1047 sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1042 annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped | 1048 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM |
1043 sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1049 fi |
1044 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM | 1050 |
1045 fi | 1051 if [ $ljb2_gerp == "Y" ] |
1046 | 1052 then |
1047 if [ $ljb2_gerp == "Y" ] | 1053 echo -e "\nLJB2 GERP++ Annotation" |
1048 then | 1054 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1 |
1049 echo -e "\nLJB2 GERP++ Annotation" | 1055 |
1050 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1 | 1056 annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped |
1051 | 1057 sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1052 annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped | 1058 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++ |
1053 sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1059 fi |
1054 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++ | 1060 |
1055 fi | 1061 if [ $ljb2_phylop == "Y" ] |
1056 | 1062 then |
1057 if [ $ljb2_phylop == "Y" ] | 1063 echo -e "\nLJB2 PhyloP Annotation" |
1058 then | 1064 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1 |
1059 echo -e "\nLJB2 PhyloP Annotation" | 1065 |
1060 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1 | 1066 annovarout=annovarinput.${buildver}_ljb2_phylop_dropped |
1061 | 1067 sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1062 annovarout=annovarinput.${buildver}_ljb2_phylop_dropped | 1068 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP |
1063 sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1069 fi |
1064 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP | 1070 |
1065 fi | 1071 if [ $ljb2_siphy == "Y" ] |
1066 | 1072 then |
1067 if [ $ljb2_siphy == "Y" ] | 1073 echo -e "\nLJB2 SiPhy Annotation" |
1068 then | 1074 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1 |
1069 echo -e "\nLJB2 SiPhy Annotation" | 1075 |
1070 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1 | 1076 annovarout=annovarinput.${buildver}_ljb2_siphy_dropped |
1071 | 1077 sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1072 annovarout=annovarinput.${buildver}_ljb2_siphy_dropped | 1078 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy |
1073 sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1079 fi |
1074 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy | 1080 |
1075 fi | 1081 |
1076 | 1082 |
1077 | 1083 ### OLD IMPACT SCORE ANNOTATIONS |
1078 | 1084 |
1079 ### OLD IMPACT SCORE ANNOTATIONS | 1085 # SIFT |
1080 | 1086 if [ $avsift == "Y" ] |
1081 # SIFT | 1087 then |
1082 if [ $avsift == "Y" ] | 1088 echo -e "\nSIFT Annotation" |
1083 then | 1089 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1 |
1084 echo -e "\nSIFT Annotation" | 1090 |
1085 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1 | 1091 annovarout=annovarinput.${buildver}_avsift_dropped |
1086 | 1092 sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1087 annovarout=annovarinput.${buildver}_avsift_dropped | 1093 joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT |
1088 sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1094 fi |
1089 joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT | 1095 |
1090 fi | 1096 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores |
1091 | 1097 # SIFT2 |
1092 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores | 1098 if [ $ljbsift == "Y" ] |
1093 # SIFT2 | 1099 then |
1094 if [ $ljbsift == "Y" ] | 1100 echo -e "\nLJB SIFT Annotation" |
1095 then | 1101 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1 |
1096 echo -e "\nLJB SIFT Annotation" | 1102 |
1097 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1 | 1103 annovarout=annovarinput.${buildver}_ljb_sift_dropped |
1098 | 1104 sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1099 annovarout=annovarinput.${buildver}_ljb_sift_dropped | 1105 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT |
1100 sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1106 fi |
1101 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT | 1107 |
1102 fi | 1108 |
1103 | 1109 # PolyPhen2 |
1104 | 1110 if [ $polyphen2 == "Y" ] |
1105 # PolyPhen2 | 1111 then |
1106 if [ $polyphen2 == "Y" ] | 1112 echo -e "\nPolyPhen Annotation" |
1107 then | 1113 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1 |
1108 echo -e "\nPolyPhen Annotation" | 1114 |
1109 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1 | 1115 annovarout=annovarinput.${buildver}_ljb_pp2_dropped |
1110 | 1116 sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1111 annovarout=annovarinput.${buildver}_ljb_pp2_dropped | 1117 joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2 |
1112 sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1118 fi |
1113 joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2 | 1119 |
1114 fi | 1120 |
1115 | 1121 # MutationTaster |
1116 | 1122 if [ $mutationtaster == "Y" ] |
1117 # MutationTaster | 1123 then |
1118 if [ $mutationtaster == "Y" ] | 1124 echo -e "\nMutationTaster Annotation" |
1119 then | 1125 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1 |
1120 echo -e "\nMutationTaster Annotation" | 1126 |
1121 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1 | 1127 annovarout=annovarinput.${buildver}_ljb_mt_dropped |
1122 | 1128 sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1123 annovarout=annovarinput.${buildver}_ljb_mt_dropped | 1129 joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster |
1124 sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1130 fi |
1125 joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster | 1131 |
1126 fi | 1132 |
1127 | 1133 # LRT |
1128 | 1134 if [ $lrt == "Y" ] |
1129 # LRT | 1135 then |
1130 if [ $lrt == "Y" ] | 1136 echo -e "\nLRT Annotation" |
1131 then | 1137 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1 |
1132 echo -e "\nLRT Annotation" | 1138 |
1133 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1 | 1139 annovarout=annovarinput.${buildver}_ljb_lrt_dropped |
1134 | 1140 sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1135 annovarout=annovarinput.${buildver}_ljb_lrt_dropped | 1141 joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore |
1136 sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1142 fi |
1137 joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore | 1143 |
1138 fi | 1144 # PhyloP |
1139 | 1145 if [ $phylop == "Y" ] |
1140 # PhyloP | 1146 then |
1141 if [ $phylop == "Y" ] | 1147 echo -e "\nPhyloP Annotation" |
1142 then | 1148 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1 |
1143 echo -e "\nPhyloP Annotation" | 1149 |
1144 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1 | 1150 annovarout=annovarinput.${buildver}_ljb_phylop_dropped |
1145 | 1151 sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1146 annovarout=annovarinput.${buildver}_ljb_phylop_dropped | 1152 joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP |
1147 sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1153 fi |
1148 joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP | 1154 |
1149 fi | 1155 |
1150 | 1156 ### ESP Exome Variant Server |
1151 | 1157 if [ $esp != "None" ] |
1152 ### ESP Exome Variant Server | 1158 then |
1153 if [ $esp != "None" ] | 1159 echo -e "\nESP Annotation" |
1154 then | 1160 for version in $espstr |
1155 echo -e "\nESP Annotation" | 1161 do |
1156 for version in $espstr | 1162 echo "version: $version" |
1157 do | 1163 # 6500si ALL |
1158 echo "version: $version" | 1164 if [ $version == "esp6500si_all" ] |
1159 # 6500si ALL | 1165 then |
1160 if [ $version == "esp6500si_all" ] | 1166 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1 |
1161 then | 1167 |
1162 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1 | 1168 annovarout=annovarinput.${buildver}_esp6500si_all_dropped |
1163 | 1169 sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1164 annovarout=annovarinput.${buildver}_esp6500si_all_dropped | 1170 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL |
1165 sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1171 fi |
1166 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL | 1172 |
1167 fi | 1173 |
1168 | 1174 # 6500si European American |
1169 | 1175 if [ $version == "esp6500si_ea" ] |
1170 # 6500si European American | 1176 then |
1171 if [ $version == "esp6500si_ea" ] | 1177 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1 |
1172 then | 1178 |
1173 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1 | 1179 annovarout=annovarinput.${buildver}_esp6500si_ea_dropped |
1174 | 1180 sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout |
1175 annovarout=annovarinput.${buildver}_esp6500si_ea_dropped | 1181 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA |
1176 sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout | 1182 fi |
1177 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA | 1183 |
1178 fi | 1184 # 6500si African Americans |
1179 | 1185 if [ $version == "esp6500si_aa" ] |
1180 # 6500si African Americans | 1186 then |
1181 if [ $version == "esp6500si_aa" ] | 1187 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1 |
1182 then | 1188 |
1183 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1 | 1189 annovarout=annovarinput.${buildver}_esp6500si_aa_dropped |
1184 | 1190 sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout |
1185 annovarout=annovarinput.${buildver}_esp6500si_aa_dropped | 1191 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA |
1186 sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout | 1192 fi |
1187 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA | 1193 |
1188 fi | 1194 |
1189 | 1195 # 6500 ALL |
1190 | 1196 if [ $version == "esp6500_all" ] |
1191 # 6500 ALL | 1197 then |
1192 if [ $version == "esp6500_all" ] | 1198 ls |
1193 then | 1199 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1 |
1194 ls | 1200 |
1195 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1 | 1201 annovarout=annovarinput.${buildver}_esp6500_all_dropped |
1196 | 1202 sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout |
1197 annovarout=annovarinput.${buildver}_esp6500_all_dropped | 1203 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL |
1198 sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout | 1204 fi |
1199 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL | 1205 |
1200 fi | 1206 |
1201 | 1207 # 6500 European American |
1202 | 1208 if [ $version == "esp6500_ea" ] |
1203 # 6500 European American | 1209 then |
1204 if [ $version == "esp6500_ea" ] | 1210 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1 |
1205 then | 1211 annovarout=annovarinput.${buildver}_esp6500_ea_dropped |
1206 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1 | 1212 sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1207 annovarout=annovarinput.${buildver}_esp6500_ea_dropped | 1213 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA |
1208 sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1214 fi |
1209 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA | 1215 |
1210 fi | 1216 # 6500 African Americans |
1211 | 1217 if [ $version == "esp6500_aa" ] |
1212 # 6500 African Americans | 1218 then |
1213 if [ $version == "esp6500_aa" ] | 1219 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1 |
1214 then | 1220 |
1215 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1 | 1221 annovarout=annovarinput.${buildver}_esp6500_aa_dropped |
1216 | 1222 sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1217 annovarout=annovarinput.${buildver}_esp6500_aa_dropped | 1223 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA |
1218 sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1224 fi |
1219 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA | 1225 |
1220 fi | 1226 |
1221 | 1227 # 5400 ALL |
1222 | 1228 if [ $version == "esp5400_all" ] |
1223 # 5400 ALL | 1229 then |
1224 if [ $version == "esp5400_all" ] | 1230 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1 |
1225 then | 1231 |
1226 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1 | 1232 annovarout=annovarinput.${buildver}_esp5400_all_dropped |
1227 | 1233 sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1228 annovarout=annovarinput.${buildver}_esp5400_all_dropped | 1234 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL |
1229 sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1235 fi |
1230 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL | 1236 |
1231 fi | 1237 |
1232 | 1238 # 5400 European American |
1233 | 1239 if [ $version == "esp5400_ea" ] |
1234 # 5400 European American | 1240 then |
1235 if [ $version == "esp5400_ea" ] | 1241 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1 |
1236 then | 1242 |
1237 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1 | 1243 annovarout=annovarinput.${buildver}_esp5400_ea_dropped |
1238 | 1244 sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1239 annovarout=annovarinput.${buildver}_esp5400_ea_dropped | 1245 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA |
1240 sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1246 fi |
1241 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA | 1247 |
1242 fi | 1248 # 5400 African Americans |
1243 | 1249 if [ $version == "esp5400_aa" ] |
1244 # 5400 African Americans | 1250 then |
1245 if [ $version == "esp5400_aa" ] | 1251 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1 |
1246 then | 1252 |
1247 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1 | 1253 annovarout=annovarinput.${buildver}_esp5400_aa_dropped |
1248 | 1254 sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1249 annovarout=annovarinput.${buildver}_esp5400_aa_dropped | 1255 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA |
1250 sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1256 fi |
1251 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA | 1257 |
1252 fi | 1258 done |
1253 | 1259 fi |
1254 done | 1260 |
1255 fi | 1261 #ExAC-03 database |
1256 | 1262 if [ $exac03 == "Y" ] |
1257 | 1263 then |
1258 #ExAC-03 database | 1264 echo -e "\nExAC03 Annotation" |
1259 if [ $exac03 == "Y" ] | 1265 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03 annovarinput $humandb 2>&1 |
1260 then | 1266 |
1261 echo -e "\nExAC03 Annotation" | 1267 #annovarout=annovarinput.${buildver}_exac03_dropped |
1262 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03 annovarinput $humandb 2>&1 | 1268 |
1263 | 1269 # split allelefrequency column into several columns, one per population |
1264 #annovarout=annovarinput.${buildver}_exac03_dropped | 1270 awk 'BEGIN{FS="\t" |
1265 | 1271 OFS="\t" |
1266 # split allelefrequency column into several columns, one per population | 1272 }{ |
1267 awk 'BEGIN{FS="\t" | 1273 gsub(",","\t",$2) |
1268 OFS="\t" | 1274 print $0 |
1269 }{ | 1275 }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout |
1270 gsub(",","\t",$2) | 1276 |
1271 print $0 | 1277 sed -i '1i\db\tExAC_ALL\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1272 }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout | 1278 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_ALL,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS |
1273 | 1279 fi |
1274 sed -i '1i\db\tExAC_Freq\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1280 |
1275 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_Freq,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS | 1281 #ExAC-03 database |
1276 fi | 1282 if [ $exac03nonpsych == "Y" ] |
1277 | 1283 then |
1284 echo -e "\nExAC03 non-psych Annotation" | |
1285 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03nonpsych annovarinput $humandb 2>&1 | |
1286 | |
1287 #annovarout=annovarinput.${buildver}_exac03_dropped | |
1288 | |
1289 # split allelefrequency column into several columns, one per population | |
1290 awk 'BEGIN{FS="\t" | |
1291 OFS="\t" | |
1292 }{ | |
1293 gsub(",","\t",$2) | |
1294 print $0 | |
1295 }END{}' annovarinput.${buildver}_exac03nonpsych_dropped > $annovarout | |
1296 | |
1297 sed -i '1i\db\tExAC_non-phsych_ALL\tExAC_non-phsych_AFR\tExAC_non-phsych_AMR\tExAC_non-phsych_EAS\tExAC_non-phsych_FIN\tExAC_non-phsych_NFE\tExAC_non-phsych_OTH\tExAC_non-phsych_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1298 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_non-phsych_ALL,B.ExAC_non-phsych_AFR,B.ExAC_non-phsych_AMR,B.ExAC_non-phsych_EAS,B.ExAC_non-phsych_FIN,B.ExAC_non-phsych_NFE,B.ExAC_non-phsych_OTH,B.ExAC_non-phsych_SAS | |
1299 fi | |
1300 | |
1301 #ExAC-03 database | |
1302 if [ $exac03nontcga == "Y" ] | |
1303 then | |
1304 echo -e "\nExAC03 non-tcga Annotation" | |
1305 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03nontcga annovarinput $humandb 2>&1 | |
1306 | |
1307 #annovarout=annovarinput.${buildver}_exac03_dropped | |
1308 | |
1309 # split allelefrequency column into several columns, one per population | |
1310 awk 'BEGIN{FS="\t" | |
1311 OFS="\t" | |
1312 }{ | |
1313 gsub(",","\t",$2) | |
1314 print $0 | |
1315 }END{}' annovarinput.${buildver}_exac03nontcga_dropped > $annovarout | |
1316 | |
1317 sed -i '1i\db\tExAC_non-TCGA_ALL\tExAC_non-TCGA_AFR\tExAC_non-TCGA_AMR\tExAC_non-TCGA_EAS\tExAC_non-TCGA_FIN\tExAC_non-TCGA_NFE\tExAC_non-TCGA_OTH\tExAC_non-TCGA_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1318 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_non-TCGA_ALL,B.ExAC_non-TCGA_AFR,B.ExAC_non-TCGA_AMR,B.ExAC_non-TCGA_EAS,B.ExAC_non-TCGA_FIN,B.ExAC_non-TCGA_NFE,B.ExAC_non-TCGA_OTH,B.ExAC_non-TCGA_SAS | |
1319 fi | |
1320 | |
1321 #dbscSNV 1.1 | |
1322 if [ $dbscsnv11 == "Y" ] | |
1323 then | |
1324 echo -e "\ndbscSNV11 Annotation" | |
1325 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype dbscsnv11 annovarinput $humandb 2>&1 | |
1326 | |
1327 #annovarout="annovarinput.${buildver}_dbscsnv11_dropped" | |
1328 awk 'BEGIN{FS="\t" | |
1329 OFS="\t" | |
1330 }{ | |
1331 gsub(",","\t",$2) | |
1332 print $0 | |
1333 }END{}' annovarinput.${buildver}_dbscsnv11_dropped > $annovarout | |
1334 | |
1335 sed -i '1i\db\tdbscSNV11_ADA_SCORE\tdbscSNV11_RF_SCORE\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1336 joinresults originalfile $annovarout 4 5 6 7 8 B.dbscSNV11_ADA_SCORE,B.dbscSNV11_RF_SCORE | |
1337 fi | |
1338 | |
1339 | |
1340 #kaviar_20150923 | |
1341 if [ $kaviar_20150923 == "Y" ] | |
1342 then | |
1343 echo -e "\nkaviar_20150923 Annotation" | |
1344 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype kaviar_20150923 annovarinput $humandb 2>&1 | |
1345 | |
1346 #annovarout="annovarinput.${buildver}_kaviar_20150923_dropped" | |
1347 awk 'BEGIN{FS="\t" | |
1348 OFS="\t" | |
1349 }{ | |
1350 gsub(",","\t",$2) | |
1351 print $0 | |
1352 }END{}' annovarinput.${buildver}_kaviar_20150923_dropped > $annovarout | |
1353 | |
1354 sed -i '1i\db\tKaviar_AF\tKaviar_AC\tKaviar_AN\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1355 joinresults originalfile $annovarout 5 6 7 8 9 B.Kaviar_AF,B.Kaviar_AC,B.Kaviar_AN | |
1356 fi | |
1357 | |
1358 #hrcr1 | |
1359 if [ $hrcr1 == "Y" ] | |
1360 then | |
1361 echo -e "\nhrcr1 Annotation" | |
1362 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype hrcr1 annovarinput $humandb 2>&1 | |
1363 | |
1364 #annovarout="annovarinput.${buildver}_dbscsnv11_dropped" | |
1365 awk 'BEGIN{FS="\t" | |
1366 OFS="\t" | |
1367 }{ | |
1368 gsub(",","\t",$2) | |
1369 print $0 | |
1370 }END{}' annovarinput.${buildver}_hrcr1_dropped > $annovarout | |
1371 sed -i '1i\db\tHRC_AF\tHRC_AC\tHRC_AN\tHRC_non1000G_AF\tHRC_non1000G_AC\tHRC_non1000g_AN\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1372 joinresults originalfile $annovarout 8 9 10 11 12 B.HRC_AF,B.HRC_AC,B.HRC_AN,B.HRC_non1000g_AF,B.HRC_non1000g_AC,B.HRC_non1000g_AN | |
1373 fi | |
1374 | |
1375 #dbnsfp30a | |
1376 if [ $dbnsfp30a == "Y" ] | |
1377 then | |
1378 echo -e "\ndbnsfp30a Annotation" | |
1379 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype dbnsfp30a annovarinput $humandb 2>&1 | |
1380 | |
1381 #annovarout="annovarinput.${buildver}_dbnsfp30a_dropped" | |
1382 awk 'BEGIN{FS="\t" | |
1383 OFS="\t" | |
1384 }{ | |
1385 gsub(",","\t",$2) | |
1386 print $0 | |
1387 }END{}' annovarinput.${buildver}_dbnsfp30a_dropped > $annovarout | |
1388 | |
1389 sed -i '1i\db\tdbNSFP_SIFT_score\tdbNSFP_SIFT_pred\tdbNSFP_Polyphen2_HDIV_score\tdbNSFP_Polyphen2_HDIV_pred\tdbNSFP_Polyphen2_HVAR_score\tdbNSFP_Polyphen2_HVAR_pred\tdbNSFP_LRT_score\tdbNSFP_LRT_pred\tdbNSFP_MutationTaster_score\tdbNSFP_MutationTaster_pred\tdbNSFP_MutationAssessor_score\tdbNSFP_MutationAssessor_pred\tdbNSFP_FATHMM_score\tdbNSFP_FATHMM_pred\tdbNSFP_PROVEAN_score\tdbNSFP_PROVEAN_pred\tdbNSFP_VEST3_score\tdbNSFP_CADD_raw\tdbNSFP_CADD_phredDANN_score\tdbNSFP_fathmm-MKL_coding_score\tdbNSFP_fathmm-MKL_coding_pred\tdbNSFP_MetaSVM_score\tdbNSFP_MetaSVM_pred\tdbNSFP_MetaLR_score\tdbNSFP_MetaLR_pred\tdbNSFP_integrated_fitCons_score\tdbNSFP_integrated_confidence_value\tdbNSFP_GERP_RS\tdbNSFP_phyloP7way_vertebrate\tdbNSFP_phyloP20way_mammalian\tdbNSFP_phastCons7way_vertebrate\tdbNSFP_phastCons20way_mammalian\tdbNSFP_SiPhy_29way_logOdds\tdbNSFP_unknown\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1390 joinresults originalfile $annovarout 36 37 38 39 40 B.dbNSFP_SIFT_score,B.dbNSFP_SIFT_pred,B.dbNSFP_Polyphen2_HDIV_score,B.dbNSFP_Polyphen2_HDIV_pred,B.dbNSFP_Polyphen2_HVAR_score,B.dbNSFP_Polyphen2_HVAR_pred,B.dbNSFP_LRT_score,B.dbNSFP_LRT_pred,B.dbNSFP_MutationTaster_score,B.dbNSFP_MutationTaster_pred,B.dbNSFP_MutationAssessor_score,B.dbNSFP_MutationAssessor_pred,B.dbNSFP_FATHMM_score,B.dbNSFP_FATHMM_pred,B.dbNSFP_PROVEAN_score,B.dbNSFP_PROVEAN_pred,B.dbNSFP_VEST3_score,B.dbNSFP_CADD_raw,B.dbNSFP_CADD_phredDANN_score,B.dbNSFP_fathmm-MKL_coding_score,B.dbNSFP_fathmm-MKL_coding_pred,B.dbNSFP_MetaSVM_score,B.dbNSFP_MetaSVM_pred,B.dbNSFP_MetaLR_score,B.dbNSFP_MetaLR_pred,B.dbNSFP_integrated_fitCons_score,B.dbNSFP_integrated_confidence_value,B.dbNSFP_GERP_RS,B.dbNSFP_phyloP7way_vertebrate,B.dbNSFP_phyloP20way_mammalian,B.dbNSFP_phastCons7way_vertebrate,B.dbNSFP_phastCons20way_mammalian,B.dbNSFP_SiPhy_29way_logOdds | |
1391 | |
1392 fi | |
1393 | |
1394 #mitimpact2 | |
1395 if [ $mitimpact2 == "Y" ] | |
1396 then | |
1397 echo -e "\nmitimpact2 Annotation" | |
1398 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype mitimpact2 annovarinput $humandb 2>&1 | |
1399 | |
1400 #annovarout="annovarinput.${buildver}_mitimpact2_dropped" | |
1401 awk 'BEGIN{FS="\t" | |
1402 OFS="\t" | |
1403 }{ | |
1404 gsub(",","\t",$2) | |
1405 print $0 | |
1406 }END{}' annovarinput.${buildver}_mitimpact2_dropped > $annovarout | |
1407 | |
1408 sed -i '1i\db\tMITimpact2_Gene_symbol\tMITimpact2_OXPHOS_Complex\tMITimpact2_Ensembl_Gene_ID\tMITimpact2_Ensembl_Protein_ID\tMITimpact2_Uniprot_Name\tMITimpact2_Uniprot_ID\tMITimpact2_NCBI_Gene_ID\tMITimpact2_NCBI_Protein_ID\tMITimpact2_Gene_pos\tMITimpact2_AA_pos\tMITimpact2_AA_sub\tMITimpact2_Codon_sub\tMITimpact2_dbSNP_ID\tMITimpact2_PhyloP_46V\tMITimpact2_PhastCons_46V\tMITimpact2_PhyloP_100V\tMITimpact2_PhastCons_100V\tMITimpact2_SiteVar\tMITimpact2_PolyPhen2_prediction\tMITimpact2_PolyPhen2_score\tMITimpact2_SIFT_prediction\tMITimpact2_SIFT_score\tMITimpact2_FatHmm_prediction\tMITimpact2_FatHmm_score\tMITimpact2_PROVEAN_prediction\tMITimpact2_PROVEAN_score\tMITimpact2_MutAss_prediction\tMITimpact2_MutAss_score\tMITimpact2_EFIN_Swiss_Prot_Score\tMITimpact2_EFIN_Swiss_Prot_Prediction\tMITimpact2_EFIN_HumDiv_Score\tMITimpact2_EFIN_HumDiv_Prediction\tMITimpact2_CADD_score\tMITimpact2_CADD_Phred_score\tMITimpact2_CADD_prediction\tMITimpact2_Carol_prediction\tMITimpact2_Carol_score\tMITimpact2_Condel_score\tMITimpact2_Condel_pred\tMITimpact2_COVEC_WMV\tMITimpact2_COVEC_WMV_prediction\tMITimpact2_PolyPhen2_score_transf\tMITimpact2_PolyPhen2_pred_transf\tMITimpact2_SIFT_score_transf\tMITimpact2_SIFT_pred_transf\tMITimpact2_MutAss_score_transf\tMITimpact2_MutAss_pred_transf\tMITimpact2_Perc_coevo_Sites\tMITimpact2_Mean_MI_score\tMITimpact2_COSMIC_ID\tMITimpact2_Tumor_site\tMITimpact2_Examined_samples\tMITimpact2_Mutation_frequency\tMITimpact2_US\tMITimpact2_Status\tMITimpact2_Associated_disease\tMITimpact2_Presence_in_TD\tMITimpact2_Class_predicted\tMITimpact2_Prob_N\tMITimpact2_Prob_P\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1409 joinresults originalfile $annovarout 62 63 64 65 66 B.MITimpact2_Gene_symbol,B.MITimpact2_OXPHOS_Complex,B.MITimpact2_Ensembl_Gene_ID,B.MITimpact2_Ensembl_Protein_ID,B.MITimpact2_Uniprot_Name,B.MITimpact2_Uniprot_ID,B.MITimpact2_NCBI_Gene_ID,B.MITimpact2_NCBI_Protein_ID,B.MITimpact2_Gene_pos,B.MITimpact2_AA_pos,B.MITimpact2_AA_sub,B.MITimpact2_Codon_sub,B.MITimpact2_dbSNP_ID,B.MITimpact2_PhyloP_46V,B.MITimpact2_PhastCons_46V,B.MITimpact2_PhyloP_100V,B.MITimpact2_PhastCons_100V,B.MITimpact2_SiteVar,B.MITimpact2_PolyPhen2_prediction,B.MITimpact2_PolyPhen2_score,B.MITimpact2_SIFT_prediction,B.MITimpact2_SIFT_score,B.MITimpact2_FatHmm_prediction,B.MITimpact2_FatHmm_score,B.MITimpact2_PROVEAN_prediction,B.MITimpact2_PROVEAN_score,B.MITimpact2_MutAss_prediction,B.MITimpact2_MutAss_score,B.MITimpact2_EFIN_Swiss_Prot_Score,B.MITimpact2_EFIN_Swiss_Prot_Prediction,B.MITimpact2_EFIN_HumDiv_Score,B.MITimpact2_EFIN_HumDiv_Prediction,B.MITimpact2_CADD_score,B.MITimpact2_CADD_Phred_score,B.MITimpact2_CADD_prediction,B.MITimpact2_Carol_prediction,B.MITimpact2_Carol_score,B.MITimpact2_Condel_score,B.MITimpact2_Condel_pred,B.MITimpact2_COVEC_WMV,B.MITimpact2_COVEC_WMV_prediction,B.MITimpact2_PolyPhen2_score_transf,B.MITimpact2_PolyPhen2_pred_transf,B.MITimpact2_SIFT_score_transf,B.MITimpact2_SIFT_pred_transf,B.MITimpact2_MutAss_score_transf,B.MITimpact2_MutAss_pred_transf,B.MITimpact2_Perc_coevo_Sites,B.MITimpact2_Mean_MI_score,B.MITimpact2_COSMIC_ID,B.MITimpact2_Tumor_site,B.MITimpact2_Examined_samples,B.MITimpact2_Mutation_frequency,B.MITimpact2_US,B.MITimpact2_Status,B.MITimpact2_Associated_disease,B.MITimpact2_Presence_in_TD,B.MITimpact2_Class_predicted,B.MITimpact2_Prob_N,B.MITimpact2_Prob_P | |
1410 fi | |
1411 | |
1412 #mitimpact24 | |
1413 if [ $mitimpact24 == "Y" ] | |
1414 then | |
1415 echo -e "\nmitimpact24 Annotation" | |
1416 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype mitimpact24 annovarinput $humandb 2>&1 | |
1417 | |
1418 #annovarout="annovarinput.${buildver}_mitimpact24_dropped" | |
1419 awk 'BEGIN{FS="\t" | |
1420 OFS="\t" | |
1421 }{ | |
1422 gsub(",","\t",$24) | |
1423 print $0 | |
1424 }END{}' annovarinput.${buildver}_mitimpact24_dropped > $annovarout | |
1425 | |
1426 sed -i '1i\db\tMITimpact24_Gene_symbol\tMITimpact24_OXPHOS_Complex\tMITimpact24_Ensembl_Gene_ID\tMITimpact24_Ensembl_Protein_ID\tMITimpact24_Uniprot_Name\tMITimpact24_Uniprot_ID\tMITimpact24_NCBI_Gene_ID\tMITimpact24_NCBI_Protein_ID\tMITimpact24_Gene_pos\tMITimpact24_AA_pos\tMITimpact24_AA_sub\tMITimpact24_Codon_sub\tMITimpact24_dbSNP_ID\tMITimpact24_PhyloP_46V\tMITimpact24_PhastCons_46V\tMITimpact24_PhyloP_100V\tMITimpact24_PhastCons_100V\tMITimpact24_SiteVar\tMITimpact24_PolyPhen24_prediction\tMITimpact24_PolyPhen24_score\tMITimpact24_SIFT_prediction\tMITimpact24_SIFT_score\tMITimpact24_FatHmm_prediction\tMITimpact24_FatHmm_score\tMITimpact24_PROVEAN_prediction\tMITimpact24_PROVEAN_score\tMITimpact24_MutAss_prediction\tMITimpact24_MutAss_score\tMITimpact24_EFIN_Swiss_Prot_Score\tMITimpact24_EFIN_Swiss_Prot_Prediction\tMITimpact24_EFIN_HumDiv_Score\tMITimpact24_EFIN_HumDiv_Prediction\tMITimpact24_CADD_score\tMITimpact24_CADD_Phred_score\tMITimpact24_CADD_prediction\tMITimpact24_Carol_prediction\tMITimpact24_Carol_score\tMITimpact24_Condel_score\tMITimpact24_Condel_pred\tMITimpact24_COVEC_WMV\tMITimpact24_COVEC_WMV_prediction\tMITimpact24_PolyPhen24_score_transf\tMITimpact24_PolyPhen24_pred_transf\tMITimpact24_SIFT_score_transf\tMITimpact24_SIFT_pred_transf\tMITimpact24_MutAss_score_transf\tMITimpact24_MutAss_pred_transf\tMITimpact24_Perc_coevo_Sites\tMITimpact24_Mean_MI_score\tMITimpact24_COSMIC_ID\tMITimpact24_Tumor_site\tMITimpact24_Examined_samples\tMITimpact24_Mutation_frequency\tMITimpact24_US\tMITimpact24_Status\tMITimpact24_Associated_disease\tMITimpact24_Presence_in_TD\tMITimpact24_Class_predicted\tMITimpact24_Prob_N\tMITimpact24_Prob_P\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | |
1427 joinresults originalfile $annovarout 62 63 64 65 66 B.MITimpact24_Gene_symbol,B.MITimpact24_OXPHOS_Complex,B.MITimpact24_Ensembl_Gene_ID,B.MITimpact24_Ensembl_Protein_ID,B.MITimpact24_Uniprot_Name,B.MITimpact24_Uniprot_ID,B.MITimpact24_NCBI_Gene_ID,B.MITimpact24_NCBI_Protein_ID,B.MITimpact24_Gene_pos,B.MITimpact24_AA_pos,B.MITimpact24_AA_sub,B.MITimpact24_Codon_sub,B.MITimpact24_dbSNP_ID,B.MITimpact24_PhyloP_46V,B.MITimpact24_PhastCons_46V,B.MITimpact24_PhyloP_100V,B.MITimpact24_PhastCons_100V,B.MITimpact24_SiteVar,B.MITimpact24_PolyPhen24_prediction,B.MITimpact24_PolyPhen24_score,B.MITimpact24_SIFT_prediction,B.MITimpact24_SIFT_score,B.MITimpact24_FatHmm_prediction,B.MITimpact24_FatHmm_score,B.MITimpact24_PROVEAN_prediction,B.MITimpact24_PROVEAN_score,B.MITimpact24_MutAss_prediction,B.MITimpact24_MutAss_score,B.MITimpact24_EFIN_Swiss_Prot_Score,B.MITimpact24_EFIN_Swiss_Prot_Prediction,B.MITimpact24_EFIN_HumDiv_Score,B.MITimpact24_EFIN_HumDiv_Prediction,B.MITimpact24_CADD_score,B.MITimpact24_CADD_Phred_score,B.MITimpact24_CADD_prediction,B.MITimpact24_Carol_prediction,B.MITimpact24_Carol_score,B.MITimpact24_Condel_score,B.MITimpact24_Condel_pred,B.MITimpact24_COVEC_WMV,B.MITimpact24_COVEC_WMV_prediction,B.MITimpact24_PolyPhen24_score_transf,B.MITimpact24_PolyPhen24_pred_transf,B.MITimpact24_SIFT_score_transf,B.MITimpact24_SIFT_pred_transf,B.MITimpact24_MutAss_score_transf,B.MITimpact24_MutAss_pred_transf,B.MITimpact24_Perc_coevo_Sites,B.MITimpact24_Mean_MI_score,B.MITimpact24_COSMIC_ID,B.MITimpact24_Tumor_site,B.MITimpact24_Examined_samples,B.MITimpact24_Mutation_frequency,B.MITimpact24_US,B.MITimpact24_Status,B.MITimpact24_Associated_disease,B.MITimpact24_Presence_in_TD,B.MITimpact24_Class_predicted,B.MITimpact24_Prob_N,B.MITimpact24_Prob_P | |
1428 fi | |
1429 | |
1430 | |
1278 #GoNL database | 1431 #GoNL database |
1279 if [ $gonl == "Y" ] | 1432 if [ $gonl == "Y" ] |
1280 then | 1433 then |
1281 | 1434 |
1282 if [ $buildver == "hg19" ] | 1435 if [ $buildver == "hg19" ] |
1283 then | 1436 then |
1284 echo -e "\nGoNL Annotation" | 1437 echo -e "\nGoNL Annotation" |
1285 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype generic -genericdbfile ${buildver}_gonl.txt annovarinput $humandb 2>&1 | 1438 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype generic -genericdbfile ${buildver}_gonl.txt annovarinput $humandb 2>&1 |
1286 | 1439 |
1287 ls | 1440 ls |
1288 annovarout=annovarinput.${buildver}_generic_dropped | 1441 annovarout=annovarinput.${buildver}_generic_dropped |
1289 | 1442 |
1290 head $annovarout | 1443 head $annovarout |
1291 | 1444 |
1292 sed -i '1i\db\tGoNL\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1445 sed -i '1i\db\tGoNL\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1293 joinresults originalfile $annovarout 3 4 5 6 7 B.GoNL | 1446 joinresults originalfile $annovarout 3 4 5 6 7 B.GoNL |
1294 | 1447 |
1295 fi | 1448 fi |
1296 | 1449 |
1297 fi | 1450 fi |
1298 | 1451 |
1299 #SPIDEX database | 1452 #SPIDEX database |
1300 if [ $spidex == "Y" ] | 1453 if [ $spidex == "Y" ] |
1301 then | 1454 then |
1302 | 1455 |
1303 if [ $buildver == "hg19" ] | 1456 if [ $buildver == "hg19" ] |
1304 then | 1457 then |
1305 echo -e "\nSPIDEX Annotation" | 1458 echo -e "\nSPIDEX Annotation" |
1306 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype spidex annovarinput $humandb 2>&1 | 1459 $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype spidex annovarinput $humandb 2>&1 |
1307 | 1460 |
1308 # split allelefrequency column into several columns, one per population | 1461 # split allelefrequency column into several columns, one per population |
1309 awk 'BEGIN{FS="\t" | 1462 awk 'BEGIN{FS="\t" |
1310 OFS="\t" | 1463 OFS="\t" |
1311 }{ | 1464 }{ |
1312 gsub(",","\t",$2) | 1465 gsub(",","\t",$2) |
1313 print $0 | 1466 print $0 |
1314 }END{}' annovarinput.${buildver}_spidex_dropped > $annovarout | 1467 }END{}' annovarinput.${buildver}_spidex_dropped > $annovarout |
1315 | 1468 |
1316 #annovarout=annovarinput.${buildver}_spidex_dropped | 1469 #annovarout=annovarinput.${buildver}_spidex_dropped |
1317 #head $annovarout | 1470 #head $annovarout |
1318 | 1471 |
1319 sed -i '1i\db\tSPIDEX_dpsi_max_tissue\tSPIDEX_dpsi_zscore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1472 sed -i '1i\db\tSPIDEX_dpsi_max_tissue\tSPIDEX_dpsi_zscore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1320 joinresults originalfile $annovarout 4 5 6 7 8 B.SPIDEX_dpsi_max_tissue,B.SPIDEX_dpsi_zscore | 1473 joinresults originalfile $annovarout 4 5 6 7 8 B.SPIDEX_dpsi_max_tissue,B.SPIDEX_dpsi_zscore |
1321 | |
1322 fi | 1474 fi |
1323 | 1475 |
1324 fi | 1476 fi |
1325 | 1477 |
1326 | 1478 #GERP++ |
1327 #GERP++ | 1479 if [ $gerp == "Y" ] |
1328 if [ $gerp == "Y" ] | 1480 then |
1329 then | 1481 echo -e "\nGERP++ Annotation" |
1330 echo -e "\nGERP++ Annotation" | 1482 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1 |
1331 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1 | 1483 |
1332 | 1484 annovarout="annovarinput.${buildver}_gerp++gt2_dropped" |
1333 annovarout="annovarinput.${buildver}_gerp++gt2_dropped" | 1485 sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1334 sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1486 joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++ |
1335 joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++ | 1487 fi |
1336 fi | 1488 |
1337 | 1489 |
1338 | 1490 #COSMIC |
1339 #COSMIC | 1491 if [[ $cosmic61 == "Y" && $buildver == "hg19" ]] |
1340 if [[ $cosmic61 == "Y" && $buildver == "hg19" ]] | 1492 then |
1341 then | 1493 echo -e "\nCOSMIC61 Annotation" |
1342 echo -e "\nCOSMIC61 Annotation" | 1494 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1 |
1343 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1 | 1495 |
1344 | 1496 annovarout="annovarinput.${buildver}_cosmic61_dropped" |
1345 annovarout="annovarinput.${buildver}_cosmic61_dropped" | 1497 sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1346 sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1498 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61 |
1347 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61 | 1499 |
1348 | 1500 fi |
1349 fi | 1501 |
1350 | 1502 if [[ $cosmic63 == "Y" && $buildver == "hg19" ]] |
1351 if [[ $cosmic63 == "Y" && $buildver == "hg19" ]] | 1503 then |
1352 then | 1504 echo -e "\nCOSMIC63 Annotation" |
1353 echo -e "\nCOSMIC63 Annotation" | 1505 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1 |
1354 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1 | 1506 |
1355 | 1507 annovarout="annovarinput.${buildver}_cosmic63_dropped" |
1356 annovarout="annovarinput.${buildver}_cosmic63_dropped" | 1508 sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1357 sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1509 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63 |
1358 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63 | 1510 |
1359 | 1511 fi |
1360 fi | 1512 |
1361 | 1513 if [[ $cosmic64 == "Y" && $buildver == "hg19" ]] |
1362 if [[ $cosmic64 == "Y" && $buildver == "hg19" ]] | 1514 then |
1363 then | 1515 echo -e "\nCOSMIC64 Annotation" |
1364 echo -e "\nCOSMIC64 Annotation" | 1516 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1 |
1365 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1 | 1517 |
1366 | 1518 annovarout="annovarinput.${buildver}_cosmic64_dropped" |
1367 annovarout="annovarinput.${buildver}_cosmic64_dropped" | 1519 sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1368 sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1520 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64 |
1369 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64 | 1521 |
1370 | 1522 fi |
1371 fi | 1523 |
1372 | 1524 if [[ $cosmic65 == "Y" && $buildver == "hg19" ]] |
1373 if [[ $cosmic65 == "Y" && $buildver == "hg19" ]] | 1525 then |
1374 then | 1526 echo -e "\nCOSMIC65 Annotation" |
1375 echo -e "\nCOSMIC65 Annotation" | 1527 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1 |
1376 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1 | 1528 |
1377 | 1529 annovarout="annovarinput.${buildver}_cosmic65_dropped" |
1378 annovarout="annovarinput.${buildver}_cosmic65_dropped" | 1530 sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1379 sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1531 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65 |
1380 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65 | 1532 |
1381 | 1533 fi |
1382 fi | 1534 |
1383 | 1535 if [[ $cosmic67 == "Y" && $buildver == "hg19" ]] |
1384 if [[ $cosmic67 == "Y" && $buildver == "hg19" ]] | 1536 then |
1385 then | 1537 echo -e "\nCOSMIC67 Annotation" |
1386 echo -e "\nCOSMIC67 Annotation" | 1538 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1 |
1387 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1 | 1539 |
1388 | 1540 annovarout="annovarinput.${buildver}_cosmic67_dropped" |
1389 annovarout="annovarinput.${buildver}_cosmic67_dropped" | 1541 sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1390 sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1542 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67 |
1391 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67 | 1543 |
1392 | 1544 fi |
1393 fi | 1545 |
1394 | 1546 if [[ $cosmic68 == "Y" && $buildver == "hg19" ]] |
1395 if [[ $cosmic68 == "Y" && $buildver == "hg19" ]] | 1547 then |
1396 then | 1548 echo -e "\nCOSMIC68 Annotation" |
1397 echo -e "\nCOSMIC68 Annotation" | 1549 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1 |
1398 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1 | 1550 |
1399 | 1551 annovarout="annovarinput.${buildver}_cosmic68_dropped" |
1400 annovarout="annovarinput.${buildver}_cosmic68_dropped" | 1552 sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1401 sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1553 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68 |
1402 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68 | 1554 |
1403 | 1555 fi |
1404 fi | 1556 |
1405 | 1557 if [[ $cosmic70 == "Y" && $buildver == "hg19" ]] |
1406 if [[ $cosmic70 == "Y" && $buildver == "hg19" ]] | 1558 then |
1407 then | 1559 echo -e "\nCOSMIC70 Annotation" |
1408 echo -e "\nCOSMIC70 Annotation" | 1560 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1 |
1409 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1 | 1561 |
1410 | 1562 annovarout="annovarinput.${buildver}_cosmic70_dropped" |
1411 annovarout="annovarinput.${buildver}_cosmic70_dropped" | 1563 sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1412 sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1564 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70 |
1413 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70 | 1565 |
1414 | 1566 fi |
1415 fi | 1567 |
1416 | 1568 if [[ $clinvar == "Y" && $buildver == "hg19" ]] |
1417 if [[ $clinvar == "Y" && $buildver == "hg19" ]] | 1569 then |
1418 then | 1570 echo -e "\nCLINVAR Annotation" |
1419 echo -e "\nCLINVAR Annotation" | 1571 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1 |
1420 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1 | 1572 |
1421 | 1573 annovarout="annovarinput.${buildver}_clinvar_20140211_dropped" |
1422 annovarout="annovarinput.${buildver}_clinvar_20140211_dropped" | 1574 sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1423 sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1575 joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR |
1424 joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR | 1576 |
1425 | 1577 fi |
1426 fi | 1578 |
1427 | 1579 if [[ $nci60 == "Y" && $buildver == "hg19" ]] |
1428 if [[ $nci60 == "Y" && $buildver == "hg19" ]] | 1580 then |
1429 then | 1581 echo -e "\nNCI60 Annotation" |
1430 echo -e "\nNCI60 Annotation" | 1582 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1 |
1431 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1 | 1583 |
1432 | 1584 annovarout="annovarinput.${buildver}_nci60_dropped" |
1433 annovarout="annovarinput.${buildver}_nci60_dropped" | 1585 sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1434 sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1586 joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60 |
1435 joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60 | 1587 |
1436 | 1588 fi |
1437 fi | 1589 |
1438 | 1590 #cg46 |
1439 #cg46 | 1591 if [[ $cg46 == "Y" ]] |
1440 if [[ $cg46 == "Y" ]] | 1592 then |
1441 then | 1593 echo -e "\nCG 46 genomes Annotation" |
1442 echo -e "\nCG 46 genomes Annotation" | 1594 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1 |
1443 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1 | 1595 |
1444 | 1596 annovarout="annovarinput.${buildver}_cg46_dropped" |
1445 annovarout="annovarinput.${buildver}_cg46_dropped" | 1597 sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1446 sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1598 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader} |
1447 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader} | 1599 |
1448 | 1600 fi |
1449 fi | 1601 |
1450 | 1602 |
1451 | 1603 #cg69 |
1452 #cg69 | 1604 if [[ $cg69 == "Y" ]] |
1453 if [[ $cg69 == "Y" ]] | 1605 then |
1454 then | 1606 echo -e "\nCG 69 genomes Annotation" |
1455 echo -e "\nCG 69 genomes Annotation" | 1607 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1 |
1456 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1 | 1608 |
1457 | 1609 annovarout="annovarinput.${buildver}_cg69_dropped" |
1458 annovarout="annovarinput.${buildver}_cg69_dropped" | 1610 sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout |
1459 sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout | 1611 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader} |
1460 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader} | 1612 |
1461 | 1613 fi |
1462 fi | 1614 |
1463 | 1615 |
1464 | 1616 |
1465 | 1617 if [ $convertcoords == "Y" ] |
1466 if [ $convertcoords == "Y" ] | 1618 then |
1467 then | 1619 echo "converting back coordinates" |
1468 echo "converting back coordinates" | 1620 awk 'BEGIN{ |
1469 awk 'BEGIN{ | 1621 FS="\t"; |
1470 FS="\t"; | 1622 OFS="\t"; |
1471 OFS="\t"; | 1623 }{ |
1472 }{ | 1624 if (FNR==1) |
1473 if (FNR==1) | 1625 print $0 |
1474 print $0 | 1626 if(FNR>1) { |
1475 if(FNR>1) { | 1627 $"'"${chrcol}"'" = "chr"$"'"${chrcol}"'" |
1476 $"'"${chrcol}"'" = "chr"$"'"${chrcol}"'" | 1628 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 }; |
1477 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 }; | 1629 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" }; |
1478 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" }; | 1630 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" }; |
1479 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" }; | 1631 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 }; |
1480 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 }; | 1632 print $0 |
1481 print $0 | 1633 } |
1482 | 1634 } |
1483 } | 1635 END{ |
1484 } | 1636 }' originalfile > originalfile_coords |
1485 END{ | 1637 else |
1486 }' originalfile > originalfile_coords | 1638 mv originalfile originalfile_coords |
1487 else | 1639 fi |
1488 mv originalfile originalfile_coords | 1640 |
1489 fi | 1641 #restore "chr" prefix? |
1490 | 1642 |
1491 #restore "chr" prefix? | 1643 #move to outputfile |
1492 | 1644 if [ ! -s annovarinput.invalid_input ] |
1493 #move to outputfile | 1645 then |
1494 if [ ! -s annovarinput.invalid_input ] | 1646 echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input |
1495 then | 1647 fi |
1496 echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input | 1648 |
1497 fi | 1649 cp originalfile_coords $outfile_all |
1498 | 1650 cp annovarinput.invalid_input $outfile_invalid 2>&1 |
1499 cp originalfile_coords $outfile_all | 1651 |
1500 cp annovarinput.invalid_input $outfile_invalid 2>&1 | 1652 sed -i 's/chrchr/chr/g' $outfile_all |
1501 | 1653 sed -i 's/chrchr/chr/g' $outfile_invalid |
1502 sed -i 's/chrchr/chr/g' $outfile_all | 1654 |
1503 sed -i 's/chrchr/chr/g' $outfile_invalid | |
1504 | |
1505 fi #if $dorunannovar | 1655 fi #if $dorunannovar |
1506 | 1656 |
1507 | 1657 |
1508 | 1658 |
1509 | 1659 |