diff tools/tools/annovar/annovar.sh @ 5:4600be69b96f draft

Added databases 1000g2015aug, SPIDEX, avsnp138, avsnp142, exac03
author saskia-hiltemann
date Thu, 01 Oct 2015 04:24:45 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/tools/annovar/annovar.sh	Thu Oct 01 04:24:45 2015 -0400
@@ -0,0 +1,1461 @@
+#!/bin/bash
+
+test="N"
+dofilter="N"
+
+#########################
+#	   DEFINE SOME
+#	    FUNCTIONS
+#########################
+
+function usage(){
+	echo "usage: $0 todo"
+}
+
+function runfilter(){
+	ifile=$1	
+	columnname=$2
+	threshold=$3
+
+	if [[ $threshold == "-1" ]]
+	then
+		echo "not filtering"
+		return
+	fi
+	
+	echo "filtering: $columnname, $threshold"
+	cat $ifile
+
+	#get column number corresponding to column header
+	column=`awk 'BEGIN{
+					FS="\t";
+					col=-1
+				}{
+					if(FNR==1){
+						for(i=1;i<=NF;i++){
+							if($i == "'"${columnname}"'") 
+								col=i 
+						} 
+						print col 
+					}
+				}' $ifile `
+
+	if [ $column == -1 ]
+	then
+		echo "no such column, exiting"
+		return
+	fi	
+
+	#perform filtering using the threshold
+	awk 'BEGIN{
+		FS="\t";
+		OFS="\t";
+	}{
+		if(FNR==1) 
+			print $0; 
+		if(FNR>1){
+			if( $"'"${column}"'" == "" )  # empty column, then print
+				print $0
+			else if ("'"${threshold}"'" == "text"){}  #if set to text dont check threshold
+				
+			else if ($"'"${column}"'" < "'"${threshold}"'")  #else do check it
+				print $0	
+		}
+	}' $ifile > tmpfile
+
+	mv tmpfile $ifile	
+}
+
+# arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols
+function joinresults(){
+	ofile=$1
+	rfile=$2
+	colchr=$3
+	colstart=$4
+	colend=$5
+	colref=$6
+	colobs=$7
+	addcols=$8 #e.g. "B.col1,B.col2"
+	
+	test="N"
+	
+	# echo "joining result with original file"
+	if [ $test == "Y" ]
+	then 	
+		echo "ofile: $ofile"
+		head $ofile 
+		echo "rfile: $rfile"
+		head $rfile
+	fi
+	numlines=`wc $rfile | cut -d" " -f2`
+	
+	# if empty results file, just add header fields
+	if [[ ! -s $rfile ]] 
+	then			
+		dummycol=${addcols:2}
+		outputcol=${dummycol//",B."/"	"}
+		numcommas=`echo "$addcols" | grep -o "," | wc -l`		
+		
+		awk 'BEGIN{FS="\t";OFS="\t"}{
+				if(FNR==1)
+					print $0,"'"$outputcol"'"; 
+				else{
+					printf $0
+					for(i=0;i<="'"$numcommas"'"+1;i++)
+						printf "\t"
+					printf "\n"
+				}
+			}END{}' $ofile > tempofile
+			
+			mv tempofile $ofile		
+		return
+	fi
+	
+
+	#get input file column names for cgatools join
+	col_chr_name=`head -1  $rfile | cut -f${colchr}`
+	col_start_name=`head -1  $rfile | cut -f${colstart}`
+	col_end_name=`head -1  $rfile | cut -f${colend}`
+	col_ref_name=`head -1  $rfile | cut -f${colref}`
+	col_obs_name=`head -1  $rfile | cut -f${colobs}`
+
+	#get annotation file column names for cgatools join
+	chr_name=`head -1  $ofile | cut -f${chrcol}`
+	start_name=`head -1  $ofile | cut -f${startcol}`
+	end_name=`head -1  $ofile | cut -f${endcol}`
+	ref_name=`head -1  $ofile | cut -f${refcol}`
+	obs_name=`head -1  $ofile | cut -f${obscol}`
+
+	if [ $test == "Y" ]	
+	then
+		echo "input file"
+		echo "chr   col: $col_chr_name ($colchr)"	
+		echo "start col: $col_start_name ($colstart)"	
+		echo "end   col: $col_end_name ($colend)"	
+		echo "ref   col: $col_ref_name ($colref)"	
+		echo "obs   col: $col_obs_name ($colobs)"	
+		echo ""
+		echo "annotation file"
+		echo "chr   col: $chr_name ($chrcol)"	
+		echo "start col: $start_name ($startcol)"	
+		echo "end   col: $end_name ($endcol)"	
+		echo "ref   col: $ref_name ($refcol)"	
+		echo "obs   col: $obs_name ($obscol)"	
+	fi
+
+	#perform join
+	cgatools join --beta \
+		--input $ofile $rfile \
+		--output temporiginal \
+		--match ${chr_name}:${col_chr_name} \
+		--match ${start_name}:${col_start_name} \
+		--match ${end_name}:${col_end_name} \
+		--match ${ref_name}:${col_ref_name} \
+		--match ${obs_name}:${col_obs_name} \
+		--select A.*,$addcols \
+		--always-dump \
+		--output-mode compact 
+
+	#replace originalfile
+	sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header
+	mv temporiginal originalfile
+		
+	if [ $test == "Y" ]
+	then
+		echo "joining complete"
+		head originalfile
+		echo ""	
+	fi
+	
+}
+
+
+
+
+#################################
+#
+#	   PARSE PARAMETERS
+#
+#################################
+
+
+set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
+[ $# -eq 0 ] && usage
+
+
+
+while [ $# -gt 0 ]
+do
+    case "$1" in
+       	--inputfile)      			infile=$2;shift;;  # inputfile
+		--buildver)					buildvertmp=$2;shift;; # hg18 or hg19
+		--humandb)					humandbtmp=$2;shift;; # location of humandb database
+	 	--varfile)      			varfile=$2;shift;; # Y or N  
+		--VCF)						vcf=$2;shift;; #Y or N
+		--chrcol)      				chrcol=$2;shift;;  # which column has chr 
+		--startcol)      			startcol=$2;shift;;  # which column has start
+		--endcol)      				endcol=$2;shift;;  # which column has end
+		--refcol)      				refcol=$2;shift;;  # which column has ref
+		--obscol)      				obscol=$2;shift;;  # which column has alt
+		--vartypecol)      			vartypecol=$2;shift;;  # which column has vartype
+		--convertcoords)			convertcoords=$2;shift;;  # Y or N convert coordinate from CG to 1-based?		
+		--geneanno)      			geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene  
+		--hgvs)						hgvs=$2;shift;;
+		--verdbsnp)					verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")"
+		--tfbs)      				tfbs=$2;shift;; 	# Y or N 
+		--mce)      				mce=$2;shift;; 	# Y or N 
+		--cytoband)      			cytoband=$2;shift;; # Y or N  
+		--segdup)      				segdup=$2;shift;; 	# Y or N 				
+        --dgv)      				dgv=$2;shift;; 	# Y or N 
+		--gwas)      				gwas=$2;shift;; 	# Y or N 
+		--ver1000g) 				ver1000g=$2;shift;; 	# Y or N 
+		--cg46)						cg46=$2;shift;;
+		--cg69)						cg69=$2;shift;;		
+		--impactscores)      		impactscores=$2;shift;; # Y or N 
+		--newimpactscores)      	newimpactscores=$2;shift;; # Y or N 
+		--otherinfo)				otherinfo=$2;shift;; 
+		--scriptsdir)	      		scriptsdirtmp=$2;shift;; # Y or N 
+		--esp)      				esp=$2;shift;; 	# Y or N 
+		--exac03)                                 exac03=$2;shift;;
+		--gerp)      				gerp=$2;shift;; 	# Y or N 
+		--cosmic61)					cosmic61=$2;shift;;  # Y or N
+		--cosmic63)					cosmic63=$2;shift;;  # Y or N
+		--cosmic64)					cosmic64=$2;shift;;  # Y or N
+		--cosmic65)					cosmic65=$2;shift;;  # Y or N
+		--cosmic67)					cosmic67=$2;shift;;  # Y or N
+		--cosmic68)					cosmic68=$2;shift;;  # Y or N
+		--nci60)					nci60=$2;shift;;  # Y or N
+		--clinvar)					clinvar=$2;shift;;  # Y or N
+		--filt_dbsnp)				filt_dbsnp=$2;shift;;
+		--filt1000GALL)				threshold_1000g_ALL=$2;shift;; #threshold value
+		--filt1000GAFR)				threshold_1000g_AFR=$2;shift;; #threshold value
+		--filt1000GAMR)				threshold_1000g_AMR=$2;shift;; #threshold value
+		--filt1000GASN)				threshold_1000g_ASN=$2;shift;; #threshold value
+		--filt1000GEUR)				threshold_1000g_EUR=$2;shift;; #threshold value
+		--filtESP6500ALL)			threshold_ESP6500_ALL=$2;shift;; #threshold value
+		--filtESP6500EA)			threshold_ESP6500_EA=$2;shift;; #threshold value
+		--filtESP6500AA)			threshold_ESP6500_AA=$2;shift;; #threshold value
+		--filtcg46)					threshold_cg46=$2;shift;;
+		--filtcg69)					threshold_cg69=$2;shift;;
+		--outall)      				outfile_all=$2;shift;; # file 
+		--outfilt)      			outfile_filt=$2;shift;; # file
+		--outinvalid)				outfile_invalid=$2;shift;; #file
+		--dorunannovar)				dorunannovar=$2;shift;; 	#Y or N		
+       -h)        	shift;;
+	   --)        	shift;break;;
+       -*)        	usage;;
+       *)         	break;;            
+    esac
+    shift
+done
+
+#sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma
+humandb=${humandbtmp%,*}
+buildver=${buildvertmp%,*}
+scriptsdir=${scriptsdirtmp%,*}
+
+
+if [ $test == "Y" ]
+then
+	echo "dorunannovar: $dorunannovar"
+	echo "infile: $infile"
+	echo "buildver: $buildver"
+	echo "annovardb: $humandb"
+	echo "verdbnsp: $verdbsnp"
+	echo "geneanno: $geneanno"
+	echo "tfbs: $tfbs"
+	echo "mce: $mce"
+	echo "cytoband: $cytoband"
+	echo "segdup: $segdup"
+	echo "dgv: $dgv"
+	echo "gwas: $gwas"	
+	echo "g1000: ${g1000}"
+	echo "cg46: ${cg46}"	
+	echo "cg69: ${cg69}"
+	echo "impactscores: $impactscores"
+	echo "impactscores: $newimpactscores"	
+	echo "esp: $esp"
+	echo "gerp: $gerp"
+	echo "cosmic: $cosmic"
+	echo "outfile: $outfile_all"
+	echo "outinvalid: $outfile_invalid"
+	echo "outfiltered: $outfile_filt"
+	echo "varfile: $varfile"
+	echo "vcf" $vcf
+	echo "chrcol: $chrcol"
+	echo "startcol: $startcol"
+	echo "endcol: $endcol"
+	echo "refcol: $refcol"
+	echo "obscol: $obscol"
+	echo "convertcoords: $convertcoords"
+	echo "vartypecol: $vartypecol"
+	echo "dofilter: $dofilter"
+	echo "threshold_1000g_ALL  : $threshold_1000g_ALL"
+	echo "threshold_1000g_AFR  : $threshold_1000g_AFR"
+	echo "threshold_1000g_AMR  : $threshold_1000g_AMR"
+	echo "threshold_1000g_ASN  : $threshold_1000g_ASN"
+	echo "threshold_1000g_EUR  : $threshold_1000g_EUR"
+	echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL"
+	echo "threshold_ESP6500_EA : $threshold_ESP6500_EA"
+	echo "threshold_ESP6500_AA : $threshold_ESP6500_AA"
+
+fi
+
+
+
+############################################
+#
+#       Annotate Variants 
+#
+############################################
+
+#parse geneanno param
+refgene="N"
+knowngene="N"
+ensgene="N"	
+
+if [[ $geneanno =~ "refSeq" ]]
+then
+	refgene="Y"
+fi
+if [[ $geneanno =~ "knowngene" ]]
+then
+	knowngene="Y"
+fi
+if [[ $geneanno =~ "ensgene" ]]
+then
+	ensgene="Y"
+fi
+if [ $hgvs == "N" ]
+then
+	hgvs=""
+fi
+
+#parse verdbsnp/1000g/esp strings
+dbsnpstr=${verdbsnp//,/ }
+filt_dbsnpstr=${filt_dbsnp//,/ }
+g1000str=${ver1000g//,/ }
+espstr=${esp//,/ }
+
+if [ $test == "Y" ]
+then
+	echo "annotate dbsnp: $dbsnpstr"
+	echo "annotate esp:   $espstr"
+	echo "filter dbsnp: $filt_dbsnpstr"
+fi
+
+mutationtaster="N"
+avsift="N"
+lrt="N"
+polyphen2="N"
+phylop="N"
+ljbsift="N"
+
+#parse old impactscores param (obsolete)
+if [[ $impactscores =~ "mutationtaster" ]]
+then
+	mutationtaster="Y"
+fi
+if [[ $impactscores =~ "sift" ]]
+then
+	avsift="Y"
+fi
+if [[ $impactscores =~ "lrt" ]]
+then
+	lrt="Y"
+fi
+if [[ $impactscores =~ "ljbsift" ]]
+then
+	ljbsift="Y"
+fi
+if [[ $impactscores =~ "ljb2sift" ]]
+then
+	ljb2sift="Y"
+fi
+if [[ $impactscores =~ "pp2" ]]
+then
+	polyphen2="Y"
+fi
+if [[ $impactscores =~ "phylop" ]]
+then
+	phylop="Y"
+fi
+
+if [[ $varfile == "Y" ]]
+then
+	convertcoords="Y"
+fi
+
+#ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
+
+ljb2_sift="N"
+ljb2_pp2hdiv="N"
+ljb2_pp2hvar="N"
+ljb2_lrt="N"
+ljb2_mt="N"
+ljb2_ma="N"
+ljb2_fathmm="N"
+ljb2_gerp="N"
+ljb2_phylop="N"
+ljb2_siphy="N"
+
+# parse ljb2 newimpactscores param
+# ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy
+if [[ $newimpactscores =~ "ljb2_sift" ]]
+then
+	ljb2_sift="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]]
+then
+	ljb2_pp2hdiv="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_pp2hvar" ]]
+then
+	ljb2_pp2hvar="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_lrt" ]]
+then
+	ljb2_lrt="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_mt" ]]
+then
+	ljb2_mt="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_ma" ]]
+then
+	ljb2_ma="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_fathmm" ]]
+then
+	ljb2_fathmm="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_gerp" ]]
+then
+	ljb2_gerp="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_phylop" ]]
+then
+	ljb2_phylop="Y"
+fi
+if [[ $newimpactscores =~ "ljb2_siphy" ]]
+then
+	ljb2_siphy="Y"
+fi
+
+if [ $otherinfo == "N" ]
+then
+	otherinfo=""
+fi
+
+
+#column header names we will be adding
+# ESP 6500
+esp6500si_colheader_ALL="ESP6500si_ALL"
+esp6500si_colheader_EA="ESP6500si_EA"
+esp6500si_colheader_AA="ESP6500si_AA"
+esp6500_colheader_ALL="ESP6500_ALL"
+esp6500_colheader_EA="ESP6500_EA"
+esp6500_colheader_AA="ESP6500_AA"
+esp5400si_colheader_ALL="ESP5400si_ALL"
+esp5400si_colheader_EA="ESP5400si_EA"
+esp5400si_colheader_AA="ESP5400si_AA"
+esp5400_colheader_ALL="ESP5400_ALL"
+esp5400_colheader_EA="ESP5400_EA"
+esp5400_colheader_AA="ESP5400_AA"
+
+
+# cg46 cg69
+cg46_colheader="CG_46_genomes"
+cg69_colheader="CG_69_genomes"
+
+cp $infile originalfile
+#run annovar or filter only?
+if [ $dorunannovar == "Y" ]
+then
+
+
+	####################################
+	#
+	#       PREPARE INPUT FILE
+	#
+	####################################
+	
+	echo "converting input file"
+	vcfheader=""
+	if [ $vcf == "Y" ]     #if CG varfile, convert
+	then 
+		# convert vcf to annovarinput
+		$scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1
+		
+		#construct header line from vcf file		
+		cat $infile | grep "#CHROM" > additionalcols
+		sed -i 's/#//g' additionalcols 			
+		vcfheader="\t`cat additionalcols`"
+		echo "vcfheader:$vcfheader"
+		echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile
+		cat annovarinput >> originalfile
+		
+		chrcol=1
+		startcol=2
+		endcol=3
+		refcol=4
+		obscol=5
+
+	
+	elif [ $varfile == "Y" ]     #if CG varfile, convert
+	then 
+		# convert varfile
+		$scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1
+		echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile
+		cat annovarinput | cut -f1-6,8 >> originalfile
+		cat annovarinput | cut -f1-5 >> annovarinput2
+		mv annovarinput2 annovarinput
+
+		chrcol=1
+		startcol=2
+		endcol=3
+		refcol=4
+		obscol=5
+
+	elif [ $convertcoords == "Y" ]    # if CG-coordinates, convert 
+	then
+		#echo "rearranging columns and converting coordinates"
+		awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if(FNR>1) { 
+                                        gsub(/chr/,"",$"'"${chrcol}"'")
+					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; 
+					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; 
+			
+					printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");				
+				}
+			}	
+			END{
+			}' $infile > annovarinput
+
+			#remove any "chr" prefixes
+			#sed -i '2,$s/chr//g' annovarinput
+
+			awk 'BEGIN{
+				FS="\t";
+				OFS="\t";				
+			}{
+                                
+				if(FNR>=1) { 
+				        gsub(/chr/,"",$"'"${chrcol}"'")
+					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; 
+					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; 
+			
+					print $0			
+				}
+			}	
+			END{
+			}' $infile > originalfile
+
+			#remove any "chr" prefixes
+			#sed -i '2,$s/chr//g' originalfile
+			sed -i 's/omosome/chromosome/g' originalfile
+
+
+	else #only rearrange columns if already 1-based coordinates	
+		echo "rearranging columns "
+		awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if(FNR>1) { 		                                    
+                                    printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");				
+				}
+			}	
+			END{
+			}' $infile > annovarinput
+
+			#remove any "chr" prefixes
+			sed -i '2,$s/chr//g' annovarinput
+			sed '2,$s/chr//g' $infile > originalfile
+			sed -i 's/omosome/chromosome/g' originalfile
+	fi
+
+	echo "...finished conversion"
+	
+
+
+
+	####################################
+	#
+	#       RUN ANNOVAR COMMANDS
+	#
+	####################################
+
+	
+
+	######    gene-based annotation   #######
+
+	# RefSeq Gene
+	if [ $refgene == "Y" ]
+	then
+		echo -e "\nrefSeq gene"
+		$scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1
+		
+		annovarout=annovarinput.variant_function
+		sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout  3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene
+
+		annovarout=annovarinput.exonic_variant_function 
+		sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout  
+		joinresults originalfile $annovarout  4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange
+	fi
+
+
+	# UCSC KnownGene
+	if [ $knowngene == "Y" ]
+	then
+		echo -e "\nUCSC known gene"
+		$scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.variant_function
+		sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene
+
+		annovarout=annovarinput.exonic_variant_function
+		sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange
+	fi
+
+
+	# Emsembl Gene
+	if [ $ensgene == "Y" ]
+	then
+		echo -e "\nEnsembl gene"
+		$scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.variant_function
+		sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene
+
+		annovarout=annovarinput.exonic_variant_function
+		sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange
+	fi
+
+
+
+	######    region-based annotation   #######
+
+
+	# Transcription Factor Binding Sites Annotation
+	if [ $mce == "Y" ]
+	then
+		echo -e "\nMost Conserved Elements"
+	
+		if [ $buildver == "hg18" ]
+		then
+			$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1
+			annovarout=annovarinput.${buildver}_phastConsElements44way
+			sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+			joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way	
+
+		else #hg19	
+			$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1
+			annovarout=annovarinput.${buildver}_phastConsElements46way	   
+			sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+			joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way	
+		fi
+	
+	fi
+
+
+
+	# Transcription Factor Binding Sites Annotation
+	if [ $tfbs == "Y" ]
+	then
+		echo -e "\nTranscription Factor Binding Site Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1
+	
+		# arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns
+		annovarout=annovarinput.${buildver}_tfbsConsSites
+		sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS	
+	fi
+
+
+
+	# Identify cytogenetic band for genetic variants
+	if [ $cytoband == "Y" ]
+	then
+		echo -e "\nCytogenic band Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_cytoBand
+		sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.Band	
+	fi
+
+
+	# Identify variants located in segmental duplications
+	if [ $segdup == "Y" ]
+	then
+		echo -e "\nSegmental Duplications Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_genomicSuperDups
+		sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup	
+	fi
+
+
+
+	# Identify previously reported structural variants in DGV
+	if [ $dgv == "Y" ]
+	then
+		echo -e "\nDGV Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_dgvMerged
+		sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.DGV	
+	fi
+
+
+	# Identify variants reported in previously published GWAS studies
+	if [ $gwas == "Y" ]
+	then
+		echo -e "\nGWAS Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_gwasCatalog
+		sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS	
+	fi
+
+
+	
+	
+	######    filter-based annotation   #######
+
+	#dbSNP
+	for version in $dbsnpstr
+	do
+		if [ $version == "None" ] 
+		then
+			break
+		fi
+		echo -e "\ndbSNP region Annotation, version: $version"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_${version}_dropped		
+		sed -i '1i\db\tdb'${version}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.db${version}	
+	
+
+	done
+
+
+
+	#1000 Genomes
+
+	if [ $ver1000g != "None" ] 
+	then
+
+		for version in $g1000str
+		do
+			#column headers
+			g1000_colheader_ALL="${version}_ALL"
+			g1000_colheader_AFR="${version}_AFR"
+			g1000_colheader_AMR="${version}_AMR"
+			g1000_colheader_ASN="${version}_ASN"
+			g1000_colheader_EUR="${version}_EUR"
+			g1000_colheader_EAS="${version}_EAS"
+			g1000_colheader_SAS="${version}_SAS"
+			g1000_colheader_CEU="${version}_CEU"
+			g1000_colheader_YRI="${version}_YRI"
+			g1000_colheader_JPTCHB="${version}_JPTCHB"
+			
+			doALL="N"
+			doAMR="N"
+			doAFR="N"
+			doASN="N"
+			doEAS="N"
+			doSAS="N"
+			doEUR="N"
+			doCEU="N"
+			doYRI="N"
+			doJPTCHB="N"
+
+
+			if [ $version == "1000g2012apr" ]
+			then
+				fileID="2012_04"
+				doALL="Y"
+				if [ $buildver == "hg19" ]
+				then
+					doAMR="Y"
+					doAFR="Y"
+					doASN="Y"
+					doEUR="Y"
+				fi
+			elif [ $version == "1000g2014oct" ]
+			then
+				fileID="2014_10"
+				doALL="Y"
+				doAMR="Y"
+				doAFR="Y"
+				doEUR="Y"
+				doEAS="Y"
+				if [ $buildver == "hg19" ]
+				then
+					doSAS="Y"
+				fi
+			elif [[ $version == "1000g2012feb" && $buildver == "hg19" ]]
+			then
+				fileID="2012_02"				
+				doALL="Y"	
+			elif [[ $version == "1000g2010nov" && $buildver == "hg19" ]]
+			then
+				fileID="2010_11"
+				doALL="Y"	
+			elif [[ $version == "1000g2010jul" && $buildver == "hg18" ]]
+			then
+				fileID="2010_07"
+				doALL="N"
+				doCEU="Y"
+				doYRI="Y"
+				doJPTCHB="Y"
+			else
+				echo "unrecognized 1000g version, skipping"
+			fi
+
+			#ALL
+			if [ $doALL == "Y"  ]
+				then
+				echo -e "\n1000Genomes ALL"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL	
+			fi
+
+			# AFR
+			if [ $doAFR == "Y"  ]
+			then
+				echo -e "\n1000Genomes AFR"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR	
+			fi
+
+			
+			# AMR
+			if [ $doAMR == "Y"  ]
+			then
+				echo -e "\n1000Genomes AMR"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR	
+			fi
+
+			# ASN
+			if [ $doASN == "Y"  ]
+			then
+				echo -e "\n1000Genomes ASN"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN	
+			fi
+			
+			# EAS
+			if [ $doEAS == "Y"  ]
+			then
+				echo -e "\n1000Genomes EAS"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS	
+			fi
+			
+			# SAS
+			if [ $doSAS == "Y"  ]
+			then
+				echo -e "\n1000Genomes SAS"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS	
+			fi
+			
+			# EUR
+			if [ $doEUR == "Y"  ]
+			then
+				echo -e "\n1000Genomes EUR"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR	
+			fi
+
+			# CEU
+			if [ $doCEU == "Y"  ]
+			then
+				echo -e "\n1000Genomes CEU"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU	
+			fi
+
+			# YRI
+			if [ $doYRI == "Y"  ]
+			then
+				echo -e "\n1000Genomes YRI"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI	
+	
+
+			fi
+
+			#JPTCHB
+			if [ $doJPTCHB == "Y"  ]
+			then
+				echo -e "\n1000Genomes JPTCHB"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB	
+			fi
+
+		done
+	fi
+
+
+	
+	
+	#### IMPACT SCORE ANNOTATIONS
+
+
+	if [ $ljb2_sift == "Y" ]
+	then
+		echo -e "\nLJB2 SIFT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_sift_dropped
+		sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT	
+	fi
+	
+	if [ $ljb2_pp2hdiv == "Y" ]
+	then
+		echo -e "\nLJB2 pp2hdiv Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped
+		sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV	
+	fi
+	
+	if [ $ljb2_pp2hvar == "Y" ]
+	then
+		echo -e "\nLJB2 pp2hvar Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped
+		
+		head $annovarout
+		sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR	
+	fi
+	
+	if [ $ljb2_lrt == "Y" ]
+	then
+		echo -e "\nLJB2 LRT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_lrt_dropped
+		sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT	
+	fi
+	
+	if [ $ljb2_mt == "Y" ]
+	then
+		echo -e "\nLJB2 mutationtaster Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_mt_dropped
+		sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster	
+	fi
+	
+	if [ $ljb2_ma == "Y" ]
+	then
+		echo -e "\nLJB2 mutationassessor Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_ma_dropped
+		sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor	
+	fi
+	
+	if [ $ljb2_fathmm == "Y" ]
+	then
+		echo -e "\nLJB2 FATHMM Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped
+		sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM	
+	fi
+	
+	if [ $ljb2_gerp == "Y" ]
+	then
+		echo -e "\nLJB2 GERP++ Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped
+		sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++	
+	fi
+	
+	if [ $ljb2_phylop == "Y" ]
+	then
+		echo -e "\nLJB2 PhyloP Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_phylop_dropped
+		sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP	
+	fi
+	
+	if [ $ljb2_siphy == "Y" ]
+	then
+		echo -e "\nLJB2 SiPhy Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb2_siphy_dropped
+		sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy	
+	fi
+
+
+
+	### OLD IMPACT SCORE ANNOTATIONS
+
+	# SIFT
+	if [ $avsift == "Y" ]
+	then
+		echo -e "\nSIFT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_avsift_dropped
+		sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT	
+	fi
+
+	#ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
+	# SIFT2
+	if [ $ljbsift == "Y" ]
+	then
+		echo -e "\nLJB SIFT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_sift_dropped
+		sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT
+	fi
+
+
+	# PolyPhen2
+	if [ $polyphen2 == "Y" ]
+	then
+		echo -e "\nPolyPhen Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_pp2_dropped
+		sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2
+	fi
+
+
+	# MutationTaster
+	if [ $mutationtaster == "Y" ]
+	then
+		echo -e "\nMutationTaster Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_mt_dropped
+		sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster
+	fi
+
+
+	# LRT
+	if [ $lrt == "Y" ]
+	then
+		echo -e "\nLRT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_lrt_dropped
+		sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore
+	fi
+
+	# PhyloP
+	if [ $phylop == "Y" ]
+	then
+		echo -e "\nPhyloP Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_phylop_dropped
+		sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP
+	fi
+
+
+	### ESP  Exome Variant Server
+	if [ $esp != "None" ] 
+	then
+		echo -e "\nESP Annotation"
+		for version in $espstr
+		do
+			echo "version: $version"
+			# 6500si ALL
+			if [ $version == "esp6500si_all" ]
+			then 				
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500si_all_dropped
+				sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL
+			fi
+
+			
+			# 6500si European American
+			if [ $version == "esp6500si_ea" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500si_ea_dropped
+				sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA
+			fi
+
+			# 6500si African Americans
+			if [ $version == "esp6500si_aa" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500si_aa_dropped
+				sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA
+			fi
+
+
+			# 6500 ALL
+			if [ $version == "esp6500_all" ]
+			then 				
+				ls
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1
+				
+				annovarout=annovarinput.${buildver}_esp6500_all_dropped				
+				sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL
+			fi
+
+			
+			# 6500 European American
+			if [ $version == "esp6500_ea" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1	
+				annovarout=annovarinput.${buildver}_esp6500_ea_dropped
+				sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA
+			fi
+
+			# 6500 African Americans
+			if [ $version == "esp6500_aa" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500_aa_dropped
+				sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA
+			fi
+
+
+			# 5400 ALL
+			if [ $version == "esp5400_all" ]
+			then 				
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp5400_all_dropped
+				sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL
+			fi
+
+			
+			# 5400 European American
+			if [ $version == "esp5400_ea" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp5400_ea_dropped
+				sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA
+			fi
+
+			# 5400 African Americans
+			if [ $version == "esp5400_aa" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp5400_aa_dropped
+				sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA
+			fi
+
+		done
+	fi	
+
+	
+	#exac03
+	if [ $exac03 == "Y" ]
+	then
+		echo -e "\nexac03 Annotation"
+		$scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver $otherinfo -dbtype exac03 annovarinput $humandb 2>&1
+	        
+		#annovarout.tmp=annovarinput.${buildver}_exac03_dropped
+		
+		# split allelefrequency column into several columns, one per population
+		awk 'BEGIN{FS="\t"
+		           OFS="\t"		           
+		           }{		           
+		           gsub(",","\t",$2)
+		           print $0		           
+		           }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout
+		
+		head $annovarout
+		sed -i '1i\db\tExAC_Freq\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_Freq,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS	
+	fi
+
+	#GERP++
+	if [ $gerp == "Y" ]
+	then
+		echo -e "\nGERP++ Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_gerp++gt2_dropped"
+		sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++
+	fi
+
+
+	#COSMIC
+	if [[ $cosmic61 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC61 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic61_dropped"
+		sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61
+
+	fi
+
+	if [[ $cosmic63 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC63 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic63_dropped"
+		sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63
+
+	fi
+
+	if [[ $cosmic64 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC64 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic64_dropped"
+		sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64
+
+	fi
+	
+	if [[ $cosmic65 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC65 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic65_dropped"
+		sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65
+
+	fi
+
+	if [[ $cosmic67 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC67 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic67_dropped"
+		sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67
+
+	fi
+	
+	if [[ $cosmic68 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC68 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic68_dropped"
+		sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68
+
+	fi
+	
+	if [[ $cosmic70 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC70 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic70_dropped"
+		sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70
+
+	fi
+
+	if [[ $clinvar == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCLINVAR Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_clinvar_20140211_dropped"
+		sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR
+
+	fi
+	
+	if [[ $nci60 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nNCI60 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_nci60_dropped"
+		sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60
+
+	fi
+	
+	#cg46
+	if [[ $cg46 == "Y"  ]]
+	then
+		echo -e "\nCG 46 genomes Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cg46_dropped"
+		sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}
+
+	fi
+
+
+	#cg69
+	if [[ $cg69 == "Y"  ]]
+	then
+		echo -e "\nCG 69 genomes Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cg69_dropped"
+		sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}
+
+	fi
+
+
+	
+	if [ $convertcoords == "Y" ]
+	then
+		echo "converting back coordinates"
+		awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if (FNR==1)
+					print $0
+				if(FNR>1) { 
+					$"'"${chrcol}"'" = "chr"$"'"${chrcol}"'"
+					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 }; 	
+					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" };			
+					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" };
+					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 }; 
+					print $0
+								
+				}
+			}	
+			END{
+			}' originalfile > originalfile_coords
+	else
+		mv originalfile originalfile_coords
+	fi
+
+	#restore "chr" prefix?
+
+	#move to outputfile
+	if [ ! -s annovarinput.invalid_input ]
+	then
+		echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input
+	fi
+	
+	cp originalfile_coords $outfile_all
+	cp annovarinput.invalid_input $outfile_invalid 2>&1
+	
+	sed -i 's/chrchr/chr/g' $outfile_all
+	sed -i 's/chrchr/chr/g' $outfile_invalid
+	
+fi #if $dorunannovar
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+