Mercurial > repos > saskia-hiltemann > annovar

#!/bin/bash

test="N"
dofilter="N"

#########################
#       DEFINE SOME
#        FUNCTIONS
#########################

function usage(){
    echo "usage: $0 todo"
}

function runfilter(){
    ifile=$1
    columnname=$2
    threshold=$3

    if [[ $threshold == "-1" ]]
    then
        echo "not filtering"
        return
    fi

    echo "filtering: $columnname, $threshold"
    cat $ifile

    #get column number corresponding to column header
    column=`awk 'BEGIN{
                    FS="\t";
                    col=-1
                }{
                    if(FNR==1){
                        for(i=1;i<=NF;i++){
                            if($i == "'"${columnname}"'")
                                col=i
                        }
                        print col
                    }
                }' $ifile `

    if [ $column == -1 ]
    then
        echo "no such column, exiting"
        return
    fi

    #perform filtering using the threshold
    awk 'BEGIN{
        FS="\t";
        OFS="\t";
    }{
        if(FNR==1)
            print $0;
        if(FNR>1){
            if( $"'"${column}"'" == "" )  # empty column, then print
                print $0
            else if ("'"${threshold}"'" == "text"){}  #if set to text dont check threshold

            else if ($"'"${column}"'" < "'"${threshold}"'")  #else do check it
                print $0
        }
    }' $ifile > tmpfile

    mv tmpfile $ifile
}

# arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols
function joinresults(){
    ofile=$1
    rfile=$2
    colchr=$3
    colstart=$4
    colend=$5
    colref=$6
    colobs=$7
    addcols=$8 #e.g. "B.col1,B.col2"

    test="N"

    # echo "joining result with original file"
    if [ $test == "Y" ]
    then
        echo "ofile: $ofile"
        head $ofile
        echo "rfile: $rfile"
        head $rfile
    fi
    numlines=`wc $rfile | cut -d" " -f2`

    # if empty results file, just add header fields
    if [[ ! -s $rfile ]]
    then
        dummycol=${addcols:2}
        outputcol=${dummycol//",B."/"   "}
        numcommas=`echo "$addcols" | grep -o "," | wc -l`

        awk 'BEGIN{FS="\t";OFS="\t"}{
                if(FNR==1)
                    print $0,"'"$outputcol"'";
                else{
                    printf $0
                    for(i=0;i<="'"$numcommas"'"+1;i++)
                        printf "\t"
                    printf "\n"
                }
            }END{}' $ofile > tempofile

            mv tempofile $ofile
        return
    fi


    #get input file column names for cgatools join
    col_chr_name=`head -1  $rfile | cut -f${colchr}`
    col_start_name=`head -1  $rfile | cut -f${colstart}`
    col_end_name=`head -1  $rfile | cut -f${colend}`
    col_ref_name=`head -1  $rfile | cut -f${colref}`
    col_obs_name=`head -1  $rfile | cut -f${colobs}`

    #get annotation file column names for cgatools join
    chr_name=`head -1  $ofile | cut -f${chrcol}`
    start_name=`head -1  $ofile | cut -f${startcol}`
    end_name=`head -1  $ofile | cut -f${endcol}`
    ref_name=`head -1  $ofile | cut -f${refcol}`
    obs_name=`head -1  $ofile | cut -f${obscol}`

    if [ $test == "Y" ]
    then
        echo "input file"
        echo "chr   col: $col_chr_name ($colchr)"
        echo "start col: $col_start_name ($colstart)"
        echo "end   col: $col_end_name ($colend)"
        echo "ref   col: $col_ref_name ($colref)"
        echo "obs   col: $col_obs_name ($colobs)"
        echo ""
        echo "annotation file"
        echo "chr   col: $chr_name ($chrcol)"
        echo "start col: $start_name ($startcol)"
        echo "end   col: $end_name ($endcol)"
        echo "ref   col: $ref_name ($refcol)"
        echo "obs   col: $obs_name ($obscol)"
    fi

    #perform join
    cgatools join --beta \
        --input $ofile $rfile \
        --output temporiginal \
        --match ${chr_name}:${col_chr_name} \
        --match ${start_name}:${col_start_name} \
        --match ${end_name}:${col_end_name} \
        --match ${ref_name}:${col_ref_name} \
        --match ${obs_name}:${col_obs_name} \
        --select A.*,$addcols \
        --always-dump \
        --output-mode compact

    #replace originalfile
    sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header
    mv temporiginal originalfile

    if [ $test == "Y" ]
    then
        echo "joining complete"
        head originalfile
        echo ""
    fi
}


#################################
#
#       PARSE PARAMETERS
#
#################################


set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: exac03nonpsych: exac03nontcga: dbscsnv11: kaviar_20150923: hrcr1: mitimpact2: mitimpact24: dbnsfp30a: spidex: gonl: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: cosmic70: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
[ $# -eq 0 ] && usage


while [ $# -gt 0 ]
do
    case "$1" in
        --inputfile)           infile=$2;shift;;      # inputfile
        --buildver)            buildvertmp=$2;shift;; # hg18 or hg19
        --humandb)             humandbtmp=$2;shift;;  # location of humandb database
        --varfile)             varfile=$2;shift;;     # Y or N
        --VCF)                 vcf=$2;shift;;         #Y or N
        --chrcol)              chrcol=$2;shift;;      # which column has chr
        --startcol)            startcol=$2;shift;;    # which column has start coord
        --endcol)              endcol=$2;shift;;      # which column has end coord
        --refcol)              refcol=$2;shift;;      # which column has ref allele
        --obscol)              obscol=$2;shift;;      # which column has alt allele
        --vartypecol)          vartypecol=$2;shift;;  # which column has vartype
        --convertcoords)       convertcoords=$2;shift;;  # Y or N convert coordinate from CG to 1-based?
        --geneanno)            geneanno=$2;shift;;    # comma-separated list of strings refSeq, knowngene, ensgene
        --hgvs)                hgvs=$2;shift;;
        --verdbsnp)            verdbsnp=$2;shift;;    #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")"
        --tfbs)                tfbs=$2;shift;;        # Y or N
        --mce)                 mce=$2;shift;;         # Y or N
        --cytoband)            cytoband=$2;shift;;    # Y or N
        --segdup)              segdup=$2;shift;;      # Y or N
        --dgv)                 dgv=$2;shift;;         # Y or N
        --gwas)                gwas=$2;shift;;        # Y or N
        --ver1000g)            ver1000g=$2;shift;;    # Y or N
        --cg46)                cg46=$2;shift;;        # Y or N
        --cg69)                cg69=$2;shift;;        # Y or N
        --impactscores)        impactscores=$2;shift;;    # Y or N
        --newimpactscores)     newimpactscores=$2;shift;; # Y or N
        --otherinfo)           otherinfo=$2;shift;;       # display additional columns?
        --scriptsdir)          scriptsdirtmp=$2;shift;;   # Y or N
        --esp)                 esp=$2;shift;;             # Y or N
        --exac03)              exac03=$2;shift;;          # Y or N
        --exac03nonpsych)      exac03nonpsych=$2;shift;;  # Y or N
        --exac03nontcga)       exac03nontcga=$2;shift;;   # Y or N
        --dbscsnv11)           dbscsnv11=$2;shift;;
        --kaviar_20150923)     kaviar_20150923=$2;shift;;
        --hrcr1)               hrcr1=$2;shift;;
        --mitimpact2)          mitimpact2=$2;shift;;
        --mitimpact24)         mitimpact24=$2;shift;;
        --dbnsfp30a)           dbnsfp30a=$2;shift;;
        --gonl)                gonl=$2;shift;;        # Y or N
        --spidex)              spidex=$2;shift;;      # Y or N
        --gerp)                gerp=$2;shift;;        # Y or N
        --cosmic61)            cosmic61=$2;shift;;    # Y or N
        --cosmic63)            cosmic63=$2;shift;;    # Y or N
        --cosmic64)            cosmic64=$2;shift;;    # Y or N
        --cosmic65)            cosmic65=$2;shift;;    # Y or N
        --cosmic67)            cosmic67=$2;shift;;    # Y or N
        --cosmic68)            cosmic68=$2;shift;;    # Y or N
        --cosmic70)            cosmic70=$2;shift;;    # Y or N
        --nci60)               nci60=$2;shift;;       # Y or N
        --clinvar)             clinvar=$2;shift;;     # Y or N
        --filt_dbsnp)          filt_dbsnp=$2;shift;;  # Y or N
        --filt1000GALL)        threshold_1000g_ALL=$2;shift;;   #threshold value
        --filt1000GAFR)        threshold_1000g_AFR=$2;shift;;   #threshold value
        --filt1000GAMR)        threshold_1000g_AMR=$2;shift;;   #threshold value
        --filt1000GASN)        threshold_1000g_ASN=$2;shift;;   #threshold value
        --filt1000GEUR)        threshold_1000g_EUR=$2;shift;;   #threshold value
        --filtESP6500ALL)      threshold_ESP6500_ALL=$2;shift;; #threshold value
        --filtESP6500EA)       threshold_ESP6500_EA=$2;shift;;  #threshold value
        --filtESP6500AA)       threshold_ESP6500_AA=$2;shift;;  #threshold value
        --filtcg46)            threshold_cg46=$2;shift;;
        --filtcg69)            threshold_cg69=$2;shift;;
        --outall)              outfile_all=$2;shift;;     # file
        --outfilt)             outfile_filt=$2;shift;;    # file
        --outinvalid)          outfile_invalid=$2;shift;; #file
        --dorunannovar)        dorunannovar=$2;shift;;    #Y or N
       -h)            shift;;
       --)            shift;break;;
       -*)            usage;;
       *)             break;;
    esac
    shift
done

#sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma
humandb=${humandbtmp%,*}
buildver=${buildvertmp%,*}
scriptsdir=${scriptsdirtmp%,*}


if [ $test == "Y" ]
then
    echo "dorunannovar: $dorunannovar"
    echo "infile: $infile"
    echo "buildver: $buildver"
    echo "annovardb: $humandb"
    echo "verdbnsp: $verdbsnp"
    echo "geneanno: $geneanno"
    echo "tfbs: $tfbs"
    echo "mce: $mce"
    echo "cytoband: $cytoband"
    echo "segdup: $segdup"
    echo "dgv: $dgv"
    echo "gwas: $gwas"
    echo "g1000: ${g1000}"
    echo "cg46: ${cg46}"
    echo "cg69: ${cg69}"
    echo "impactscores: $impactscores"
    echo "impactscores: $newimpactscores"
    echo "esp: $esp"
    echo "gerp: $gerp"
    echo "cosmic: $cosmic"
    echo "outfile: $outfile_all"
    echo "outinvalid: $outfile_invalid"
    echo "outfiltered: $outfile_filt"
    echo "varfile: $varfile"
    echo "vcf" $vcf
    echo "chrcol: $chrcol"
    echo "startcol: $startcol"
    echo "endcol: $endcol"
    echo "refcol: $refcol"
    echo "obscol: $obscol"
    echo "convertcoords: $convertcoords"
    echo "vartypecol: $vartypecol"
    echo "dofilter: $dofilter"
    echo "threshold_1000g_ALL  : $threshold_1000g_ALL"
    echo "threshold_1000g_AFR  : $threshold_1000g_AFR"
    echo "threshold_1000g_AMR  : $threshold_1000g_AMR"
    echo "threshold_1000g_ASN  : $threshold_1000g_ASN"
    echo "threshold_1000g_EUR  : $threshold_1000g_EUR"
    echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL"
    echo "threshold_ESP6500_EA : $threshold_ESP6500_EA"
    echo "threshold_ESP6500_AA : $threshold_ESP6500_AA"
fi


############################################
#
#       Annotate Variants
#
############################################

# parse geneanno param
refgene="N"
knowngene="N"
ensgene="N"

if [[ $geneanno =~ "refSeq" ]]
then
    refgene="Y"
fi
if [[ $geneanno =~ "knowngene" ]]
then
    knowngene="Y"
fi
if [[ $geneanno =~ "ensgene" ]]
then
    ensgene="Y"
fi
if [ $hgvs == "N" ]
then
    hgvs=""
fi

#parse verdbsnp/1000g/esp strings
dbsnpstr=${verdbsnp//,/ }
filt_dbsnpstr=${filt_dbsnp//,/ }
g1000str=${ver1000g//,/ }
espstr=${esp//,/ }

if [ $test == "Y" ]
then
    echo "annotate dbsnp: $dbsnpstr"
    echo "annotate esp:   $espstr"
    echo "filter dbsnp: $filt_dbsnpstr"
fi

mutationtaster="N"
avsift="N"
lrt="N"
polyphen2="N"
phylop="N"
ljbsift="N"

#parse old impactscores param (obsolete)
if [[ $impactscores =~ "mutationtaster" ]]
then
    mutationtaster="Y"
fi
if [[ $impactscores =~ "sift" ]]
then
    avsift="Y"
fi
if [[ $impactscores =~ "lrt" ]]
then
    lrt="Y"
fi
if [[ $impactscores =~ "ljbsift" ]]
then
    ljbsift="Y"
fi
if [[ $impactscores =~ "ljb2sift" ]]
then
    ljb2sift="Y"
fi
if [[ $impactscores =~ "pp2" ]]
then
    polyphen2="Y"
fi
if [[ $impactscores =~ "phylop" ]]
then
    phylop="Y"
fi

if [[ $varfile == "Y" ]]
then
    convertcoords="Y"
fi

#ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores

ljb2_sift="N"
ljb2_pp2hdiv="N"
ljb2_pp2hvar="N"
ljb2_lrt="N"
ljb2_mt="N"
ljb2_ma="N"
ljb2_fathmm="N"
ljb2_gerp="N"
ljb2_phylop="N"
ljb2_siphy="N"

# parse ljb2 newimpactscores param
# ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy
if [[ $newimpactscores =~ "ljb2_sift" ]]
then
    ljb2_sift="Y"
fi
if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]]
then
    ljb2_pp2hdiv="Y"
fi
if [[ $newimpactscores =~ "ljb2_pp2hvar" ]]
then
    ljb2_pp2hvar="Y"
fi
if [[ $newimpactscores =~ "ljb2_lrt" ]]
then
    ljb2_lrt="Y"
fi
if [[ $newimpactscores =~ "ljb2_mt" ]]
then
    ljb2_mt="Y"
fi
if [[ $newimpactscores =~ "ljb2_ma" ]]
then
    ljb2_ma="Y"
fi
if [[ $newimpactscores =~ "ljb2_fathmm" ]]
then
    ljb2_fathmm="Y"
fi
if [[ $newimpactscores =~ "ljb2_gerp" ]]
then
    ljb2_gerp="Y"
fi
if [[ $newimpactscores =~ "ljb2_phylop" ]]
then
    ljb2_phylop="Y"
fi
if [[ $newimpactscores =~ "ljb2_siphy" ]]
then
    ljb2_siphy="Y"
fi

if [ $otherinfo == "N" ]
then
    otherinfo=""
fi


#column header names we will be adding
# ESP 6500
esp6500si_colheader_ALL="ESP6500si_ALL"
esp6500si_colheader_EA="ESP6500si_EA"
esp6500si_colheader_AA="ESP6500si_AA"
esp6500_colheader_ALL="ESP6500_ALL"
esp6500_colheader_EA="ESP6500_EA"
esp6500_colheader_AA="ESP6500_AA"
esp5400si_colheader_ALL="ESP5400si_ALL"
esp5400si_colheader_EA="ESP5400si_EA"
esp5400si_colheader_AA="ESP5400si_AA"
esp5400_colheader_ALL="ESP5400_ALL"
esp5400_colheader_EA="ESP5400_EA"
esp5400_colheader_AA="ESP5400_AA"


# cg46 cg69
cg46_colheader="CG_46_genomes"
cg69_colheader="CG_69_genomes"

cp $infile originalfile
#run annovar or filter only?
if [ $dorunannovar == "Y" ]
then


    ####################################
    #
    #       PREPARE INPUT FILE
    #
    ####################################

    echo "converting input file"
    vcfheader=""
    if [ $vcf == "Y" ]     #if CG varfile, convert
    then
        # convert vcf to annovarinput
        $scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1

        #construct header line from vcf file
        cat $infile | grep "#CHROM" > additionalcols
        sed -i 's/#//g' additionalcols
        vcfheader="\t`cat additionalcols`"
        echo "vcfheader:$vcfheader"
        echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile
        cat annovarinput >> originalfile

        chrcol=1
        startcol=2
        endcol=3
        refcol=4
        obscol=5


    elif [ $varfile == "Y" ]     #if CG varfile, convert
    then
        # convert varfile
        $scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1
        echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile
        cat annovarinput | cut -f1-6,8 >> originalfile
        cat annovarinput | cut -f1-5 >> annovarinput2
        mv annovarinput2 annovarinput

        chrcol=1
        startcol=2
        endcol=3
        refcol=4
        obscol=5

    elif [ $convertcoords == "Y" ]    # if CG-coordinates, convert
    then
        #echo "rearranging columns and converting coordinates"
        awk 'BEGIN{
                FS="\t";
                OFS="\t";
            }{
                if(FNR>1) {
                                        gsub(/chr/,"",$"'"${chrcol}"'")
                    if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
                    if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
                    if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
                    if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };

                    printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
                }
            }
            END{
            }' $infile > annovarinput

            #remove any "chr" prefixes
            #sed -i '2,$s/chr//g' annovarinput

            awk 'BEGIN{
                FS="\t";
                OFS="\t";
            }{

                if(FNR>=1) {
                        gsub(/chr/,"",$"'"${chrcol}"'")
                    if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
                    if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
                    if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
                    if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };

                    print $0
                }
            }
            END{
            }' $infile > originalfile

            #remove any "chr" prefixes
            #sed -i '2,$s/chr//g' originalfile
            sed -i 's/omosome/chromosome/g' originalfile


    else #only rearrange columns if already 1-based coordinates
        echo "rearranging columns "
        awk 'BEGIN{
                FS="\t";
                OFS="\t";
            }{
                if(FNR>1) {
                                    printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
                }
            }
            END{
            }' $infile > annovarinput

            #remove any "chr" prefixes
            sed -i '2,$s/chr//g' annovarinput
            sed '2,$s/chr//g' $infile > originalfile
            sed -i 's/omosome/chromosome/g' originalfile
    fi

    echo "...finished conversion"


    ####################################
    #
    #       RUN ANNOVAR COMMANDS
    #
    ####################################


    ######    gene-based annotation   #######

    # RefSeq Gene
    if [ $refgene == "Y" ]
    then
        echo -e "\nrefSeq gene"
        $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1

        annovarout=annovarinput.variant_function
        sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout  3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene

        annovarout=annovarinput.exonic_variant_function
        sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout  4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange
    fi


    # UCSC KnownGene
    if [ $knowngene == "Y" ]
    then
        echo -e "\nUCSC known gene"
        $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1

        annovarout=annovarinput.variant_function
        sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene

        annovarout=annovarinput.exonic_variant_function
        sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange
    fi


    # Emsembl Gene
    if [ $ensgene == "Y" ]
    then
        echo -e "\nEnsembl gene"
        $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1

        annovarout=annovarinput.variant_function
        sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene

        annovarout=annovarinput.exonic_variant_function
        sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange
    fi


    ######    region-based annotation   #######


    # Transcription Factor Binding Sites Annotation
    if [ $mce == "Y" ]
    then
        echo -e "\nMost Conserved Elements"

        if [ $buildver == "hg18" ]
        then
            $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1
            annovarout=annovarinput.${buildver}_phastConsElements44way
            sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
            joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way

        else #hg19
            $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1
            annovarout=annovarinput.${buildver}_phastConsElements46way
            sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
            joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way
        fi

    fi


    # Transcription Factor Binding Sites Annotation
    if [ $tfbs == "Y" ]
    then
        echo -e "\nTranscription Factor Binding Site Annotation"
        $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1

        # arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns
        annovarout=annovarinput.${buildver}_tfbsConsSites
        sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS
    fi


    # Identify cytogenetic band for genetic variants
    if [ $cytoband == "Y" ]
    then
        echo -e "\nCytogenic band Annotation"
        $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_cytoBand
        sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.Band
    fi


    # Identify variants located in segmental duplications
    if [ $segdup == "Y" ]
    then
        echo -e "\nSegmental Duplications Annotation"
        $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_genomicSuperDups
        sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup
    fi


    # Identify previously reported structural variants in DGV
    if [ $dgv == "Y" ]
    then
        echo -e "\nDGV Annotation"
        $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_dgvMerged
        sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.DGV
    fi


    # Identify variants reported in previously published GWAS studies
    if [ $gwas == "Y" ]
    then
        echo -e "\nGWAS Annotation"
        $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_gwasCatalog
        sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS
    fi


    ######    filter-based annotation   #######

    #dbSNP
    for version in $dbsnpstr
    do
        if [ $version == "None" ]
        then
            break
        fi
        echo -e "\ndbSNP region Annotation, version: $version"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1

        columnname=${version}
        if [[ $columnname == snp* ]]
        then
            columnname="db${version}"
        fi

        annovarout=annovarinput.${buildver}_${version}_dropped
        sed -i '1i\db\t'${columnname}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.${columnname}

    done


    #1000 Genomes

    if [ $ver1000g != "None" ]
    then

        for version in $g1000str
        do
            #column headers
            g1000_colheader_ALL="${version}_ALL"
            g1000_colheader_AFR="${version}_AFR"
            g1000_colheader_AMR="${version}_AMR"
            g1000_colheader_ASN="${version}_ASN"
            g1000_colheader_EUR="${version}_EUR"
            g1000_colheader_EAS="${version}_EAS"
            g1000_colheader_SAS="${version}_SAS"
            g1000_colheader_CEU="${version}_CEU"
            g1000_colheader_YRI="${version}_YRI"
            g1000_colheader_JPTCHB="${version}_JPTCHB"

            doALL="N"
            doAMR="N"
            doAFR="N"
            doASN="N"
            doEAS="N"
            doSAS="N"
            doEUR="N"
            doCEU="N"
            doYRI="N"
            doJPTCHB="N"


            if [ $version == "1000g2012apr" ]
            then
                fileID="2012_04"
                doALL="Y"
                if [ $buildver == "hg19" ]
                then
                    doAMR="Y"
                    doAFR="Y"
                    doASN="Y"
                    doEUR="Y"
                fi
            elif [ $version == "1000g2014oct" ]
            then
                fileID="2014_10"
                doALL="Y"
                doAMR="Y"
                doAFR="Y"
                doEUR="Y"
                doEAS="Y"
                if [ $buildver == "hg19" ]
                then
                    doSAS="Y"
                fi

            elif [[ $version == "1000g2015aug"  ]]
            then
                fileID="2015_08"
                doALL="Y"
                doAMR="Y"
                doAFR="Y"
                doEUR="Y"
                doEAS="Y"
                doSAS="Y"

            elif [[ $version == "1000g2012feb"  ]]
            then
                fileID="2012_02"
                doALL="Y"
            elif [[ $version == "1000g2010nov"  ]]
            then
                fileID="2010_11"
                doALL="Y"
            elif [[ $version == "1000g2010jul"  ]]
            then
                fileID="2010_07"
                doALL="N"
                doCEU="Y"
                doYRI="Y"
                doJPTCHB="Y"
            else
                echo "unrecognized 1000g version, skipping"
            fi

            #ALL
            if [ $doALL == "Y"  ]
                then
                echo -e "\n1000Genomes ALL"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL
            fi

            # AFR
            if [ $doAFR == "Y"  ]
            then
                echo -e "\n1000Genomes AFR"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR
            fi


            # AMR
            if [ $doAMR == "Y"  ]
            then
                echo -e "\n1000Genomes AMR"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR
            fi

            # ASN
            if [ $doASN == "Y"  ]
            then
                echo -e "\n1000Genomes ASN"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN
            fi

            # EAS
            if [ $doEAS == "Y"  ]
            then
                echo -e "\n1000Genomes EAS"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS
            fi

            # SAS
            if [ $doSAS == "Y"  ]
            then
                echo -e "\n1000Genomes SAS"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS
            fi

            # EUR
            if [ $doEUR == "Y"  ]
            then
                echo -e "\n1000Genomes EUR"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR
            fi

            # CEU
            if [ $doCEU == "Y"  ]
            then
                echo -e "\n1000Genomes CEU"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU
            fi

            # YRI
            if [ $doYRI == "Y"  ]
            then
                echo -e "\n1000Genomes YRI"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI


            fi

            #JPTCHB
            if [ $doJPTCHB == "Y"  ]
            then
                echo -e "\n1000Genomes JPTCHB"
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped
                sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB
            fi

        done
    fi


    #### IMPACT SCORE ANNOTATIONS


    if [ $ljb2_sift == "Y" ]
    then
        echo -e "\nLJB2 SIFT Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_sift_dropped
        sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT
    fi

    if [ $ljb2_pp2hdiv == "Y" ]
    then
        echo -e "\nLJB2 pp2hdiv Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped
        sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV
    fi

    if [ $ljb2_pp2hvar == "Y" ]
    then
        echo -e "\nLJB2 pp2hvar Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped

        head $annovarout
        sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR
    fi

    if [ $ljb2_lrt == "Y" ]
    then
        echo -e "\nLJB2 LRT Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_lrt_dropped
        sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT
    fi

    if [ $ljb2_mt == "Y" ]
    then
        echo -e "\nLJB2 mutationtaster Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_mt_dropped
        sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster
    fi

    if [ $ljb2_ma == "Y" ]
    then
        echo -e "\nLJB2 mutationassessor Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_ma_dropped
        sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor
    fi

    if [ $ljb2_fathmm == "Y" ]
    then
        echo -e "\nLJB2 FATHMM Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped
        sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM
    fi

    if [ $ljb2_gerp == "Y" ]
    then
        echo -e "\nLJB2 GERP++ Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped
        sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++
    fi

    if [ $ljb2_phylop == "Y" ]
    then
        echo -e "\nLJB2 PhyloP Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_phylop_dropped
        sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP
    fi

    if [ $ljb2_siphy == "Y" ]
    then
        echo -e "\nLJB2 SiPhy Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb2_siphy_dropped
        sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy
    fi


    ### OLD IMPACT SCORE ANNOTATIONS

    # SIFT
    if [ $avsift == "Y" ]
    then
        echo -e "\nSIFT Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_avsift_dropped
        sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT
    fi

    #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
    # SIFT2
    if [ $ljbsift == "Y" ]
    then
        echo -e "\nLJB SIFT Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb_sift_dropped
        sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT
    fi


    # PolyPhen2
    if [ $polyphen2 == "Y" ]
    then
        echo -e "\nPolyPhen Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb_pp2_dropped
        sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2
    fi


    # MutationTaster
    if [ $mutationtaster == "Y" ]
    then
        echo -e "\nMutationTaster Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb_mt_dropped
        sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster
    fi


    # LRT
    if [ $lrt == "Y" ]
    then
        echo -e "\nLRT Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb_lrt_dropped
        sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore
    fi

    # PhyloP
    if [ $phylop == "Y" ]
    then
        echo -e "\nPhyloP Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1

        annovarout=annovarinput.${buildver}_ljb_phylop_dropped
        sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP
    fi


    ### ESP  Exome Variant Server
    if [ $esp != "None" ]
    then
        echo -e "\nESP Annotation"
        for version in $espstr
        do
            echo "version: $version"
            # 6500si ALL
            if [ $version == "esp6500si_all" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp6500si_all_dropped
                sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL
            fi


            # 6500si European American
            if [ $version == "esp6500si_ea" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp6500si_ea_dropped
                sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA
            fi

            # 6500si African Americans
            if [ $version == "esp6500si_aa" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp6500si_aa_dropped
                sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA
            fi


            # 6500 ALL
            if [ $version == "esp6500_all" ]
            then
                ls
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp6500_all_dropped
                sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL
            fi


            # 6500 European American
            if [ $version == "esp6500_ea" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1
                annovarout=annovarinput.${buildver}_esp6500_ea_dropped
                sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA
            fi

            # 6500 African Americans
            if [ $version == "esp6500_aa" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp6500_aa_dropped
                sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA
            fi


            # 5400 ALL
            if [ $version == "esp5400_all" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp5400_all_dropped
                sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL
            fi


            # 5400 European American
            if [ $version == "esp5400_ea" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp5400_ea_dropped
                sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA
            fi

            # 5400 African Americans
            if [ $version == "esp5400_aa" ]
            then
                $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1

                annovarout=annovarinput.${buildver}_esp5400_aa_dropped
                sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
                joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA
            fi

        done
    fi

    #ExAC-03 database
    if [ $exac03 == "Y" ]
    then
        echo -e "\nExAC03 Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03 annovarinput $humandb 2>&1

        #annovarout=annovarinput.${buildver}_exac03_dropped

        # split allelefrequency column into several columns, one per population
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout

        sed -i '1i\db\tExAC_ALL\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_ALL,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS
    fi

        #ExAC-03 database
    if [ $exac03nonpsych == "Y" ]
    then
        echo -e "\nExAC03 non-psych Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03nonpsych annovarinput $humandb 2>&1

        #annovarout=annovarinput.${buildver}_exac03_dropped

        # split allelefrequency column into several columns, one per population
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_exac03nonpsych_dropped > $annovarout

        sed -i '1i\db\tExAC_non-phsych_ALL\tExAC_non-phsych_AFR\tExAC_non-phsych_AMR\tExAC_non-phsych_EAS\tExAC_non-phsych_FIN\tExAC_non-phsych_NFE\tExAC_non-phsych_OTH\tExAC_non-phsych_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_non-phsych_ALL,B.ExAC_non-phsych_AFR,B.ExAC_non-phsych_AMR,B.ExAC_non-phsych_EAS,B.ExAC_non-phsych_FIN,B.ExAC_non-phsych_NFE,B.ExAC_non-phsych_OTH,B.ExAC_non-phsych_SAS
    fi

    #ExAC-03 database
    if [ $exac03nontcga == "Y" ]
    then
        echo -e "\nExAC03 non-tcga Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03nontcga annovarinput $humandb 2>&1

        #annovarout=annovarinput.${buildver}_exac03_dropped

        # split allelefrequency column into several columns, one per population
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_exac03nontcga_dropped > $annovarout

        sed -i '1i\db\tExAC_non-TCGA_ALL\tExAC_non-TCGA_AFR\tExAC_non-TCGA_AMR\tExAC_non-TCGA_EAS\tExAC_non-TCGA_FIN\tExAC_non-TCGA_NFE\tExAC_non-TCGA_OTH\tExAC_non-TCGA_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_non-TCGA_ALL,B.ExAC_non-TCGA_AFR,B.ExAC_non-TCGA_AMR,B.ExAC_non-TCGA_EAS,B.ExAC_non-TCGA_FIN,B.ExAC_non-TCGA_NFE,B.ExAC_non-TCGA_OTH,B.ExAC_non-TCGA_SAS
    fi

    #dbscSNV 1.1
    if [ $dbscsnv11 == "Y" ]
    then
        echo -e "\ndbscSNV11 Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype dbscsnv11 annovarinput $humandb 2>&1

        #annovarout="annovarinput.${buildver}_dbscsnv11_dropped"
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_dbscsnv11_dropped > $annovarout

        sed -i '1i\db\tdbscSNV11_ADA_SCORE\tdbscSNV11_RF_SCORE\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 4 5 6 7 8 B.dbscSNV11_ADA_SCORE,B.dbscSNV11_RF_SCORE
    fi


    #kaviar_20150923
    if [ $kaviar_20150923 == "Y" ]
    then
        echo -e "\nkaviar_20150923 Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype kaviar_20150923 annovarinput $humandb 2>&1

        #annovarout="annovarinput.${buildver}_kaviar_20150923_dropped"
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_kaviar_20150923_dropped > $annovarout

        sed -i '1i\db\tKaviar_AF\tKaviar_AC\tKaviar_AN\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 5 6 7 8 9 B.Kaviar_AF,B.Kaviar_AC,B.Kaviar_AN
    fi

    #hrcr1
    if [ $hrcr1 == "Y" ]
    then
        echo -e "\nhrcr1 Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype hrcr1 annovarinput $humandb 2>&1

        #annovarout="annovarinput.${buildver}_dbscsnv11_dropped"
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_hrcr1_dropped > $annovarout
        sed -i '1i\db\tHRC_AF\tHRC_AC\tHRC_AN\tHRC_non1000G_AF\tHRC_non1000G_AC\tHRC_non1000g_AN\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 8 9 10 11 12 B.HRC_AF,B.HRC_AC,B.HRC_AN,B.HRC_non1000g_AF,B.HRC_non1000g_AC,B.HRC_non1000g_AN
    fi

    #dbnsfp30a
    if [ $dbnsfp30a == "Y" ]
    then
        echo -e "\ndbnsfp30a Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype dbnsfp30a annovarinput $humandb 2>&1

        #annovarout="annovarinput.${buildver}_dbnsfp30a_dropped"
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_dbnsfp30a_dropped > $annovarout

        sed -i '1i\db\tdbNSFP_SIFT_score\tdbNSFP_SIFT_pred\tdbNSFP_Polyphen2_HDIV_score\tdbNSFP_Polyphen2_HDIV_pred\tdbNSFP_Polyphen2_HVAR_score\tdbNSFP_Polyphen2_HVAR_pred\tdbNSFP_LRT_score\tdbNSFP_LRT_pred\tdbNSFP_MutationTaster_score\tdbNSFP_MutationTaster_pred\tdbNSFP_MutationAssessor_score\tdbNSFP_MutationAssessor_pred\tdbNSFP_FATHMM_score\tdbNSFP_FATHMM_pred\tdbNSFP_PROVEAN_score\tdbNSFP_PROVEAN_pred\tdbNSFP_VEST3_score\tdbNSFP_CADD_raw\tdbNSFP_CADD_phredDANN_score\tdbNSFP_fathmm-MKL_coding_score\tdbNSFP_fathmm-MKL_coding_pred\tdbNSFP_MetaSVM_score\tdbNSFP_MetaSVM_pred\tdbNSFP_MetaLR_score\tdbNSFP_MetaLR_pred\tdbNSFP_integrated_fitCons_score\tdbNSFP_integrated_confidence_value\tdbNSFP_GERP_RS\tdbNSFP_phyloP7way_vertebrate\tdbNSFP_phyloP20way_mammalian\tdbNSFP_phastCons7way_vertebrate\tdbNSFP_phastCons20way_mammalian\tdbNSFP_SiPhy_29way_logOdds\tdbNSFP_unknown\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 36 37 38 39 40 B.dbNSFP_SIFT_score,B.dbNSFP_SIFT_pred,B.dbNSFP_Polyphen2_HDIV_score,B.dbNSFP_Polyphen2_HDIV_pred,B.dbNSFP_Polyphen2_HVAR_score,B.dbNSFP_Polyphen2_HVAR_pred,B.dbNSFP_LRT_score,B.dbNSFP_LRT_pred,B.dbNSFP_MutationTaster_score,B.dbNSFP_MutationTaster_pred,B.dbNSFP_MutationAssessor_score,B.dbNSFP_MutationAssessor_pred,B.dbNSFP_FATHMM_score,B.dbNSFP_FATHMM_pred,B.dbNSFP_PROVEAN_score,B.dbNSFP_PROVEAN_pred,B.dbNSFP_VEST3_score,B.dbNSFP_CADD_raw,B.dbNSFP_CADD_phredDANN_score,B.dbNSFP_fathmm-MKL_coding_score,B.dbNSFP_fathmm-MKL_coding_pred,B.dbNSFP_MetaSVM_score,B.dbNSFP_MetaSVM_pred,B.dbNSFP_MetaLR_score,B.dbNSFP_MetaLR_pred,B.dbNSFP_integrated_fitCons_score,B.dbNSFP_integrated_confidence_value,B.dbNSFP_GERP_RS,B.dbNSFP_phyloP7way_vertebrate,B.dbNSFP_phyloP20way_mammalian,B.dbNSFP_phastCons7way_vertebrate,B.dbNSFP_phastCons20way_mammalian,B.dbNSFP_SiPhy_29way_logOdds

    fi

    #mitimpact2
    if [ $mitimpact2 == "Y" ]
    then
        echo -e "\nmitimpact2 Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype mitimpact2 annovarinput $humandb 2>&1

        #annovarout="annovarinput.${buildver}_mitimpact2_dropped"
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
                   }END{}' annovarinput.${buildver}_mitimpact2_dropped > $annovarout

        sed -i '1i\db\tMITimpact2_Gene_symbol\tMITimpact2_OXPHOS_Complex\tMITimpact2_Ensembl_Gene_ID\tMITimpact2_Ensembl_Protein_ID\tMITimpact2_Uniprot_Name\tMITimpact2_Uniprot_ID\tMITimpact2_NCBI_Gene_ID\tMITimpact2_NCBI_Protein_ID\tMITimpact2_Gene_pos\tMITimpact2_AA_pos\tMITimpact2_AA_sub\tMITimpact2_Codon_sub\tMITimpact2_dbSNP_ID\tMITimpact2_PhyloP_46V\tMITimpact2_PhastCons_46V\tMITimpact2_PhyloP_100V\tMITimpact2_PhastCons_100V\tMITimpact2_SiteVar\tMITimpact2_PolyPhen2_prediction\tMITimpact2_PolyPhen2_score\tMITimpact2_SIFT_prediction\tMITimpact2_SIFT_score\tMITimpact2_FatHmm_prediction\tMITimpact2_FatHmm_score\tMITimpact2_PROVEAN_prediction\tMITimpact2_PROVEAN_score\tMITimpact2_MutAss_prediction\tMITimpact2_MutAss_score\tMITimpact2_EFIN_Swiss_Prot_Score\tMITimpact2_EFIN_Swiss_Prot_Prediction\tMITimpact2_EFIN_HumDiv_Score\tMITimpact2_EFIN_HumDiv_Prediction\tMITimpact2_CADD_score\tMITimpact2_CADD_Phred_score\tMITimpact2_CADD_prediction\tMITimpact2_Carol_prediction\tMITimpact2_Carol_score\tMITimpact2_Condel_score\tMITimpact2_Condel_pred\tMITimpact2_COVEC_WMV\tMITimpact2_COVEC_WMV_prediction\tMITimpact2_PolyPhen2_score_transf\tMITimpact2_PolyPhen2_pred_transf\tMITimpact2_SIFT_score_transf\tMITimpact2_SIFT_pred_transf\tMITimpact2_MutAss_score_transf\tMITimpact2_MutAss_pred_transf\tMITimpact2_Perc_coevo_Sites\tMITimpact2_Mean_MI_score\tMITimpact2_COSMIC_ID\tMITimpact2_Tumor_site\tMITimpact2_Examined_samples\tMITimpact2_Mutation_frequency\tMITimpact2_US\tMITimpact2_Status\tMITimpact2_Associated_disease\tMITimpact2_Presence_in_TD\tMITimpact2_Class_predicted\tMITimpact2_Prob_N\tMITimpact2_Prob_P\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 62 63 64 65 66 B.MITimpact2_Gene_symbol,B.MITimpact2_OXPHOS_Complex,B.MITimpact2_Ensembl_Gene_ID,B.MITimpact2_Ensembl_Protein_ID,B.MITimpact2_Uniprot_Name,B.MITimpact2_Uniprot_ID,B.MITimpact2_NCBI_Gene_ID,B.MITimpact2_NCBI_Protein_ID,B.MITimpact2_Gene_pos,B.MITimpact2_AA_pos,B.MITimpact2_AA_sub,B.MITimpact2_Codon_sub,B.MITimpact2_dbSNP_ID,B.MITimpact2_PhyloP_46V,B.MITimpact2_PhastCons_46V,B.MITimpact2_PhyloP_100V,B.MITimpact2_PhastCons_100V,B.MITimpact2_SiteVar,B.MITimpact2_PolyPhen2_prediction,B.MITimpact2_PolyPhen2_score,B.MITimpact2_SIFT_prediction,B.MITimpact2_SIFT_score,B.MITimpact2_FatHmm_prediction,B.MITimpact2_FatHmm_score,B.MITimpact2_PROVEAN_prediction,B.MITimpact2_PROVEAN_score,B.MITimpact2_MutAss_prediction,B.MITimpact2_MutAss_score,B.MITimpact2_EFIN_Swiss_Prot_Score,B.MITimpact2_EFIN_Swiss_Prot_Prediction,B.MITimpact2_EFIN_HumDiv_Score,B.MITimpact2_EFIN_HumDiv_Prediction,B.MITimpact2_CADD_score,B.MITimpact2_CADD_Phred_score,B.MITimpact2_CADD_prediction,B.MITimpact2_Carol_prediction,B.MITimpact2_Carol_score,B.MITimpact2_Condel_score,B.MITimpact2_Condel_pred,B.MITimpact2_COVEC_WMV,B.MITimpact2_COVEC_WMV_prediction,B.MITimpact2_PolyPhen2_score_transf,B.MITimpact2_PolyPhen2_pred_transf,B.MITimpact2_SIFT_score_transf,B.MITimpact2_SIFT_pred_transf,B.MITimpact2_MutAss_score_transf,B.MITimpact2_MutAss_pred_transf,B.MITimpact2_Perc_coevo_Sites,B.MITimpact2_Mean_MI_score,B.MITimpact2_COSMIC_ID,B.MITimpact2_Tumor_site,B.MITimpact2_Examined_samples,B.MITimpact2_Mutation_frequency,B.MITimpact2_US,B.MITimpact2_Status,B.MITimpact2_Associated_disease,B.MITimpact2_Presence_in_TD,B.MITimpact2_Class_predicted,B.MITimpact2_Prob_N,B.MITimpact2_Prob_P
    fi

    #mitimpact24
    if [ $mitimpact24 == "Y" ]
    then
        echo -e "\nmitimpact24 Annotation"
        $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver -dbtype mitimpact24 annovarinput $humandb 2>&1

        #annovarout="annovarinput.${buildver}_mitimpact24_dropped"
        awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$24)
                   print $0
                   }END{}' annovarinput.${buildver}_mitimpact24_dropped > $annovarout

        sed -i '1i\db\tMITimpact24_Gene_symbol\tMITimpact24_OXPHOS_Complex\tMITimpact24_Ensembl_Gene_ID\tMITimpact24_Ensembl_Protein_ID\tMITimpact24_Uniprot_Name\tMITimpact24_Uniprot_ID\tMITimpact24_NCBI_Gene_ID\tMITimpact24_NCBI_Protein_ID\tMITimpact24_Gene_pos\tMITimpact24_AA_pos\tMITimpact24_AA_sub\tMITimpact24_Codon_sub\tMITimpact24_dbSNP_ID\tMITimpact24_PhyloP_46V\tMITimpact24_PhastCons_46V\tMITimpact24_PhyloP_100V\tMITimpact24_PhastCons_100V\tMITimpact24_SiteVar\tMITimpact24_PolyPhen24_prediction\tMITimpact24_PolyPhen24_score\tMITimpact24_SIFT_prediction\tMITimpact24_SIFT_score\tMITimpact24_FatHmm_prediction\tMITimpact24_FatHmm_score\tMITimpact24_PROVEAN_prediction\tMITimpact24_PROVEAN_score\tMITimpact24_MutAss_prediction\tMITimpact24_MutAss_score\tMITimpact24_EFIN_Swiss_Prot_Score\tMITimpact24_EFIN_Swiss_Prot_Prediction\tMITimpact24_EFIN_HumDiv_Score\tMITimpact24_EFIN_HumDiv_Prediction\tMITimpact24_CADD_score\tMITimpact24_CADD_Phred_score\tMITimpact24_CADD_prediction\tMITimpact24_Carol_prediction\tMITimpact24_Carol_score\tMITimpact24_Condel_score\tMITimpact24_Condel_pred\tMITimpact24_COVEC_WMV\tMITimpact24_COVEC_WMV_prediction\tMITimpact24_PolyPhen24_score_transf\tMITimpact24_PolyPhen24_pred_transf\tMITimpact24_SIFT_score_transf\tMITimpact24_SIFT_pred_transf\tMITimpact24_MutAss_score_transf\tMITimpact24_MutAss_pred_transf\tMITimpact24_Perc_coevo_Sites\tMITimpact24_Mean_MI_score\tMITimpact24_COSMIC_ID\tMITimpact24_Tumor_site\tMITimpact24_Examined_samples\tMITimpact24_Mutation_frequency\tMITimpact24_US\tMITimpact24_Status\tMITimpact24_Associated_disease\tMITimpact24_Presence_in_TD\tMITimpact24_Class_predicted\tMITimpact24_Prob_N\tMITimpact24_Prob_P\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 62 63 64 65 66 B.MITimpact24_Gene_symbol,B.MITimpact24_OXPHOS_Complex,B.MITimpact24_Ensembl_Gene_ID,B.MITimpact24_Ensembl_Protein_ID,B.MITimpact24_Uniprot_Name,B.MITimpact24_Uniprot_ID,B.MITimpact24_NCBI_Gene_ID,B.MITimpact24_NCBI_Protein_ID,B.MITimpact24_Gene_pos,B.MITimpact24_AA_pos,B.MITimpact24_AA_sub,B.MITimpact24_Codon_sub,B.MITimpact24_dbSNP_ID,B.MITimpact24_PhyloP_46V,B.MITimpact24_PhastCons_46V,B.MITimpact24_PhyloP_100V,B.MITimpact24_PhastCons_100V,B.MITimpact24_SiteVar,B.MITimpact24_PolyPhen24_prediction,B.MITimpact24_PolyPhen24_score,B.MITimpact24_SIFT_prediction,B.MITimpact24_SIFT_score,B.MITimpact24_FatHmm_prediction,B.MITimpact24_FatHmm_score,B.MITimpact24_PROVEAN_prediction,B.MITimpact24_PROVEAN_score,B.MITimpact24_MutAss_prediction,B.MITimpact24_MutAss_score,B.MITimpact24_EFIN_Swiss_Prot_Score,B.MITimpact24_EFIN_Swiss_Prot_Prediction,B.MITimpact24_EFIN_HumDiv_Score,B.MITimpact24_EFIN_HumDiv_Prediction,B.MITimpact24_CADD_score,B.MITimpact24_CADD_Phred_score,B.MITimpact24_CADD_prediction,B.MITimpact24_Carol_prediction,B.MITimpact24_Carol_score,B.MITimpact24_Condel_score,B.MITimpact24_Condel_pred,B.MITimpact24_COVEC_WMV,B.MITimpact24_COVEC_WMV_prediction,B.MITimpact24_PolyPhen24_score_transf,B.MITimpact24_PolyPhen24_pred_transf,B.MITimpact24_SIFT_score_transf,B.MITimpact24_SIFT_pred_transf,B.MITimpact24_MutAss_score_transf,B.MITimpact24_MutAss_pred_transf,B.MITimpact24_Perc_coevo_Sites,B.MITimpact24_Mean_MI_score,B.MITimpact24_COSMIC_ID,B.MITimpact24_Tumor_site,B.MITimpact24_Examined_samples,B.MITimpact24_Mutation_frequency,B.MITimpact24_US,B.MITimpact24_Status,B.MITimpact24_Associated_disease,B.MITimpact24_Presence_in_TD,B.MITimpact24_Class_predicted,B.MITimpact24_Prob_N,B.MITimpact24_Prob_P
    fi


    #GoNL database
    if [ $gonl == "Y" ]
    then

        if [ $buildver == "hg19" ]
            then
        echo -e "\nGoNL Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype generic -genericdbfile ${buildver}_gonl.txt annovarinput $humandb 2>&1

            ls
        annovarout=annovarinput.${buildver}_generic_dropped

        head $annovarout

        sed -i '1i\db\tGoNL\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.GoNL

            fi

    fi

    #SPIDEX database
    if [ $spidex == "Y" ]
    then

        if [ $buildver == "hg19" ]
        then
            echo -e "\nSPIDEX Annotation"
            $scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype spidex annovarinput $humandb 2>&1

            # split allelefrequency column into several columns, one per population
            awk 'BEGIN{FS="\t"
                   OFS="\t"
                   }{
                   gsub(",","\t",$2)
                   print $0
            }END{}' annovarinput.${buildver}_spidex_dropped > $annovarout

            #annovarout=annovarinput.${buildver}_spidex_dropped
            #head $annovarout

            sed -i '1i\db\tSPIDEX_dpsi_max_tissue\tSPIDEX_dpsi_zscore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
            joinresults originalfile $annovarout 4 5 6 7 8 B.SPIDEX_dpsi_max_tissue,B.SPIDEX_dpsi_zscore
        fi

    fi

    #GERP++
    if [ $gerp == "Y" ]
    then
        echo -e "\nGERP++ Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_gerp++gt2_dropped"
        sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++
    fi


    #COSMIC
    if [[ $cosmic61 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC61 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic61_dropped"
        sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61

    fi

    if [[ $cosmic63 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC63 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic63_dropped"
        sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63

    fi

    if [[ $cosmic64 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC64 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic64_dropped"
        sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64

    fi

    if [[ $cosmic65 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC65 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic65_dropped"
        sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65

    fi

    if [[ $cosmic67 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC67 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic67_dropped"
        sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67

    fi

    if [[ $cosmic68 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC68 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic68_dropped"
        sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68

    fi

    if [[ $cosmic70 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCOSMIC70 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cosmic70_dropped"
        sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70

    fi

    if [[ $clinvar == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nCLINVAR Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_clinvar_20140211_dropped"
        sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR

    fi

    if [[ $nci60 == "Y" && $buildver == "hg19" ]]
    then
        echo -e "\nNCI60 Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_nci60_dropped"
        sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60

    fi

    #cg46
    if [[ $cg46 == "Y"  ]]
    then
        echo -e "\nCG 46 genomes Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cg46_dropped"
        sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}

    fi


    #cg69
    if [[ $cg69 == "Y"  ]]
    then
        echo -e "\nCG 69 genomes Annotation"
        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1

        annovarout="annovarinput.${buildver}_cg69_dropped"
        sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
        joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}

    fi


    if [ $convertcoords == "Y" ]
    then
        echo "converting back coordinates"
        awk 'BEGIN{
                FS="\t";
                OFS="\t";
            }{
                if (FNR==1)
                    print $0
                if(FNR>1) {
                    $"'"${chrcol}"'" = "chr"$"'"${chrcol}"'"
                    if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 };
                    if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" };
                    if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" };
                    if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 };
                    print $0
                }
            }
            END{
            }' originalfile > originalfile_coords
    else
        mv originalfile originalfile_coords
    fi

    #restore "chr" prefix?

    #move to outputfile
    if [ ! -s annovarinput.invalid_input ]
    then
        echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input
    fi

    cp originalfile_coords $outfile_all
    cp annovarinput.invalid_input $outfile_invalid 2>&1

    sed -i 's/chrchr/chr/g' $outfile_all
    sed -i 's/chrchr/chr/g' $outfile_invalid

fi #if $dorunannovar
author	saskia-hiltemann
date	Fri, 04 Mar 2016 11:56:10 -0500
parents	d6af2a78617f
children