Mercurial > repos > saskia-hiltemann > annovar
diff tools/annovar/annovar.sh @ 2:565c0e690238 draft
Added support for LJB2, COSMIC67, CLINVAR and NCI60.
Fixed dgv annotation to use new UCSC table dgvMerged instead.
author | saskia-hiltemann |
---|---|
date | Mon, 18 Nov 2013 10:32:33 -0500 |
parents | d3a72e55deca |
children | ff5325029a8e |
line wrap: on
line diff
--- a/tools/annovar/annovar.sh Tue Nov 05 07:16:32 2013 -0500 +++ b/tools/annovar/annovar.sh Mon Nov 18 10:32:33 2013 -0500 @@ -1,7 +1,12 @@ #!/bin/bash test="N" +dofilter="N" +######################### +# DEFINE SOME +# FUNCTIONS +######################### function usage(){ echo "usage: $0 todo" @@ -167,7 +172,14 @@ -set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage +################################# +# +# PARSE PARAMETERS +# +################################# + + +set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage [ $# -eq 0 ] && usage @@ -176,8 +188,8 @@ do case "$1" in --inputfile) infile=$2;shift;; # inputfile - --buildver) buildver=$2;shift;; # hg18 or hg19 - --humandb) humandb=$2;shift;; # location of humandb database + --buildver) buildvertmp=$2;shift;; # hg18 or hg19 + --humandb) humandbtmp=$2;shift;; # location of humandb database --varfile) varfile=$2;shift;; # Y or N --VCF) vcf=$2;shift;; #Y or N --chrcol) chrcol=$2;shift;; # which column has chr @@ -188,6 +200,7 @@ --vartypecol) vartypecol=$2;shift;; # which column has vartype --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based? --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene + --hgvs) hgvs=$2;shift;; --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137")" --tfbs) tfbs=$2;shift;; # Y or N --mce) mce=$2;shift;; # Y or N @@ -199,13 +212,18 @@ --cg46) cg46=$2;shift;; --cg69) cg69=$2;shift;; --impactscores) impactscores=$2;shift;; # Y or N - --scriptsdir) scriptsdir=$2;shift;; # Y or N + --newimpactscores) newimpactscores=$2;shift;; # Y or N + --otherinfo) otherinfo=$2;shift;; + --scriptsdir) scriptsdirtmp=$2;shift;; # Y or N --esp) esp=$2;shift;; # Y or N --gerp) gerp=$2;shift;; # Y or N --cosmic61) cosmic61=$2;shift;; # Y or N --cosmic63) cosmic63=$2;shift;; # Y or N --cosmic64) cosmic64=$2;shift;; # Y or N --cosmic65) cosmic65=$2;shift;; # Y or N + --cosmic67) cosmic67=$2;shift;; # Y or N + --nci60) nci60=$2;shift;; # Y or N + --clinvar) clinvar=$2;shift;; # Y or N --filt_dbsnp) filt_dbsnp=$2;shift;; --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value @@ -220,8 +238,7 @@ --outall) outfile_all=$2;shift;; # file --outfilt) outfile_filt=$2;shift;; # file --outinvalid) outfile_invalid=$2;shift;; #file - --dorunannovar) dorunannovar=$2;shift;; #Y or N - --dofilter) dofilter=$2;shift;; #Y or N + --dorunannovar) dorunannovar=$2;shift;; #Y or N -h) shift;; --) shift;break;; -*) usage;; @@ -230,6 +247,11 @@ shift done +#sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma +humandb=${humandbtmp%,*} +buildver=${buildvertmp%,*} +scriptsdir=${scriptsdirtmp%,*} + if [ $test == "Y" ] then @@ -249,6 +271,7 @@ echo "cg46: ${cg46}" echo "cg69: ${cg69}" echo "impactscores: $impactscores" + echo "impactscores: $newimpactscores" echo "esp: $esp" echo "gerp: $gerp" echo "cosmic: $cosmic" @@ -277,10 +300,18 @@ fi + +############################################ +# +# Annotate Variants +# +############################################ + +#parse geneanno param refgene="N" knowngene="N" ensgene="N" -#parse geneanno param + if [[ $geneanno =~ "refSeq" ]] then refgene="Y" @@ -293,7 +324,10 @@ then ensgene="Y" fi - +if [ $hgvs == "N" ] +then + hgvs="" +fi #parse verdbsnp/1000g/esp strings dbsnpstr=${verdbsnp//,/ } @@ -314,7 +348,8 @@ polyphen2="N" phylop="N" ljbsift="N" -#parse impactscores param + +#parse old impactscores param (obsolete) if [[ $impactscores =~ "mutationtaster" ]] then mutationtaster="Y" @@ -331,6 +366,10 @@ then ljbsift="Y" fi +if [[ $impactscores =~ "ljb2sift" ]] +then + ljb2sift="Y" +fi if [[ $impactscores =~ "pp2" ]] then polyphen2="Y" @@ -347,6 +386,65 @@ #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores +ljb2_sift="N" +ljb2_pp2hdiv="N" +ljb2_pp2hvar="N" +ljb2_lrt="N" +ljb2_mt="N" +ljb2_ma="N" +ljb2_fathmm="N" +ljb2_gerp="N" +ljb2_phylop="N" +ljb2_siphy="N" + +# parse ljb2 newimpactscores param +# ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy +if [[ $newimpactscores =~ "ljb2_sift" ]] +then + ljb2_sift="Y" +fi +if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]] +then + ljb2_pp2hdiv="Y" +fi +if [[ $newimpactscores =~ "ljb2_pp2hvar" ]] +then + ljb2_pp2hvar="Y" +fi +if [[ $newimpactscores =~ "ljb2_lrt" ]] +then + ljb2_lrt="Y" +fi +if [[ $newimpactscores =~ "ljb2_mt" ]] +then + ljb2_mt="Y" +fi +if [[ $newimpactscores =~ "ljb2_ma" ]] +then + ljb2_ma="Y" +fi +if [[ $newimpactscores =~ "ljb2_fathmm" ]] +then + ljb2_fathmm="Y" +fi +if [[ $newimpactscores =~ "ljb2_gerp" ]] +then + ljb2_gerp="Y" +fi +if [[ $newimpactscores =~ "ljb2_phylop" ]] +then + ljb2_phylop="Y" +fi +if [[ $newimpactscores =~ "ljb2_siphy" ]] +then + ljb2_siphy="Y" +fi + +if [ $otherinfo == "N" ] +then + otherinfo="" +fi + #column header names we will be adding # ESP 6500 @@ -498,7 +596,7 @@ if [ $refgene == "Y" ] then echo -e "\nrefSeq gene" - $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene annovarinput $humandb 2>&1 + $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1 annovarout=annovarinput.variant_function sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout @@ -612,9 +710,9 @@ if [ $dgv == "Y" ] then echo -e "\nDGV Annotation" - $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgv annovarinput $humandb 2>&1 + $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1 - annovarout=annovarinput.${buildver}_dgv + annovarout=annovarinput.${buildver}_dgvMerged sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout joinresults originalfile $annovarout 3 4 5 6 7 B.DGV fi @@ -807,6 +905,115 @@ fi + + + #### IMPACT SCORE ANNOTATIONS + + + if [ $ljb2_sift == "Y" ] + then + echo -e "\nLJB2 SIFT Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_sift_dropped + sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT + fi + + if [ $ljb2_pp2hdiv == "Y" ] + then + echo -e "\nLJB2 pp2hdiv Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped + sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV + fi + + if [ $ljb2_pp2hvar == "Y" ] + then + echo -e "\nLJB2 pp2hvar Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped + sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR + fi + + if [ $ljb2_lrt == "Y" ] + then + echo -e "\nLJB2 LRT Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_lrt_dropped + sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT + fi + + if [ $ljb2_mt == "Y" ] + then + echo -e "\nLJB2 mutationtaster Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_mt_dropped + sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster + fi + + if [ $ljb2_ma == "Y" ] + then + echo -e "\nLJB2 mutationassessor Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_ma_dropped + sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor + fi + + if [ $ljb2_fathmm == "Y" ] + then + echo -e "\nLJB2 FATHMM Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped + sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM + fi + + if [ $ljb2_gerp == "Y" ] + then + echo -e "\nLJB2 GERP++ Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped + sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++ + fi + + if [ $ljb2_phylop == "Y" ] + then + echo -e "\nLJB2 PhyloP Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_phylop_dropped + sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP + fi + + if [ $ljb2_siphy == "Y" ] + then + echo -e "\nLJB2 SiPhy Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_siphy_dropped + sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy + fi + + + + ### OLD IMPACT SCORE ANNOTATIONS + # SIFT if [ $avsift == "Y" ] then @@ -822,7 +1029,7 @@ # SIFT2 if [ $ljbsift == "Y" ] then - echo -e "\nSIFT Annotation" + echo -e "\nLJB SIFT Annotation" $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1 annovarout=annovarinput.${buildver}_ljb_sift_dropped @@ -984,35 +1191,6 @@ fi - #ESP6500 - if [ $esp == "Y" ] - then - echo -e "\nESP Annotation OLD" - # ALL - $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1 - - annovarout=annovarinput.${buildver}_esp6500si_all_dropped - sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout - joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL - - - # European American - $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1 - - annovarout=annovarinput.${buildver}_esp6500si_ea_dropped - sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout - joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA - - # African Americans - $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1 - - annovarout=annovarinput.${buildver}_esp6500si_aa_dropped - sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout - joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA - fi - - - #GERP++ if [ $gerp == "Y" ] then @@ -1070,6 +1248,39 @@ fi + if [[ $cosmic67 == "Y" && $buildver == "hg19" ]] + then + echo -e "\nCOSMIC67 Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1 + + annovarout="annovarinput.${buildver}_cosmic67_dropped" + sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67 + + fi + + if [[ $clinvar == "Y" && $buildver == "hg19" ]] + then + echo -e "\nCLINVAR Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20131105 annovarinput $humandb 2>&1 + + annovarout="annovarinput.${buildver}_clinvar_20131105_dropped" + sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR + + fi + + if [[ $nci60 == "Y" && $buildver == "hg19" ]] + then + echo -e "\nNCI60 Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1 + + annovarout="annovarinput.${buildver}_nci60_dropped" + sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60 + + fi + #cg46 if [[ $cg46 == "Y" ]] then @@ -1138,52 +1349,6 @@ -############################################ -# -# Filter Annotated Variants -# -############################################ - - -if [[ $dofilter == "Y" ]] -then - echo "starting filtering" - cp originalfile filteredfile - - ### do the filtering - # usage: runfilter <column name> <threshold> (-1=do not filter, 0=filter any value) - - #1000genomes - runfilter filteredfile ${g1000_colheader_ALL} ${threshold_1000g_ALL} - runfilter filteredfile ${g1000_colheader_AFR} ${threshold_1000g_AFR} - runfilter filteredfile ${g1000_colheader_AMR} ${threshold_1000g_AMR} - runfilter filteredfile ${g1000_colheader_ASN} ${threshold_1000g_ASN} - runfilter filteredfile ${g1000_colheader_EUR} ${threshold_1000g_EUR} - - #esp - runfilter filteredfile ${esp6500_colheader_ALL} ${threshold_ESP6500_ALL} - runfilter filteredfile ${esp6500_colheader_EA} ${threshold_ESP6500_EA} - runfilter filteredfile ${esp6500_colheader_AA} ${threshold_ESP6500_AA} - - #dbsnp - for version in $filt_dbsnpstr - do - if [ $version == "None" ] - then - break - fi - runfilter filteredfile "db$version" "text" #-42 will filter any non-empty string in that field - - done - - #complete genomics - runfilter filteredfile ${cg46_colheader} ${threshold_cg46} - runfilter filteredfile ${cg69_colheader} ${threshold_cg69} - - #move filtered output file to galaxy output file - cp filteredfile $outfile_filt - -fi @@ -1201,3 +1366,4 @@ +