annovar: tools/annovar/annovar.sh comparison

comparison tools/annovar/annovar.sh @ 5:4600be69b96f draft

Added databases 1000g2015aug, SPIDEX, avsnp138, avsnp142, exac03

author	saskia-hiltemann
date	Thu, 01 Oct 2015 04:24:45 -0400
parents	ff5325029a8e
children	69e2067a120d

comparison

equal deleted inserted replaced

-:e423536a0780
+:4600be69b96f
 #	   PARSE PARAMETERS
 #
 #################################
-set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
+set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: spidex: gonl: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
 [ $# -eq 0 ] && usage
 while [ $# -gt 0 ]
 		--impactscores)      		impactscores=$2;shift;; # Y or N
 		--newimpactscores)      	newimpactscores=$2;shift;; # Y or N
 		--otherinfo)				otherinfo=$2;shift;;
 		--scriptsdir)	      		scriptsdirtmp=$2;shift;; # Y or N
 		--esp)      				esp=$2;shift;; 	# Y or N
+		--exac03)                   exac03=$2;shift;;
+		--gonl)                     gonl=$2;shift;;
+		--spidex)                   spidex=$2;shift;;
 		--gerp)      				gerp=$2;shift;; 	# Y or N
 		--cosmic61)					cosmic61=$2;shift;;  # Y or N
 		--cosmic63)					cosmic63=$2;shift;;  # Y or N
 		--cosmic64)					cosmic64=$2;shift;;  # Y or N
 		--cosmic65)					cosmic65=$2;shift;;  # Y or N
 		awk 'BEGIN{
 				FS="\t";
 				OFS="\t";
 			}{
 				if(FNR>1) {
+gsub(/chr/,"",$"'"${chrcol}"'")
 					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
 					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
 					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
 					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };
 			}
 			END{
 			}' $infile > annovarinput
 			#remove any "chr" prefixes
-			sed -i 's/chr//g' annovarinput
+			#sed -i '2,$s/chr//g' annovarinput
 			awk 'BEGIN{
 				FS="\t";
 				OFS="\t";
 			}{
 				if(FNR>=1) {
+				        gsub(/chr/,"",$"'"${chrcol}"'")
 					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
 					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
 					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
 					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };
 			}
 			END{
 			}' $infile > originalfile
 			#remove any "chr" prefixes
-			sed -i 's/chr//g' originalfile
+			#sed -i '2,$s/chr//g' originalfile
 			sed -i 's/omosome/chromosome/g' originalfile
 	else #only rearrange columns if already 1-based coordinates
 		echo "rearranging columns "
 		awk 'BEGIN{
 				FS="\t";
 				OFS="\t";
 			}{
 				if(FNR>1) {
-					printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
+printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
 				}
 			}
 			END{
 			}' $infile > annovarinput
 			#remove any "chr" prefixes
-			sed -i 's/chr//g' annovarinput
+			sed -i '2,$s/chr//g' annovarinput
-			sed 's/chr//g' $infile > originalfile
+			sed '2,$s/chr//g' $infile > originalfile
 			sed -i 's/omosome/chromosome/g' originalfile
 	fi
 	echo "...finished conversion"
 			break
 		fi
 		echo -e "\ndbSNP region Annotation, version: $version"
 		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1
+		columnname=${version}
+		if [[ $columnname == snp* ]]
+		then
+			columnname="db${version}"
+		fi
 		annovarout=annovarinput.${buildver}_${version}_dropped
-		sed -i '1i\db\tdb'${version}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+		sed -i '1i\db\t'${columnname}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
-		joinresults originalfile $annovarout 3 4 5 6 7 B.db${version}
+		joinresults originalfile $annovarout 3 4 5 6 7 B.${columnname}
 	done
 			g1000_colheader_ALL="${version}_ALL"
 			g1000_colheader_AFR="${version}_AFR"
 			g1000_colheader_AMR="${version}_AMR"
 			g1000_colheader_ASN="${version}_ASN"
 			g1000_colheader_EUR="${version}_EUR"
+			g1000_colheader_EAS="${version}_EAS"
+			g1000_colheader_SAS="${version}_SAS"
 			g1000_colheader_CEU="${version}_CEU"
 			g1000_colheader_YRI="${version}_YRI"
 			g1000_colheader_JPTCHB="${version}_JPTCHB"
 			doALL="N"
 			doAMR="N"
 			doAFR="N"
 			doASN="N"
+			doEAS="N"
+			doSAS="N"
 			doEUR="N"
 			doCEU="N"
 			doYRI="N"
 			doJPTCHB="N"
 				then
 					doAMR="Y"
 					doAFR="Y"
 					doASN="Y"
 					doEUR="Y"
 				fi
+			elif [ $version == "1000g2014oct" ]
+			then
+				fileID="2014_10"
+				doALL="Y"
+				doAMR="Y"
+				doAFR="Y"
+				doEUR="Y"
+				doEAS="Y"
+				if [ $buildver == "hg19" ]
+				then
+					doSAS="Y"
+				fi
+			elif [[ $version == "1000g2015aug" && $buildver == "hg19" ]]
+			then
+				fileID="2015_08"
+				doALL="Y"
+				doAMR="Y"
+				doAFR="Y"
+				doEUR="Y"
+				doEAS="Y"
+				doSAS="Y"
 			elif [[ $version == "1000g2012feb" && $buildver == "hg19" ]]
 			then
 				fileID="2012_02"
 				doALL="Y"
 			elif [[ $version == "1000g2010nov" && $buildver == "hg19" ]]
 				annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped
 				sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
 				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN
 			fi
+			# EAS
+			if [ $doEAS == "Y"  ]
+			then
+				echo -e "\n1000Genomes EAS"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1
+				annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS
+			fi
+			# SAS
+			if [ $doSAS == "Y"  ]
+			then
+				echo -e "\n1000Genomes SAS"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1
+				annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS
+			fi
 			# EUR
 			if [ $doEUR == "Y"  ]
 			then
 				echo -e "\n1000Genomes EUR"
 				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1
 	then
 		echo -e "\nLJB2 pp2hvar Annotation"
 		$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1
 		annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped
+		head $annovarout
 		sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
 		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR
 	fi
 	if [ $ljb2_lrt == "Y" ]
 			fi
 		done
 	fi
+	#ExAC-03 database
+	if [ $exac03 == "Y" ]
+	then
+		echo -e "\nExAC03 Annotation"
+		$scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver --otherinfo -dbtype exac03 annovarinput $humandb 2>&1
+		#annovarout=annovarinput.${buildver}_exac03_dropped
+		# split allelefrequency column into several columns, one per population
+		awk 'BEGIN{FS="\t"
+		           OFS="\t"
+		           }{
+		           gsub(",","\t",$2)
+		           print $0
+		           }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout
+		sed -i '1i\db\tExAC_Freq\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+		joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_Freq,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS
+	fi
+#GoNL database
+	if [ $gonl == "Y" ]
+	then
+if [ $buildver == "hg19" ]
+then
+		echo -e "\nGoNL Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype generic -genericdbfile ${buildver}_gonl.txt annovarinput $humandb 2>&1
+	        ls
+		annovarout=annovarinput.${buildver}_generic_dropped
+		head $annovarout
+		sed -i '1i\db\tGoNL\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+		joinresults originalfile $annovarout 3 4 5 6 7 B.GoNL
+fi
+	fi
+	#SPIDEX database
+	if [ $spidex == "Y" ]
+	then
+if [ $buildver == "hg19" ]
+then
+			echo -e "\nSPIDEX Annotation"
+			$scriptsdir/annotate_variation.pl --filter --buildver $buildver --otherinfo -dbtype spidex annovarinput $humandb 2>&1
+			# split allelefrequency column into several columns, one per population
+		    awk 'BEGIN{FS="\t"
+		           OFS="\t"
+		           }{
+		           gsub(",","\t",$2)
+		           print $0
+		    }END{}' annovarinput.${buildver}_spidex_dropped > $annovarout
+			#annovarout=annovarinput.${buildver}_spidex_dropped
+		    #head $annovarout
+			sed -i '1i\db\tSPIDEX_dpsi_max_tissue\tSPIDEX_dpsi_zscore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+			joinresults originalfile $annovarout 4 5 6 7 8 B.SPIDEX_dpsi_max_tissue,B.SPIDEX_dpsi_zscore
+fi
+	fi
 	#GERP++
 	if [ $gerp == "Y" ]
 	then
 		echo -e "\nGERP++ Annotation"
 		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1
 		sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
 		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68
 	fi
+	if [[ $cosmic70 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC70 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1
+		annovarout="annovarinput.${buildver}_cosmic70_dropped"
+		sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70
+	fi
 	if [[ $clinvar == "Y" && $buildver == "hg19" ]]
 	then
 		echo -e "\nCLINVAR Annotation"
 		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1
 		echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input
 	fi
 	cp originalfile_coords $outfile_all
 	cp annovarinput.invalid_input $outfile_invalid 2>&1
+	sed -i 's/chrchr/chr/g' $outfile_all
+	sed -i 's/chrchr/chr/g' $outfile_invalid
 fi #if $dorunannovar

Mercurial > repos > saskia-hiltemann > annovar

comparison tools/annovar/annovar.sh @ 5:4600be69b96f draft