diff DC_Genotyper.pl @ 16:7e4a9ee69f0b draft

Uploaded
author geert-vandeweyer
date Mon, 29 Sep 2014 02:03:50 -0400
parents 8cc26598eeac
children 3ba482a2dd0e
line wrap: on
line diff
--- a/DC_Genotyper.pl	Mon Sep 29 02:03:40 2014 -0400
+++ b/DC_Genotyper.pl	Mon Sep 29 02:03:50 2014 -0400
@@ -170,27 +170,38 @@
 ##########################################################
 my %dbsnp :shared;
 if ($snpfile ne '') {
-	my $bcf = `which bcftools`;
-	chomp($bcf);
-	if ($bcf ne '') {
-		my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt";
-		system("$command");
-		open IN, "$wd/dbsnp.txt";
-		while (<IN>) {
-			chomp;
-			my @p = split(/\t/,$_);
-			$dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4];
-		}
-		close IN;
-	}
-	else {
-		print "WARNING: BCFtools is not in the path. Skipping snp handling.\n";
-		$snpfile = '';
-		system("touch $wd/dbsnp.txt");
-	}
+	#my $bcf = `which bcftools`;
+	#chomp($bcf);
+	#if ($bcf ne '') {
+	#	my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt";
+	#	system("$command");
+	#	open IN, "$wd/dbsnp.txt";
+	#	while (<IN>) {
+	#		chomp;
+	#		my @p = split(/\t/,$_);
+	#		$dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4];
+	#	}
+	#	close IN;
+	#}
+	#else {
+	#	print "WARNING: BCFtools is not in the path. Skipping snp handling.\n";
+	#	$snpfile = '';
+	#	system("touch $wd/dbsnp.txt");
+	#}
+	system("tabix $snpfile -B $opts{'t'} | cut -f 1-5 > $wd/dbsnp.txt");
+	my $lc = `cat $wd/dbsnp.txt | wc -l`;
+	chomp($lc);
+	if ($lc eq '0') {
+		open SNP, ">$wd/dbsnp.txt";
+		## dummy line on chr zero
+		print SNP "chr0\t1\t.\tA\tT\n";
+		close SNP;
 }
 else {
-	system("touch $wd/dbsnp.txt");
+	open SNP, ">$wd/dbsnp.txt";
+	## dummy line on chr zero
+	print SNP "chr0\t1\t.\tA\tT\n";
+	close SNP;
 }
 
 ## now process the bam file.
@@ -278,14 +289,15 @@
 ############
 # create the R script.
 open R, ">$wd/CallSNPs.R";
+print R "\n";
 print R "args <- commandArgs(trailingOnly = TRUE)\n";
-print R "counts <- read.table(file=args[1],header=FALSE, as.is=TRUE)\n";
+print R "counts <- read.table(file = args[1],header = FALSE, as.is = TRUE)\n";
 print R "ploidy <- as.integer(args[3])\n";
 print R "chr <- args[2]\n";
 print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n";
-print R "colnames(snps) <- c('chr','pos','ref','alt','id')\n";
+print R "colnames(snps) <- c('chr','pos','id','ref','alt')\n";
 print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n";
-print R "dists <- read.table(file='$wd/allelic_distributions.txt',header=TRUE,as.is=TRUE)\n";
+print R "dists <- read.table(file='".$opts{'a'}."',header=TRUE,as.is=TRUE)\n";
 print R 'rownames(dists) = dists$allele'."\n";
 print R 'dists <- dists[,-1]'."\n";
 print R "vcf <- c()\n";
@@ -489,7 +501,7 @@
 	while (defined(my $allele = $alleles->dequeue())) {
 		system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'");
 		open OUT, ">$wd/GetDistribution.$allele.R";
-		print OUT "sample <- '$bam'\n";
+		print OUT "\n";
 		print OUT "nt <- '$allele'\n";
 		#print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n";
 		print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n";